pytrilogy-0.3.138-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
trilogy/core/models/build_environment.py
@@ -0,0 +1,180 @@
+ from __future__ import annotations
+
+ import difflib
+ from pathlib import Path
+ from typing import Annotated, Dict, ItemsView, Never, ValuesView
+
+ from pydantic import BaseModel, ConfigDict, Field
+ from pydantic.functional_validators import PlainValidator
+
+ from trilogy.constants import DEFAULT_NAMESPACE
+ from trilogy.core.exceptions import (
+     UndefinedConceptException,
+ )
+ from trilogy.core.models.build import BuildConcept, BuildDatasource, BuildFunction
+ from trilogy.core.models.core import DataType
+
+
+ class BuildEnvironmentConceptDict(dict):
+     def __init__(self, *args, **kwargs) -> None:
+         super().__init__(self, *args, **kwargs)
+
+     def values(self) -> ValuesView[BuildConcept]:  # type: ignore
+         return super().values()
+
+     def get(self, key: str, default: BuildConcept | None = None) -> BuildConcept | None:  # type: ignore
+         try:
+             return self.__getitem__(key)
+         except UndefinedConceptException:
+             return default
+
+     def raise_undefined(
+         self, key: str, line_no: int | None = None, file: Path | str | None = None
+     ) -> Never:
+         # build environment should never check for missing values.
+         if line_no is not None:
+             message = f"Concept '{key}' not found in environment at line {line_no}."
+         else:
+             message = f"Concept '{key}' not found in environment."
+         raise UndefinedConceptException(message, [])
+
+     def __getitem__(
+         self, key: str, line_no: int | None = None, file: Path | None = None
+     ) -> BuildConcept:
+         try:
+             return super(BuildEnvironmentConceptDict, self).__getitem__(key)
+         except KeyError:
+             if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
+                 return self.__getitem__(key.split(".", 1)[1], line_no)
+             if DEFAULT_NAMESPACE + "." + key in self:
+                 return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
+             self.raise_undefined(key, line_no, file)
+
+     def _find_similar_concepts(self, concept_name: str):
+         def strip_local(input: str):
+             if input.startswith(f"{DEFAULT_NAMESPACE}."):
+                 return input[len(DEFAULT_NAMESPACE) + 1 :]
+             return input
+
+         matches = difflib.get_close_matches(
+             strip_local(concept_name), [strip_local(x) for x in self.keys()]
+         )
+         return matches
+
+     def items(self) -> ItemsView[str, BuildConcept]:  # type: ignore
+         return super().items()
+
+
+ class BuildEnvironmentDatasourceDict(dict):
+     def __init__(self, *args, **kwargs) -> None:
+         super().__init__(self, *args, **kwargs)
+
+     def __getitem__(self, key: str) -> BuildDatasource:
+         try:
+             return super(BuildEnvironmentDatasourceDict, self).__getitem__(key)
+         except KeyError:
+             if DEFAULT_NAMESPACE + "." + key in self:
+                 return self.__getitem__(DEFAULT_NAMESPACE + "." + key)
+             if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
+                 return self.__getitem__(key.split(".", 1)[1])
+             raise
+
+     def values(self) -> ValuesView[BuildDatasource]:  # type: ignore
+         return super().values()
+
+     def items(self) -> ItemsView[str, BuildDatasource]:  # type: ignore
+         return super().items()
+
+
+ def validate_concepts(v) -> BuildEnvironmentConceptDict:
+     if isinstance(v, BuildEnvironmentConceptDict):
+         return v
+     elif isinstance(v, dict):
+         return BuildEnvironmentConceptDict(**{x: y for x, y in v.items()})
+     raise ValueError
+
+
+ def validate_datasources(v) -> BuildEnvironmentDatasourceDict:
+     if isinstance(v, BuildEnvironmentDatasourceDict):
+         return v
+     elif isinstance(v, dict):
+         return BuildEnvironmentDatasourceDict(**{x: y for x, y in v.items()})
+     raise ValueError
+
+
+ class BuildEnvironment(BaseModel):
+     model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)
+
+     concepts: Annotated[
+         BuildEnvironmentConceptDict, PlainValidator(validate_concepts)
+     ] = Field(default_factory=BuildEnvironmentConceptDict)
+
+     canonical_concepts: Annotated[
+         BuildEnvironmentConceptDict, PlainValidator(validate_concepts)
+     ] = Field(default_factory=BuildEnvironmentConceptDict)
+
+     datasources: Annotated[
+         BuildEnvironmentDatasourceDict, PlainValidator(validate_datasources)
+     ] = Field(default_factory=BuildEnvironmentDatasourceDict)
+     functions: Dict[str, BuildFunction] = Field(default_factory=dict)
+     data_types: Dict[str, DataType] = Field(default_factory=dict)
+     namespace: str = DEFAULT_NAMESPACE
+     cte_name_map: Dict[str, str] = Field(default_factory=dict)
+     materialized_concepts: set[str] = Field(default_factory=set)
+     materialized_canonical_concepts: set[str] = Field(default_factory=set)
+     non_partial_materialized_canonical_concepts: set[str] = Field(default_factory=set)
+     alias_origin_lookup: Dict[str, BuildConcept] = Field(default_factory=dict)
+
+     def gen_concept_list_caches(self) -> None:
+         concrete_concepts: list[BuildConcept] = []
+         non_partial_concrete_concepts: list[BuildConcept] = []
+         for datasource in self.datasources.values():
+             for column in datasource.columns:
+                 if column.is_complete:
+                     non_partial_concrete_concepts.append(column.concept)
+                 concrete_concepts.append(column.concept)
+         concrete_addresses = set([x.address for x in concrete_concepts])
+         canonical_addresses = set([x.canonical_address for x in concrete_concepts])
+         non_partial_canonical_addresses = set(
+             [x.canonical_address for x in non_partial_concrete_concepts]
+         )
+
+         self.materialized_concepts = set(
+             [
+                 c.address
+                 for c in self.concepts.values()
+                 if c.address in concrete_addresses
+             ]
+             + [
+                 c.address
+                 for c in self.alias_origin_lookup.values()
+                 if c.address in concrete_addresses
+             ],
+         )
+         self.materialized_canonical_concepts = set(
+             [
+                 c.canonical_address
+                 for c in self.concepts.values()
+                 if c.canonical_address in canonical_addresses
+             ]
+             + [
+                 c.canonical_address
+                 for c in self.alias_origin_lookup.values()
+                 if c.canonical_address in canonical_addresses
+             ],
+         )
+         self.non_partial_materialized_canonical_concepts = set(
+             [
+                 c.canonical_address
+                 for c in self.concepts.values()
+                 if c.canonical_address in non_partial_canonical_addresses
+             ]
+             + [
+                 c.canonical_address
+                 for c in self.alias_origin_lookup.values()
+                 if c.canonical_address in non_partial_canonical_addresses
+             ],
+         )
+
+
+ BuildEnvironment.model_rebuild()
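
The hunk above matches trilogy/core/models/build_environment.py (+180 in the file list) and centers on namespaced concept lookup. A minimal usage sketch of that fallback behavior follows; it is not taken from the package itself, and it substitutes a plain string for a real BuildConcept purely for illustration:

    # Illustrative sketch only; not part of the packaged diff.
    from trilogy.constants import DEFAULT_NAMESPACE
    from trilogy.core.models.build_environment import BuildEnvironmentConceptDict

    concepts = BuildEnvironmentConceptDict()
    # A plain string stands in for a real BuildConcept here.
    concepts[f"{DEFAULT_NAMESPACE}.order_id"] = "order_id concept"

    # A bare name falls back to the default namespace on lookup.
    assert concepts["order_id"] == "order_id concept"
    # get() swallows the UndefinedConceptException raised for unknown keys.
    assert concepts.get("missing") is None

The same fallback runs in the other direction as well: a key already prefixed with the default namespace resolves to the bare entry if only that is present.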
trilogy/core/models/core.py
@@ -0,0 +1,494 @@
+ from __future__ import annotations
+
+ from abc import ABC
+ from collections import UserDict, UserList
+ from datetime import date, datetime
+ from decimal import Decimal
+ from enum import Enum
+ from typing import (
+     Any,
+     Callable,
+     Dict,
+     Generic,
+     List,
+     Sequence,
+     Tuple,
+     TypeVar,
+     Union,
+     get_args,
+ )
+
+ from pydantic import (
+     BaseModel,
+     ConfigDict,
+     field_validator,
+ )
+ from pydantic_core import core_schema
+
+ from trilogy.constants import (
+     MagicConstants,
+ )
+ from trilogy.core.enums import DatePart, Modifier, Ordering
+
+
+ class DataTyped(ABC):
+
+     # this is not abstract
+     # only because when it's a pydantic property, it fails validation
+     @property
+     def output_datatype(self) -> CONCRETE_TYPES:  # type: ignore
+         """
+         This is a huge hack to get property vs pydantic attribute inheritance to work.
+         """
+         if "output_datatype" in self.__dict__:
+             return self.__dict__["output_datatype"]
+         raise NotImplementedError
+
+
+ class Addressable(ABC):
+
+     @property
+     def _address(self):
+         return self.address
+
+
+ TYPEDEF_TYPES = Union[
+     "DataType",
+     "MapType",
+     "ArrayType",
+     "NumericType",
+     "StructType",
+     "DataTyped",
+     "TraitDataType",
+ ]
+
+ CONCRETE_TYPES = Union[
+     "DataType",
+     "MapType",
+     "ArrayType",
+     "NumericType",
+     "StructType",
+     "TraitDataType",
+ ]
+
+ KT = TypeVar("KT")
+ VT = TypeVar("VT")
+ LT = TypeVar("LT")
+
+
+ class DataType(Enum):
+     # PRIMITIVES
+     STRING = "string"
+     BOOL = "bool"
+     MAP = "map"
+     NUMBER = "number"
+     FLOAT = "float"
+     NUMERIC = "numeric"
+     INTEGER = "int"
+     BIGINT = "bigint"
+     DATE = "date"
+     DATETIME = "datetime"
+     TIMESTAMP = "timestamp"
+     ARRAY = "array"
+     DATE_PART = "date_part"
+     STRUCT = "struct"
+     GEOGRAPHY = "geography"
+     NULL = "null"
+
+     # GRANULAR
+     UNIX_SECONDS = "unix_seconds"
+
+     # PARSING
+     UNKNOWN = "unknown"
+     ANY = "any"
+
+     @property
+     def data_type(self):
+         return self
+
+     def __str__(self) -> str:
+         return self.name
+
+
+ class TraitDataType(BaseModel):
+     type: DataType | NumericType | StructType | ArrayType | MapType
+     traits: list[str]
+
+     def __hash__(self):
+         return hash(self.type)
+
+     def __str__(self) -> str:
+         return f"Trait<{self.type}, {self.traits}>"
+
+     def __eq__(self, other):
+         if isinstance(other, DataType):
+             return self.type == other
+         elif isinstance(other, TraitDataType):
+             return self.type == other.type and self.traits == other.traits
+         return False
+
+     @property
+     def data_type(self):
+         return self.type
+
+     @property
+     def value(self):
+         return self.data_type.value
+
+
+ class NumericType(BaseModel):
+     precision: int = 20
+     scale: int = 5
+
+     def __str__(self) -> str:
+         return f"Numeric({self.precision},{self.scale})"
+
+     @property
+     def data_type(self):
+         return DataType.NUMERIC
+
+     @property
+     def value(self):
+         return self.data_type.value
+
+
+ class ArrayType(BaseModel):
+     model_config = ConfigDict(frozen=True)
+     type: TYPEDEF_TYPES
+
+     @field_validator("type", mode="plain")
+     def validate_type(cls, v):
+         return v
+
+     def __str__(self) -> str:
+         return f"ListType<{self.type}>"
+
+     @property
+     def data_type(self):
+         return DataType.ARRAY
+
+     @property
+     def value(self) -> str:
+         return self.data_type.value
+
+     @property
+     def value_data_type(
+         self,
+     ) -> CONCRETE_TYPES:
+         if isinstance(self.type, DataTyped):
+             return self.type.output_datatype
+         return self.type
+
+
+ class MapType(BaseModel):
+     key_type: TYPEDEF_TYPES
+     value_type: TYPEDEF_TYPES
+
+     @field_validator("value_type", mode="plain")
+     def validate_type(cls, v):
+         return v
+
+     @field_validator("key_type", mode="plain")
+     def validate_key_type(cls, v):
+         return v
+
+     @property
+     def data_type(self):
+         return DataType.MAP
+
+     @property
+     def value(self):
+         return self.data_type.value
+
+     @property
+     def value_data_type(
+         self,
+     ) -> CONCRETE_TYPES:
+         if isinstance(self.value_type, DataTyped):
+             return self.value_type.output_datatype
+         return self.value_type
+
+     @property
+     def key_data_type(
+         self,
+     ) -> CONCRETE_TYPES:
+         if isinstance(self.key_type, DataTyped):
+             return self.key_type.output_datatype
+         return self.key_type
+
+
+ class StructComponent(BaseModel):
+     name: str
+     type: TYPEDEF_TYPES
+     modifiers: list[Modifier] = []
+
+     @field_validator("type", mode="plain")
+     def validate_type(cls, v):
+         return v
+
+
+ class StructType(BaseModel):
+     fields: Sequence[StructComponent | TYPEDEF_TYPES]
+     fields_map: Dict[str, DataTyped | int | float | str | StructComponent]
+
+     def __repr__(self):
+         return "struct<{}>".format(
+             ", ".join(
+                 f"{field.name}:{field.type.name}"
+                 for field in self.fields
+                 if isinstance(field, StructComponent)
+             )
+         )
+
+     def __str__(self) -> str:
+         return self.__repr__()
+
+     @field_validator("fields", mode="plain")
+     def validate_type(cls, v):
+         final = []
+         for field in v:
+             final.append(field)
+         return final
+
+     @field_validator("fields_map", mode="plain")
+     def validate_fields_map(cls, v):
+         return v
+
+     @property
+     def data_type(self):
+         return DataType.STRUCT
+
+     @property
+     def value(self):
+         return self.data_type.value
+
+     @property
+     def field_types(self) -> Dict[str, CONCRETE_TYPES]:
+         out: Dict[str, CONCRETE_TYPES] = {}
+         keys = list(self.fields_map.keys())
+         for idx, field in enumerate(self.fields):
+             if isinstance(field, StructComponent):
+                 out[field.name] = arg_to_datatype(field.type)
+             elif isinstance(field, DataTyped):
+                 out[keys[idx]] = field.output_datatype
+             else:
+                 out[keys[idx]] = field
+         return out
+
+     def __hash__(self):
+         return hash(str(self))
+
+
+ class ListWrapper(Generic[VT], UserList):
+     """Used to distinguish parsed list objects from other lists"""
+
+     def __init__(self, *args, type: DataType, nullable: bool = False, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.type = type
+         self.nullable = nullable
+
+     @classmethod
+     def __get_pydantic_core_schema__(
+         cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
+     ) -> core_schema.CoreSchema:
+         args = get_args(source_type)
+         if args:
+             schema = handler(List[args])  # type: ignore
+         else:
+             schema = handler(List)
+         return core_schema.no_info_after_validator_function(cls.validate, schema)
+
+     @classmethod
+     def validate(cls, v):
+         return cls(v, type=arg_to_datatype(v[0]))
+
+
+ class MapWrapper(Generic[KT, VT], UserDict):
+     """Used to distinguish parsed map objects from other dicts"""
+
+     def __init__(self, *args, key_type: DataType, value_type: DataType, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.key_type = key_type
+         self.value_type = value_type
+
+     @classmethod
+     def __get_pydantic_core_schema__(
+         cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
+     ) -> core_schema.CoreSchema:
+         args = get_args(source_type)
+         if args:
+             schema = handler(Dict[args])  # type: ignore
+         else:
+             schema = handler(Dict)
+         return core_schema.no_info_after_validator_function(cls.validate, schema)
+
+     @classmethod
+     def validate(cls, v):
+         return cls(
+             v,
+             key_type=arg_to_datatype(list(v.keys()).pop()),
+             value_type=arg_to_datatype(list(v.values()).pop()),
+         )
+
+
+ class TupleWrapper(Generic[VT], tuple):
+     """Used to distinguish parsed tuple objects from other tuples"""
+
+     def __init__(self, val, type: DataType, nullable: bool = False, **kwargs):
+         super().__init__()
+         self.type = type
+         self.val = val
+         self.nullable = nullable
+
+     def __getnewargs__(self):
+         return (self.val, self.type)
+
+     def __new__(cls, val, type: DataType, **kwargs):
+         return super().__new__(cls, tuple(val))
+         # self.type = type
+
+     @classmethod
+     def __get_pydantic_core_schema__(
+         cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
+     ) -> core_schema.CoreSchema:
+         args = get_args(source_type)
+         if args:
+             schema = handler(Tuple[args])  # type: ignore
+         else:
+             schema = handler(Tuple)
+         return core_schema.no_info_after_validator_function(cls.validate, schema)
+
+     @classmethod
+     def validate(cls, v):
+         return cls(v, type=arg_to_datatype(v[0]))
+
+
+ def list_to_wrapper(args):
+     rtypes = [arg_to_datatype(arg) for arg in args]
+     types = [arg for arg in rtypes if arg != DataType.NULL]
+     if not len(set(types)) == 1:
+         raise SyntaxError(f"Cannot create a list with this set of types: {set(types)}")
+     return ListWrapper(args, type=types[0], nullable=DataType.NULL in rtypes)
+
+
+ def tuple_to_wrapper(args):
+     rtypes = [arg_to_datatype(arg) for arg in args]
+     types = [arg for arg in rtypes if arg != DataType.NULL]
+     if not len(set(types)) == 1:
+         raise SyntaxError(f"Cannot create a tuple with this set of types: {set(types)}")
+     return TupleWrapper(args, type=types[0], nullable=DataType.NULL in rtypes)
+
+
+ def dict_to_map_wrapper(arg):
+     key_types = [arg_to_datatype(arg) for arg in arg.keys()]
+
+     value_types = [arg_to_datatype(arg) for arg in arg.values()]
+     assert len(set(key_types)) == 1
+     assert len(set(key_types)) == 1
+     return MapWrapper(arg, key_type=key_types[0], value_type=value_types[0])
+
+
+ def merge_datatypes(
+     inputs: list[
+         DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
+     ],
+ ) -> DataType | ArrayType | StructType | MapType | NumericType | TraitDataType:
+     """This is a temporary hack for doing between
+     allowable datatype transformation matrix"""
+     if len(inputs) == 1:
+         return inputs[0]
+     if set(inputs) == {DataType.INTEGER, DataType.FLOAT}:
+         return DataType.FLOAT
+     if set(inputs) == {DataType.INTEGER, DataType.NUMERIC}:
+         return DataType.NUMERIC
+     if any(isinstance(x, NumericType) for x in inputs) and all(
+         isinstance(x, NumericType)
+         or x in (DataType.INTEGER, DataType.FLOAT, DataType.NUMERIC)
+         for x in inputs
+     ):
+         candidate = next(x for x in inputs if isinstance(x, NumericType))
+         return candidate
+     return inputs[0]
+
+
+ def is_compatible_datatype(left, right):
+     # for unknown types, we can't make any assumptions
+     if isinstance(left, list):
+         return any(is_compatible_datatype(ltype, right) for ltype in left)
+     if isinstance(right, list):
+         return any(is_compatible_datatype(left, rtype) for rtype in right)
+     if left == DataType.ANY or right == DataType.ANY:
+         return True
+     if all(
+         isinstance(x, NumericType)
+         or x in (DataType.INTEGER, DataType.FLOAT, DataType.NUMERIC)
+         for x in (left, right)
+     ):
+         return True
+     elif isinstance(left, NumericType) or isinstance(right, NumericType):
+         return False
+     if right == DataType.UNKNOWN or left == DataType.UNKNOWN:
+         return True
+     if left == right:
+         return True
+     if {left, right} == {DataType.NUMERIC, DataType.FLOAT}:
+         return True
+
+     if {left, right} == {DataType.NUMERIC, DataType.INTEGER}:
+         return True
+     if {left, right} == {DataType.FLOAT, DataType.INTEGER}:
+         return True
+     return False
+
+
+ def arg_to_datatype(arg) -> CONCRETE_TYPES:
+
+     if isinstance(arg, MagicConstants):
+         if arg == MagicConstants.NULL:
+             return DataType.NULL
+         raise ValueError(f"Cannot parse arg datatype for arg of type {arg}")
+     elif isinstance(arg, bool):
+         return DataType.BOOL
+     elif isinstance(arg, Ordering):
+         return DataType.STRING  # TODO: revisit
+     elif isinstance(arg, int):
+         return DataType.INTEGER
+     elif isinstance(arg, str):
+         return DataType.STRING
+     elif isinstance(arg, float):
+         return DataType.FLOAT
+     elif isinstance(arg, Decimal):
+         return DataType.NUMERIC
+     elif isinstance(arg, DataType):
+         return arg
+     elif isinstance(arg, NumericType):
+         return arg
+     elif isinstance(arg, TraitDataType):
+         return arg
+     elif isinstance(arg, ListWrapper):
+         return ArrayType(type=arg.type)
+     elif isinstance(arg, ArrayType):
+         return arg
+     elif isinstance(arg, MapType):
+         return arg
+     elif isinstance(arg, DataTyped):
+         return arg.output_datatype
+     elif isinstance(arg, TupleWrapper):
+         return ArrayType(type=arg.type)
+     elif isinstance(arg, list):
+         wrapper = list_to_wrapper(arg)
+         return ArrayType(type=wrapper.type)
+     elif isinstance(arg, MapWrapper):
+         return MapType(key_type=arg.key_type, value_type=arg.value_type)
+     elif isinstance(arg, datetime):
+         return DataType.DATETIME
+     elif isinstance(arg, date):
+         return DataType.DATE
+     elif isinstance(arg, StructComponent):
+         return arg_to_datatype(arg.type)
+     elif isinstance(arg, DatePart):
+         return DataType.DATE_PART
+     else:
+         raise ValueError(
+             f"Cannot parse arg datatype for arg of raw type {type(arg)} value {arg}"
+         )
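
The second hunk matches trilogy/core/models/core.py (+494 in the file list) and defines the type system used during parsing. A short sketch, inferred from the code above rather than taken from the package's tests, of how literal values map to DataType members and how mixed-type lists are rejected:

    # Illustrative sketch only; behavior inferred from the module shown above.
    from decimal import Decimal

    from trilogy.core.models.core import DataType, arg_to_datatype, list_to_wrapper

    # Literal Python values map onto DataType members.
    assert arg_to_datatype(True) is DataType.BOOL  # bool is checked before int
    assert arg_to_datatype(Decimal("1.5")) is DataType.NUMERIC
    assert arg_to_datatype([1, 2, 3]).data_type is DataType.ARRAY

    # Homogeneous lists wrap cleanly; mixed-type lists raise SyntaxError.
    wrapped = list_to_wrapper([1, 2, 3])
    assert wrapped.type is DataType.INTEGER
    try:
        list_to_wrapper([1, "a"])
    except SyntaxError as exc:
        print(exc)  # Cannot create a list with this set of types: ...

The wrapper classes (ListWrapper, MapWrapper, TupleWrapper) exist mainly so that values produced by the parser carry their inferred element types through pydantic validation.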