pytrilogy-0.3.142-cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp312-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.142.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.142.dist-info/RECORD +200 -0
  6. pytrilogy-0.3.142.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +16 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2669 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +501 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +751 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1392 -0
  112. trilogy/dialect/bigquery.py +308 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +231 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +769 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +9 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/agent.py +41 -0
  148. trilogy/scripts/agent_info.py +303 -0
  149. trilogy/scripts/common.py +355 -0
  150. trilogy/scripts/dependency/Cargo.lock +617 -0
  151. trilogy/scripts/dependency/Cargo.toml +39 -0
  152. trilogy/scripts/dependency/README.md +131 -0
  153. trilogy/scripts/dependency/build.sh +25 -0
  154. trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
  155. trilogy/scripts/dependency/src/lib.rs +16 -0
  156. trilogy/scripts/dependency/src/main.rs +770 -0
  157. trilogy/scripts/dependency/src/parser.rs +435 -0
  158. trilogy/scripts/dependency/src/preql.pest +208 -0
  159. trilogy/scripts/dependency/src/python_bindings.rs +303 -0
  160. trilogy/scripts/dependency/src/resolver.rs +716 -0
  161. trilogy/scripts/dependency/tests/base.preql +3 -0
  162. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  163. trilogy/scripts/dependency/tests/customer.preql +6 -0
  164. trilogy/scripts/dependency/tests/main.preql +9 -0
  165. trilogy/scripts/dependency/tests/orders.preql +7 -0
  166. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  167. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  168. trilogy/scripts/dependency.py +323 -0
  169. trilogy/scripts/display.py +512 -0
  170. trilogy/scripts/environment.py +46 -0
  171. trilogy/scripts/fmt.py +32 -0
  172. trilogy/scripts/ingest.py +471 -0
  173. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  174. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  175. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  176. trilogy/scripts/ingest_helpers/typing.py +161 -0
  177. trilogy/scripts/init.py +105 -0
  178. trilogy/scripts/parallel_execution.py +713 -0
  179. trilogy/scripts/plan.py +189 -0
  180. trilogy/scripts/run.py +63 -0
  181. trilogy/scripts/serve.py +140 -0
  182. trilogy/scripts/serve_helpers/__init__.py +41 -0
  183. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  184. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  185. trilogy/scripts/serve_helpers/models.py +38 -0
  186. trilogy/scripts/single_execution.py +131 -0
  187. trilogy/scripts/testing.py +119 -0
  188. trilogy/scripts/trilogy.py +68 -0
  189. trilogy/std/__init__.py +0 -0
  190. trilogy/std/color.preql +3 -0
  191. trilogy/std/date.preql +13 -0
  192. trilogy/std/display.preql +18 -0
  193. trilogy/std/geography.preql +22 -0
  194. trilogy/std/metric.preql +15 -0
  195. trilogy/std/money.preql +67 -0
  196. trilogy/std/net.preql +14 -0
  197. trilogy/std/ranking.preql +7 -0
  198. trilogy/std/report.preql +5 -0
  199. trilogy/std/semantic.preql +6 -0
  200. trilogy/utility.py +34 -0
trilogy/core/models/build_environment.py
@@ -0,0 +1,180 @@
+from __future__ import annotations
+
+import difflib
+from pathlib import Path
+from typing import Annotated, Dict, ItemsView, Never, ValuesView
+
+from pydantic import BaseModel, ConfigDict, Field
+from pydantic.functional_validators import PlainValidator
+
+from trilogy.constants import DEFAULT_NAMESPACE
+from trilogy.core.exceptions import (
+    UndefinedConceptException,
+)
+from trilogy.core.models.build import BuildConcept, BuildDatasource, BuildFunction
+from trilogy.core.models.core import DataType
+
+
+class BuildEnvironmentConceptDict(dict):
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(self, *args, **kwargs)
+
+    def values(self) -> ValuesView[BuildConcept]: # type: ignore
+        return super().values()
+
+    def get(self, key: str, default: BuildConcept | None = None) -> BuildConcept | None: # type: ignore
+        try:
+            return self.__getitem__(key)
+        except UndefinedConceptException:
+            return default
+
+    def raise_undefined(
+        self, key: str, line_no: int | None = None, file: Path | str | None = None
+    ) -> Never:
+        # build environment should never check for missing values.
+        if line_no is not None:
+            message = f"Concept '{key}' not found in environment at line {line_no}."
+        else:
+            message = f"Concept '{key}' not found in environment."
+        raise UndefinedConceptException(message, [])
+
+    def __getitem__(
+        self, key: str, line_no: int | None = None, file: Path | None = None
+    ) -> BuildConcept:
+        try:
+            return super(BuildEnvironmentConceptDict, self).__getitem__(key)
+        except KeyError:
+            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
+                return self.__getitem__(key.split(".", 1)[1], line_no)
+            if DEFAULT_NAMESPACE + "." + key in self:
+                return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
+            self.raise_undefined(key, line_no, file)
+
+    def _find_similar_concepts(self, concept_name: str):
+        def strip_local(input: str):
+            if input.startswith(f"{DEFAULT_NAMESPACE}."):
+                return input[len(DEFAULT_NAMESPACE) + 1 :]
+            return input
+
+        matches = difflib.get_close_matches(
+            strip_local(concept_name), [strip_local(x) for x in self.keys()]
+        )
+        return matches
+
+    def items(self) -> ItemsView[str, BuildConcept]: # type: ignore
+        return super().items()
+
+
+class BuildEnvironmentDatasourceDict(dict):
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(self, *args, **kwargs)
+
+    def __getitem__(self, key: str) -> BuildDatasource:
+        try:
+            return super(BuildEnvironmentDatasourceDict, self).__getitem__(key)
+        except KeyError:
+            if DEFAULT_NAMESPACE + "." + key in self:
+                return self.__getitem__(DEFAULT_NAMESPACE + "." + key)
+            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
+                return self.__getitem__(key.split(".", 1)[1])
+            raise
+
+    def values(self) -> ValuesView[BuildDatasource]: # type: ignore
+        return super().values()
+
+    def items(self) -> ItemsView[str, BuildDatasource]: # type: ignore
+        return super().items()
+
+
+def validate_concepts(v) -> BuildEnvironmentConceptDict:
+    if isinstance(v, BuildEnvironmentConceptDict):
+        return v
+    elif isinstance(v, dict):
+        return BuildEnvironmentConceptDict(**{x: y for x, y in v.items()})
+    raise ValueError
+
+
+def validate_datasources(v) -> BuildEnvironmentDatasourceDict:
+    if isinstance(v, BuildEnvironmentDatasourceDict):
+        return v
+    elif isinstance(v, dict):
+        return BuildEnvironmentDatasourceDict(**{x: y for x, y in v.items()})
+    raise ValueError
+
+
+class BuildEnvironment(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)
+
+    concepts: Annotated[
+        BuildEnvironmentConceptDict, PlainValidator(validate_concepts)
+    ] = Field(default_factory=BuildEnvironmentConceptDict)
+
+    canonical_concepts: Annotated[
+        BuildEnvironmentConceptDict, PlainValidator(validate_concepts)
+    ] = Field(default_factory=BuildEnvironmentConceptDict)
+
+    datasources: Annotated[
+        BuildEnvironmentDatasourceDict, PlainValidator(validate_datasources)
+    ] = Field(default_factory=BuildEnvironmentDatasourceDict)
+    functions: Dict[str, BuildFunction] = Field(default_factory=dict)
+    data_types: Dict[str, DataType] = Field(default_factory=dict)
+    namespace: str = DEFAULT_NAMESPACE
+    cte_name_map: Dict[str, str] = Field(default_factory=dict)
+    materialized_concepts: set[str] = Field(default_factory=set)
+    materialized_canonical_concepts: set[str] = Field(default_factory=set)
+    non_partial_materialized_canonical_concepts: set[str] = Field(default_factory=set)
+    alias_origin_lookup: Dict[str, BuildConcept] = Field(default_factory=dict)
+
+    def gen_concept_list_caches(self) -> None:
+        concrete_concepts: list[BuildConcept] = []
+        non_partial_concrete_concepts: list[BuildConcept] = []
+        for datasource in self.datasources.values():
+            for column in datasource.columns:
+                if column.is_complete:
+                    non_partial_concrete_concepts.append(column.concept)
+                concrete_concepts.append(column.concept)
+        concrete_addresses = set([x.address for x in concrete_concepts])
+        canonical_addresses = set([x.canonical_address for x in concrete_concepts])
+        non_partial_canonical_addresses = set(
+            [x.canonical_address for x in non_partial_concrete_concepts]
+        )
+
+        self.materialized_concepts = set(
+            [
+                c.address
+                for c in self.concepts.values()
+                if c.address in concrete_addresses
+            ]
+            + [
+                c.address
+                for c in self.alias_origin_lookup.values()
+                if c.address in concrete_addresses
+            ],
+        )
+        self.materialized_canonical_concepts = set(
+            [
+                c.canonical_address
+                for c in self.concepts.values()
+                if c.canonical_address in canonical_addresses
+            ]
+            + [
+                c.canonical_address
+                for c in self.alias_origin_lookup.values()
+                if c.canonical_address in canonical_addresses
+            ],
+        )
+        self.non_partial_materialized_canonical_concepts = set(
+            [
+                c.canonical_address
+                for c in self.concepts.values()
+                if c.canonical_address in non_partial_canonical_addresses
+            ]
+            + [
+                c.canonical_address
+                for c in self.alias_origin_lookup.values()
+                if c.canonical_address in non_partial_canonical_addresses
+            ],
+        )
+
+
+BuildEnvironment.model_rebuild()
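Note: the BuildEnvironmentConceptDict above resolves keys with or without the default namespace prefix and converts missing keys into UndefinedConceptException. A minimal sketch of that behavior, assuming the wheel above is installed (import name trilogy) and using placeholder string values in place of real BuildConcept objects:

from trilogy.constants import DEFAULT_NAMESPACE
from trilogy.core.models.build_environment import BuildEnvironmentConceptDict

concepts = BuildEnvironmentConceptDict()
concepts[f"{DEFAULT_NAMESPACE}.order_id"] = "<BuildConcept placeholder>"

# A bare key falls back to the namespace-qualified entry, and vice versa.
assert concepts["order_id"] == concepts[f"{DEFAULT_NAMESPACE}.order_id"]

# Lookups that miss raise UndefinedConceptException rather than KeyError,
# which .get() catches in order to return the default instead.
print(concepts.get("order_date"))  # None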
trilogy/core/models/core.py
@@ -0,0 +1,501 @@
+from __future__ import annotations
+
+from abc import ABC
+from collections import UserDict, UserList
+from datetime import date, datetime
+from decimal import Decimal
+from enum import Enum
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Generic,
+    List,
+    Sequence,
+    Tuple,
+    TypeVar,
+    Union,
+    get_args,
+)
+
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    field_validator,
+)
+from pydantic_core import core_schema
+
+from trilogy.constants import (
+    MagicConstants,
+)
+from trilogy.core.enums import DatePart, Modifier, Ordering
+
+
+class DataTyped(ABC):
+
+    # this is not abstract
+    # only because when it's a pydantic property, it fails validation
+    @property
+    def output_datatype(self) -> CONCRETE_TYPES: # type: ignore
+        """
+        This is a huge hack to get property vs pydantic attribute inheritance to work.
+        """
+        if "output_datatype" in self.__dict__:
+            return self.__dict__["output_datatype"]
+        raise NotImplementedError
+
+
+class Addressable(ABC):
+
+    @property
+    def _address(self):
+        return self.address
+
+
+TYPEDEF_TYPES = Union[
+    "DataType",
+    "MapType",
+    "ArrayType",
+    "NumericType",
+    "StructType",
+    "DataTyped",
+    "TraitDataType",
+]
+
+CONCRETE_TYPES = Union[
+    "DataType",
+    "MapType",
+    "ArrayType",
+    "NumericType",
+    "StructType",
+    "TraitDataType",
+]
+
+KT = TypeVar("KT")
+VT = TypeVar("VT")
+LT = TypeVar("LT")
+
+
+class DataType(Enum):
+    # PRIMITIVES
+    STRING = "string"
+    BOOL = "bool"
+    MAP = "map"
+    NUMBER = "number"
+    FLOAT = "float"
+    NUMERIC = "numeric"
+    INTEGER = "int"
+    BIGINT = "bigint"
+    DATE = "date"
+    DATETIME = "datetime"
+    TIMESTAMP = "timestamp"
+    ARRAY = "array"
+    DATE_PART = "date_part"
+    STRUCT = "struct"
+    GEOGRAPHY = "geography"
+    NULL = "null"
+
+    # GRANULAR
+    UNIX_SECONDS = "unix_seconds"
+
+    # PARSING
+    UNKNOWN = "unknown"
+    ANY = "any"
+
+    @property
+    def data_type(self):
+        return self
+
+    def __str__(self) -> str:
+        return self.name
+
+
+class TraitDataType(BaseModel):
+    type: DataType | NumericType | StructType | ArrayType | MapType
+    traits: list[str]
+
+    def __hash__(self):
+        return hash(self.type)
+
+    def __str__(self) -> str:
+        return f"Trait<{self.type}, {self.traits}>"
+
+    def __eq__(self, other):
+        if isinstance(other, DataType):
+            return self.type == other
+        elif isinstance(other, TraitDataType):
+            return self.type == other.type and self.traits == other.traits
+        return False
+
+    @property
+    def data_type(self):
+        return self.type
+
+    @property
+    def value(self):
+        return self.data_type.value
+
+
+class NumericType(BaseModel):
+    precision: int = 20
+    scale: int = 5
+
+    def __str__(self) -> str:
+        return f"Numeric({self.precision},{self.scale})"
+
+    def __hash__(self):
+        return hash((DataType.NUMERIC, self.precision, self.scale))
+
+    @property
+    def data_type(self):
+        return DataType.NUMERIC
+
+    @property
+    def value(self):
+        return self.data_type.value
+
+
+class ArrayType(BaseModel):
+    model_config = ConfigDict(frozen=True)
+    type: TYPEDEF_TYPES
+
+    @field_validator("type", mode="plain")
+    def validate_type(cls, v):
+        return v
+
+    def __str__(self) -> str:
+        return f"ListType<{self.type}>"
+
+    @property
+    def data_type(self):
+        return DataType.ARRAY
+
+    @property
+    def value(self) -> str:
+        return self.data_type.value
+
+    @property
+    def value_data_type(
+        self,
+    ) -> CONCRETE_TYPES:
+        if isinstance(self.type, DataTyped):
+            return self.type.output_datatype
+        return self.type
+
+
+class MapType(BaseModel):
+    key_type: TYPEDEF_TYPES
+    value_type: TYPEDEF_TYPES
+
+    @field_validator("value_type", mode="plain")
+    def validate_type(cls, v):
+        return v
+
+    @field_validator("key_type", mode="plain")
+    def validate_key_type(cls, v):
+        return v
+
+    def __hash__(self):
+        return hash((DataType.MAP, self.key_type, self.value_type))
+
+    @property
+    def data_type(self):
+        return DataType.MAP
+
+    @property
+    def value(self):
+        return self.data_type.value
+
+    @property
+    def value_data_type(
+        self,
+    ) -> CONCRETE_TYPES:
+        if isinstance(self.value_type, DataTyped):
+            return self.value_type.output_datatype
+        return self.value_type
+
+    @property
+    def key_data_type(
+        self,
+    ) -> CONCRETE_TYPES:
+        if isinstance(self.key_type, DataTyped):
+            return self.key_type.output_datatype
+        return self.key_type
+
+
+class StructComponent(BaseModel):
+    name: str
+    type: TYPEDEF_TYPES
+    modifiers: list[Modifier] = []
+
+    @field_validator("type", mode="plain")
+    def validate_type(cls, v):
+        return v
+
+
+class StructType(BaseModel):
+    fields: Sequence[StructComponent | TYPEDEF_TYPES]
+    fields_map: Dict[str, DataTyped | int | float | str | StructComponent]
+
+    def __repr__(self):
+        return "struct<{}>".format(
+            ", ".join(
+                f"{field.name}:{field.type.name}"
+                for field in self.fields
+                if isinstance(field, StructComponent)
+            )
+        )
+
+    def __str__(self) -> str:
+        return self.__repr__()
+
+    @field_validator("fields", mode="plain")
+    def validate_type(cls, v):
+        final = []
+        for field in v:
+            final.append(field)
+        return final
+
+    @field_validator("fields_map", mode="plain")
+    def validate_fields_map(cls, v):
+        return v
+
+    @property
+    def data_type(self):
+        return DataType.STRUCT
+
+    @property
+    def value(self):
+        return self.data_type.value
+
+    @property
+    def field_types(self) -> Dict[str, CONCRETE_TYPES]:
+        out: Dict[str, CONCRETE_TYPES] = {}
+        keys = list(self.fields_map.keys())
+        for idx, field in enumerate(self.fields):
+            if isinstance(field, StructComponent):
+                out[field.name] = arg_to_datatype(field.type)
+            elif isinstance(field, DataTyped):
+                out[keys[idx]] = field.output_datatype
+            else:
+                out[keys[idx]] = field
+        return out
+
+    def __hash__(self):
+        return hash(str(self))
+
+
+class ListWrapper(Generic[VT], UserList):
+    """Used to distinguish parsed list objects from other lists"""
+
+    def __init__(self, *args, type: DataType, nullable: bool = False, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.type = type
+        self.nullable = nullable
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
+    ) -> core_schema.CoreSchema:
+        args = get_args(source_type)
+        if args:
+            schema = handler(List[args]) # type: ignore
+        else:
+            schema = handler(List)
+        return core_schema.no_info_after_validator_function(cls.validate, schema)
+
+    @classmethod
+    def validate(cls, v):
+        return cls(v, type=arg_to_datatype(v[0]))
+
+
+class MapWrapper(Generic[KT, VT], UserDict):
+    """Used to distinguish parsed map objects from other dicts"""
+
+    def __init__(self, *args, key_type: DataType, value_type: DataType, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.key_type = key_type
+        self.value_type = value_type
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
+    ) -> core_schema.CoreSchema:
+        args = get_args(source_type)
+        if args:
+            schema = handler(Dict[args]) # type: ignore
+        else:
+            schema = handler(Dict)
+        return core_schema.no_info_after_validator_function(cls.validate, schema)
+
+    @classmethod
+    def validate(cls, v):
+        return cls(
+            v,
+            key_type=arg_to_datatype(list(v.keys()).pop()),
+            value_type=arg_to_datatype(list(v.values()).pop()),
+        )
+
+
+class TupleWrapper(Generic[VT], tuple):
+    """Used to distinguish parsed tuple objects from other tuples"""
+
+    def __init__(self, val, type: DataType, nullable: bool = False, **kwargs):
+        super().__init__()
+        self.type = type
+        self.val = val
+        self.nullable = nullable
+
+    def __getnewargs__(self):
+        return (self.val, self.type)
+
+    def __new__(cls, val, type: DataType, **kwargs):
+        return super().__new__(cls, tuple(val))
+        # self.type = type
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
+    ) -> core_schema.CoreSchema:
+        args = get_args(source_type)
+        if args:
+            schema = handler(Tuple[args]) # type: ignore
+        else:
+            schema = handler(Tuple)
+        return core_schema.no_info_after_validator_function(cls.validate, schema)
+
+    @classmethod
+    def validate(cls, v):
+        return cls(v, type=arg_to_datatype(v[0]))
+
+
+def list_to_wrapper(args):
+    rtypes = [arg_to_datatype(arg) for arg in args]
+    types = [arg for arg in rtypes if arg != DataType.NULL]
+    if not len(set(types)) == 1:
+        raise SyntaxError(f"Cannot create a list with this set of types: {set(types)}")
+    return ListWrapper(args, type=types[0], nullable=DataType.NULL in rtypes)
+
+
+def tuple_to_wrapper(args):
+    rtypes = [arg_to_datatype(arg) for arg in args]
+    types = [arg for arg in rtypes if arg != DataType.NULL]
+    if not len(set(types)) == 1:
+        raise SyntaxError(f"Cannot create a tuple with this set of types: {set(types)}")
+    return TupleWrapper(args, type=types[0], nullable=DataType.NULL in rtypes)
+
+
+def dict_to_map_wrapper(arg):
+    key_types = [arg_to_datatype(arg) for arg in arg.keys()]
+
+    value_types = [arg_to_datatype(arg) for arg in arg.values()]
+    assert len(set(key_types)) == 1
+    assert len(set(key_types)) == 1
+    return MapWrapper(arg, key_type=key_types[0], value_type=value_types[0])
+
+
+def merge_datatypes(
+    inputs: list[
+        DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
+    ],
+) -> DataType | ArrayType | StructType | MapType | NumericType | TraitDataType:
+    """This is a temporary hack for doing between
+    allowable datatype transformation matrix"""
+    if len(inputs) == 1:
+        return inputs[0]
+
+    if set(inputs) == {DataType.INTEGER, DataType.FLOAT}:
+        return DataType.FLOAT
+    if set(inputs) == {DataType.INTEGER, DataType.NUMERIC}:
+        return DataType.NUMERIC
+    if any(isinstance(x, NumericType) for x in inputs) and all(
+        isinstance(x, NumericType)
+        or x in (DataType.INTEGER, DataType.FLOAT, DataType.NUMERIC)
+        for x in inputs
+    ):
+        candidate = next(x for x in inputs if isinstance(x, NumericType))
+        return candidate
+    return inputs[0]
+
+
+def is_compatible_datatype(left, right):
+    # for unknown types, we can't make any assumptions
+    if isinstance(left, list):
+        return any(is_compatible_datatype(ltype, right) for ltype in left)
+    if isinstance(right, list):
+        return any(is_compatible_datatype(left, rtype) for rtype in right)
+    if left == DataType.ANY or right == DataType.ANY:
+        return True
+    if all(
+        isinstance(x, NumericType)
+        or x in (DataType.INTEGER, DataType.FLOAT, DataType.NUMERIC)
+        for x in (left, right)
+    ):
+        return True
+    elif isinstance(left, NumericType) or isinstance(right, NumericType):
+        return False
+    if right == DataType.UNKNOWN or left == DataType.UNKNOWN:
+        return True
+    if left == right:
+        return True
+    if {left, right} == {DataType.NUMERIC, DataType.FLOAT}:
+        return True
+
+    if {left, right} == {DataType.NUMERIC, DataType.INTEGER}:
+        return True
+    if {left, right} == {DataType.FLOAT, DataType.INTEGER}:
+        return True
+    return False
+
+
+def arg_to_datatype(arg) -> CONCRETE_TYPES:
+
+    if isinstance(arg, MagicConstants):
+        if arg == MagicConstants.NULL:
+            return DataType.NULL
+        raise ValueError(f"Cannot parse arg datatype for arg of type {arg}")
+    elif isinstance(arg, bool):
+        return DataType.BOOL
+    elif isinstance(arg, Ordering):
+        return DataType.STRING # TODO: revisit
+    elif isinstance(arg, int):
+        return DataType.INTEGER
+    elif isinstance(arg, str):
+        return DataType.STRING
+    elif isinstance(arg, float):
+        return DataType.FLOAT
+    elif isinstance(arg, Decimal):
+        return DataType.NUMERIC
+    elif isinstance(arg, DataType):
+        return arg
+    elif isinstance(arg, NumericType):
+        return arg
+    elif isinstance(arg, TraitDataType):
+        return arg
+    elif isinstance(arg, ListWrapper):
+        return ArrayType(type=arg.type)
+    elif isinstance(arg, ArrayType):
+        return arg
+    elif isinstance(arg, MapType):
+        return arg
+    elif isinstance(arg, DataTyped):
+        return arg.output_datatype
+    elif isinstance(arg, TupleWrapper):
+        return ArrayType(type=arg.type)
+    elif isinstance(arg, list):
+        wrapper = list_to_wrapper(arg)
+        return ArrayType(type=wrapper.type)
+    elif isinstance(arg, MapWrapper):
+        return MapType(key_type=arg.key_type, value_type=arg.value_type)
+    elif isinstance(arg, datetime):
+        return DataType.DATETIME
+    elif isinstance(arg, date):
+        return DataType.DATE
+    elif isinstance(arg, StructComponent):
+        return arg_to_datatype(arg.type)
+    elif isinstance(arg, DatePart):
+        return DataType.DATE_PART
+    else:
+        raise ValueError(
+            f"Cannot parse arg datatype for arg of raw type {type(arg)} value {arg}"
+        )