pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (76) hide show
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1859 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +701 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +178 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +55 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +18 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +70 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +148 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/__init__.py +4 -0
  65. trilogy/hooks/base_hook.py +6 -4
  66. trilogy/hooks/query_debugger.py +113 -97
  67. trilogy/parser.py +1 -1
  68. trilogy/parsing/common.py +307 -64
  69. trilogy/parsing/parse_engine.py +277 -618
  70. trilogy/parsing/render.py +50 -26
  71. trilogy/scripts/trilogy.py +2 -1
  72. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  73. trilogy/core/models.py +0 -4960
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
  76. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,151 @@
1
+ from __future__ import annotations
2
+
3
+ import difflib
4
+ from pathlib import Path
5
+ from typing import Annotated, Dict, ItemsView, Never, ValuesView
6
+
7
+ from pydantic import BaseModel, ConfigDict, Field
8
+ from pydantic.functional_validators import PlainValidator
9
+
10
+ from trilogy.constants import DEFAULT_NAMESPACE
11
+ from trilogy.core.exceptions import (
12
+ UndefinedConceptException,
13
+ )
14
+ from trilogy.core.models.build import BuildConcept, BuildDatasource, BuildFunction
15
+ from trilogy.core.models.core import DataType
16
+
17
+
18
class BuildEnvironmentConceptDict(dict):
    """Dictionary of concept address -> ``BuildConcept`` with namespace-aware lookup.

    Lookups that miss are retried with the ``DEFAULT_NAMESPACE`` prefix added
    or removed; if the key still cannot be found an
    ``UndefinedConceptException`` carrying close-match suggestions is raised.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Forward the arguments untouched. Passing ``self`` as an additional
        # positional argument made every positional initialiser fail with
        # "dict expected at most 1 argument".
        super().__init__(*args, **kwargs)

    def values(self) -> ValuesView[BuildConcept]:  # type: ignore
        """Return the stored concepts (typed view over ``dict.values``)."""
        return super().values()

    def get(self, key: str, default: BuildConcept | None = None) -> BuildConcept | None:  # type: ignore
        """Return the concept for ``key``, or ``default`` when it is undefined.

        Goes through ``__getitem__`` so the namespace fallbacks also apply.
        """
        try:
            return self.__getitem__(key)
        except UndefinedConceptException:
            return default

    def raise_undefined(
        self, key: str, line_no: int | None = None, file: Path | str | None = None
    ) -> Never:
        """Raise ``UndefinedConceptException`` for ``key``.

        The message includes close-match suggestions when any exist and is
        prefixed with the file and/or line number when those are provided.
        """
        matches = self._find_similar_concepts(key)
        message = f"Undefined concept: {key}."
        if matches:
            message += f" Suggestions: {matches}"

        if line_no:
            if file:
                raise UndefinedConceptException(
                    f"{file}: {line_no}: " + message, matches
                )
            raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
        raise UndefinedConceptException(message, matches)

    def __getitem__(
        self, key: str, line_no: int | None = None, file: Path | None = None
    ) -> BuildConcept:
        """Look up ``key``, tolerating a missing or extra default namespace.

        Raises:
            UndefinedConceptException: if no variant of ``key`` is present.
        """
        try:
            return super().__getitem__(key)
        except KeyError:
            # "<default>.name" was requested: retry with the bare name.
            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
                # ``file`` is forwarded so the final error keeps its location.
                return self.__getitem__(key.split(".", 1)[1], line_no, file)
            # A bare name was requested: retry with the default namespace.
            if DEFAULT_NAMESPACE + "." + key in self:
                return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no, file)
            self.raise_undefined(key, line_no, file)

    def _find_similar_concepts(self, concept_name: str):
        """Return close matches for ``concept_name`` among the stored keys.

        The default-namespace prefix is stripped from both sides before
        comparing.
        """

        def strip_local(input: str):
            if input.startswith(f"{DEFAULT_NAMESPACE}."):
                return input[len(DEFAULT_NAMESPACE) + 1 :]
            return input

        matches = difflib.get_close_matches(
            strip_local(concept_name), [strip_local(x) for x in self.keys()]
        )
        return matches

    def items(self) -> ItemsView[str, BuildConcept]:  # type: ignore
        """Return ``(address, concept)`` pairs (typed view over ``dict.items``)."""
        return super().items()
73
+
74
+
75
class BuildEnvironmentDatasourceDict(dict):
    """Dictionary of datasource name -> ``BuildDatasource`` with namespace-aware lookup.

    A lookup that misses is retried with the ``DEFAULT_NAMESPACE`` prefix
    added, then with it removed, before the ``KeyError`` is propagated.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Forward the arguments untouched. Passing ``self`` as an additional
        # positional argument made every positional initialiser fail with
        # "dict expected at most 1 argument".
        super().__init__(*args, **kwargs)

    def __getitem__(self, key: str) -> BuildDatasource:
        """Look up ``key``, tolerating a missing or extra default namespace.

        Raises:
            KeyError: if no variant of ``key`` is present.
        """
        try:
            return super().__getitem__(key)
        except KeyError:
            # A bare name was requested: retry with the default namespace.
            if DEFAULT_NAMESPACE + "." + key in self:
                return self.__getitem__(DEFAULT_NAMESPACE + "." + key)
            # "<default>.name" was requested: retry with the bare name.
            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
                return self.__getitem__(key.split(".", 1)[1])
            raise

    def values(self) -> ValuesView[BuildDatasource]:  # type: ignore
        """Return the stored datasources (typed view over ``dict.values``)."""
        return super().values()

    def items(self) -> ItemsView[str, BuildDatasource]:  # type: ignore
        """Return ``(name, datasource)`` pairs (typed view over ``dict.items``)."""
        return super().items()
94
+
95
+
96
def validate_concepts(v) -> BuildEnvironmentConceptDict:
    """Coerce ``v`` into a ``BuildEnvironmentConceptDict``.

    An existing ``BuildEnvironmentConceptDict`` is returned as-is; a plain
    ``dict`` has each value validated through ``BuildConcept.model_validate``.

    Raises:
        ValueError: if ``v`` is neither of those.
    """
    if isinstance(v, BuildEnvironmentConceptDict):
        return v
    if isinstance(v, dict):
        return BuildEnvironmentConceptDict(
            **{x: BuildConcept.model_validate(y) for x, y in v.items()}
        )
    # Name the offending type so the resulting validation error is actionable.
    raise ValueError(
        f"Expected a dict of concepts, got {type(v).__name__}"
    )
104
+
105
+
106
def validate_datasources(v) -> BuildEnvironmentDatasourceDict:
    """Coerce ``v`` into a ``BuildEnvironmentDatasourceDict``.

    An existing ``BuildEnvironmentDatasourceDict`` is returned as-is; a plain
    ``dict`` has each value validated through
    ``BuildDatasource.model_validate``.

    Raises:
        ValueError: if ``v`` is neither of those.
    """
    if isinstance(v, BuildEnvironmentDatasourceDict):
        return v
    if isinstance(v, dict):
        return BuildEnvironmentDatasourceDict(
            **{x: BuildDatasource.model_validate(y) for x, y in v.items()}
        )
    # Name the offending type so the resulting validation error is actionable.
    raise ValueError(
        f"Expected a dict of datasources, got {type(v).__name__}"
    )
114
+
115
+
116
class BuildEnvironment(BaseModel):
    """Pydantic model bundling concepts, datasources, functions and data types.

    ``concepts`` and ``datasources`` are coerced into their namespace-aware
    dictionary classes by plain validators.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)

    concepts: Annotated[
        BuildEnvironmentConceptDict, PlainValidator(validate_concepts)
    ] = Field(default_factory=BuildEnvironmentConceptDict)
    datasources: Annotated[
        BuildEnvironmentDatasourceDict, PlainValidator(validate_datasources)
    ] = Field(default_factory=BuildEnvironmentDatasourceDict)
    functions: Dict[str, BuildFunction] = Field(default_factory=dict)
    data_types: Dict[str, DataType] = Field(default_factory=dict)
    namespace: str = DEFAULT_NAMESPACE
    cte_name_map: Dict[str, str] = Field(default_factory=dict)
    materialized_concepts: set[str] = Field(default_factory=set)
    alias_origin_lookup: Dict[str, BuildConcept] = Field(default_factory=dict)

    def gen_concept_list_caches(self) -> None:
        """Recompute ``materialized_concepts``.

        A concept address counts as materialized when some datasource lists
        it among its ``output_concepts``; both ``concepts`` and
        ``alias_origin_lookup`` are scanned.
        """
        # Every address that at least one datasource outputs.
        concrete_addresses = {
            output.address
            for source in self.datasources.values()
            for output in source.output_concepts
        }
        from_concepts = {
            concept.address
            for concept in self.concepts.values()
            if concept.address in concrete_addresses
        }
        from_aliases = {
            concept.address
            for concept in self.alias_origin_lookup.values()
            if concept.address in concrete_addresses
        }
        self.materialized_concepts = from_concepts | from_aliases
149
+
150
+
151
+ BuildEnvironment.model_rebuild()
@@ -0,0 +1,370 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC
4
+ from collections import UserDict, UserList
5
+ from datetime import date, datetime
6
+ from enum import Enum
7
+ from typing import (
8
+ Any,
9
+ Callable,
10
+ Dict,
11
+ Generic,
12
+ List,
13
+ Sequence,
14
+ Tuple,
15
+ TypeVar,
16
+ Union,
17
+ get_args,
18
+ )
19
+
20
+ from pydantic import (
21
+ BaseModel,
22
+ ConfigDict,
23
+ field_validator,
24
+ )
25
+ from pydantic_core import core_schema
26
+
27
+ from trilogy.constants import (
28
+ MagicConstants,
29
+ )
30
+
31
+
32
class DataTyped(ABC):
    """Base class for objects that expose an ``output_datatype``."""

    # Deliberately not declared abstract: when a subclass provides
    # ``output_datatype`` as a pydantic attribute, an abstract declaration
    # fails validation.
    @property
    def output_datatype(self) -> CONCRETE_TYPES:  # type: ignore
        """
        Return the ``output_datatype`` stored in the instance ``__dict__``.

        This is a hack to make property vs pydantic attribute inheritance
        work; ``NotImplementedError`` is raised when nothing is stored.
        """
        instance_state = vars(self)
        if "output_datatype" not in instance_state:
            raise NotImplementedError
        return instance_state["output_datatype"]
44
+
45
+
46
class Addressable(ABC):
    """Base class for objects that expose an ``address`` attribute."""

    @property
    def _address(self):
        # Alias for ``self.address``. ``address`` is not defined on this
        # class, so the concrete subclass has to supply it.
        return self.address
51
+
52
+
53
# Anything usable as a type definition: the concrete type classes plus any
# ``DataTyped`` object (which exposes an ``output_datatype``). Members are
# given as strings because the classes are defined further down the module.
TYPEDEF_TYPES = Union[
    "DataType", "MapType", "ListType", "NumericType", "StructType", "DataTyped"
]

# The same union without ``DataTyped``.
CONCRETE_TYPES = Union[
    "DataType",
    "MapType",
    "ListType",
    "NumericType",
    "StructType",
]

# Type variables used by the generic wrapper classes in this module.
KT = TypeVar("KT")
VT = TypeVar("VT")
LT = TypeVar("LT")
68
+
69
+
70
class DataType(Enum):
    """Enumeration of the data type names, keyed by their string value."""

    # PRIMITIVES
    STRING = "string"
    BOOL = "bool"
    MAP = "map"
    LIST = "list"
    NUMBER = "number"
    FLOAT = "float"
    NUMERIC = "numeric"
    INTEGER = "int"
    BIGINT = "bigint"
    DATE = "date"
    DATETIME = "datetime"
    TIMESTAMP = "timestamp"
    ARRAY = "array"
    DATE_PART = "date_part"
    STRUCT = "struct"
    NULL = "null"

    # GRANULAR
    UNIX_SECONDS = "unix_seconds"

    # PARSING
    UNKNOWN = "unknown"

    @property
    def data_type(self):
        # An enum member is its own data type; this mirrors the ``data_type``
        # property offered by the model-based type classes in this module.
        return self
98
+
99
+
100
class NumericType(BaseModel):
    """Numeric type carrying a ``precision`` and a ``scale``."""

    # Defaults used when no explicit precision/scale is given.
    precision: int = 20
    scale: int = 5

    @property
    def data_type(self):
        """Always ``DataType.NUMERIC``."""
        return DataType.NUMERIC

    @property
    def value(self):
        """String value of the underlying ``DataType`` member."""
        return self.data_type.value
111
+
112
+
113
class ListType(BaseModel):
    """Frozen model describing a list whose elements have type ``type``."""

    model_config = ConfigDict(frozen=True)
    type: TYPEDEF_TYPES

    @field_validator("type", mode="plain")
    def validate_type(cls, v):
        # Plain validator: the element type is accepted exactly as given.
        return v

    def __str__(self) -> str:
        return f"ListType<{self.type}>"

    @property
    def data_type(self):
        """Always ``DataType.LIST``."""
        return DataType.LIST

    @property
    def value(self):
        """String value of the underlying ``DataType`` member."""
        return self.data_type.value

    @property
    def value_data_type(
        self,
    ) -> CONCRETE_TYPES:
        """Element type, unwrapped to its ``output_datatype`` when it is ``DataTyped``."""
        element = self.type
        return element.output_datatype if isinstance(element, DataTyped) else element
139
+
140
+
141
class MapType(BaseModel):
    """Model describing a map from ``key_type`` to ``value_type``."""

    key_type: DataType
    value_type: TYPEDEF_TYPES

    @field_validator("value_type", mode="plain")
    def validate_type(cls, v):
        # Plain validator: the value type is accepted exactly as given.
        return v

    @property
    def data_type(self):
        """Always ``DataType.MAP``."""
        return DataType.MAP

    @property
    def value(self):
        """String value of the underlying ``DataType`` member."""
        return self.data_type.value

    @property
    def value_data_type(
        self,
    ) -> CONCRETE_TYPES:
        """Value type, unwrapped to its ``output_datatype`` when it is ``DataTyped``."""
        declared = self.value_type
        return declared.output_datatype if isinstance(declared, DataTyped) else declared

    @property
    def key_data_type(
        self,
    ) -> CONCRETE_TYPES:
        """Key type, unwrapped to its ``output_datatype`` when it is ``DataTyped``."""
        declared = self.key_type
        return declared.output_datatype if isinstance(declared, DataTyped) else declared
172
+
173
+
174
class StructType(BaseModel):
    """Model describing a struct through its ``fields`` and ``fields_map``."""

    fields: Sequence[TYPEDEF_TYPES]
    fields_map: Dict[str, DataTyped | int | float | str]

    @field_validator("fields", mode="plain")
    def validate_type(cls, v):
        # Plain validator: copy the given fields into a new list, unchanged.
        return list(v)

    @field_validator("fields_map", mode="plain")
    def validate_fields_map(cls, v):
        # Plain validator: the mapping is accepted exactly as given.
        return v

    @property
    def data_type(self):
        """Always ``DataType.STRUCT``."""
        return DataType.STRUCT

    @property
    def value(self):
        """String value of the underlying ``DataType`` member."""
        return self.data_type.value
196
+
197
+
198
class ListWrapper(Generic[VT], UserList):
    """Used to distinguish parsed list objects from other lists"""

    def __init__(self, *args, type: DataType, **kwargs):
        super().__init__(*args, **kwargs)
        # Element type shared by the items of this list.
        self.type = type

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        """Validate as a (possibly parametrised) ``List``, then wrap via ``validate``."""
        type_args = get_args(source_type)
        inner_schema = handler(List[type_args]) if type_args else handler(List)  # type: ignore
        return core_schema.no_info_after_validator_function(cls.validate, inner_schema)

    @classmethod
    def validate(cls, v):
        """Wrap ``v``, taking the element type from its first item."""
        first_item = v[0]
        return cls(v, type=arg_to_datatype(first_item))
219
+
220
+
221
class MapWrapper(Generic[KT, VT], UserDict):
    """Used to distinguish parsed map objects from other dicts"""

    def __init__(self, *args, key_type: DataType, value_type: DataType, **kwargs):
        super().__init__(*args, **kwargs)
        # Types shared by the keys and by the values of this map.
        self.key_type = key_type
        self.value_type = value_type

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        """Validate as a (possibly parametrised) ``Dict``, then wrap via ``validate``."""
        type_args = get_args(source_type)
        inner_schema = handler(Dict[type_args]) if type_args else handler(Dict)  # type: ignore
        return core_schema.no_info_after_validator_function(cls.validate, inner_schema)

    @classmethod
    def validate(cls, v):
        """Wrap ``v``, taking the key/value types from its last entry."""
        all_keys = list(v.keys())
        all_values = list(v.values())
        return cls(
            v,
            key_type=arg_to_datatype(all_keys.pop()),
            value_type=arg_to_datatype(all_values.pop()),
        )
247
+
248
+
249
class TupleWrapper(Generic[VT], tuple):
    """Used to distinguish parsed tuple objects from other tuples"""

    def __new__(cls, val, type: DataType, **kwargs):
        # The tuple contents are fixed at construction time; ``type`` is
        # attached afterwards in ``__init__``.
        return super().__new__(cls, tuple(val))

    def __init__(self, val, type: DataType, **kwargs):
        super().__init__()
        self.type = type
        self.val = val

    def __getnewargs__(self):
        # Arguments handed back to ``__new__`` when the object is
        # reconstructed (for example by pickle or copy).
        return (self.val, self.type)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        """Validate as a (possibly parametrised) ``Tuple``, then wrap via ``validate``."""
        type_args = get_args(source_type)
        inner_schema = handler(Tuple[type_args]) if type_args else handler(Tuple)  # type: ignore
        return core_schema.no_info_after_validator_function(cls.validate, inner_schema)

    @classmethod
    def validate(cls, v):
        """Wrap ``v``, taking the element type from its first item."""
        first_item = v[0]
        return cls(v, type=arg_to_datatype(first_item))
278
+
279
+
280
def list_to_wrapper(args):
    """Wrap ``args`` in a ``ListWrapper`` typed by its (single) element type.

    Raises:
        AssertionError: if ``args`` is empty or its items do not all resolve
            to the same datatype.
    """
    types = [arg_to_datatype(arg) for arg in args]
    # Compare with ``==`` instead of building a set: non-frozen pydantic type
    # models (e.g. NumericType, MapType) are unhashable and made ``set(types)``
    # raise TypeError.
    assert types and all(
        candidate == types[0] for candidate in types
    ), "list items must all share a single datatype"
    return ListWrapper(args, type=types[0])
284
+
285
+
286
def tuple_to_wrapper(args):
    """Wrap ``args`` in a ``TupleWrapper`` typed by its (single) element type.

    Raises:
        AssertionError: if ``args`` is empty or its items do not all resolve
            to the same datatype.
    """
    types = [arg_to_datatype(arg) for arg in args]
    # Compare with ``==`` instead of building a set: non-frozen pydantic type
    # models (e.g. NumericType, MapType) are unhashable and made ``set(types)``
    # raise TypeError.
    assert types and all(
        candidate == types[0] for candidate in types
    ), "tuple items must all share a single datatype"
    return TupleWrapper(args, type=types[0])
290
+
291
+
292
def dict_to_map_wrapper(arg):
    """Wrap the dict ``arg`` in a ``MapWrapper`` typed by its key and value types.

    Raises:
        AssertionError: if ``arg`` is empty, its keys do not share a single
            datatype, or its values do not share a single datatype.
    """
    key_types = [arg_to_datatype(key) for key in arg.keys()]
    value_types = [arg_to_datatype(value) for value in arg.values()]
    # Compare with ``==`` instead of building sets: non-frozen pydantic type
    # models (e.g. NumericType, MapType) are unhashable.
    assert key_types and all(
        candidate == key_types[0] for candidate in key_types
    ), "map keys must all share a single datatype"
    # The original asserted on ``key_types`` twice, so mixed value types were
    # never detected.
    assert value_types and all(
        candidate == value_types[0] for candidate in value_types
    ), "map values must all share a single datatype"
    return MapWrapper(arg, key_type=key_types[0], value_type=value_types[0])
299
+
300
+
301
def merge_datatypes(
    inputs: list[DataType | ListType | StructType | MapType | NumericType],
) -> DataType | ListType | StructType | MapType | NumericType:
    """This is a temporary hack for doing between
    allowable datatype transformation matrix

    Rules, in order:
      * a single input is returned unchanged;
      * a mix of exactly INTEGER and FLOAT merges to FLOAT;
      * a mix of exactly INTEGER and NUMERIC merges to NUMERIC;
      * when at least one ``NumericType`` is present and every other input is
        a ``NumericType`` or INTEGER/FLOAT/NUMERIC, the first ``NumericType``
        wins;
      * otherwise the first input is returned.
    """
    if len(inputs) == 1:
        return inputs[0]

    def _is_exactly(*members) -> bool:
        # Equivalent to ``set(inputs) == set(members)`` but based on ``==``,
        # so unhashable (non-frozen pydantic) types such as NumericType do
        # not raise TypeError before the NumericType rule can be reached.
        return all(item in members for item in inputs) and all(
            member in inputs for member in members
        )

    if _is_exactly(DataType.INTEGER, DataType.FLOAT):
        return DataType.FLOAT
    if _is_exactly(DataType.INTEGER, DataType.NUMERIC):
        return DataType.NUMERIC

    plain_numerics = (DataType.INTEGER, DataType.FLOAT, DataType.NUMERIC)
    if any(isinstance(x, NumericType) for x in inputs) and all(
        isinstance(x, NumericType) or x in plain_numerics for x in inputs
    ):
        return next(x for x in inputs if isinstance(x, NumericType))
    return inputs[0]
320
+
321
+
322
def is_compatible_datatype(left, right):
    """Return True when ``left`` and ``right`` may be used interchangeably.

    Types are compatible when either is ``DataType.UNKNOWN``, when they are
    equal, or when both belong to the numeric family (NUMERIC, FLOAT, INTEGER
    or a ``NumericType`` instance).
    """
    # for unknown types, we can't make any assumptions
    if right == DataType.UNKNOWN or left == DataType.UNKNOWN:
        return True
    if left == right:
        return True

    numeric_family = (DataType.NUMERIC, DataType.FLOAT, DataType.INTEGER)

    def _is_numeric(candidate) -> bool:
        # Tuple membership uses ``==``, so unhashable types (non-frozen
        # pydantic models such as NumericType) no longer raise TypeError the
        # way the original ``{left, right}`` set literals did.
        return isinstance(candidate, NumericType) or candidate in numeric_family

    return _is_numeric(left) and _is_numeric(right)
335
+
336
+
337
def arg_to_datatype(arg) -> CONCRETE_TYPES:
    """Infer the datatype of the runtime value ``arg``.

    The checks run in a fixed order and the first match wins.

    Raises:
        ValueError: for a ``MagicConstants`` member other than NULL, or for a
            value of an unsupported type.
    """
    if isinstance(arg, MagicConstants):
        if arg == MagicConstants.NULL:
            return DataType.NULL
        raise ValueError(f"Cannot parse arg datatype for arg of type {arg}")
    # bool is a subclass of int, so it has to be tested first.
    if isinstance(arg, bool):
        return DataType.BOOL
    if isinstance(arg, int):
        return DataType.INTEGER
    if isinstance(arg, str):
        return DataType.STRING
    if isinstance(arg, float):
        return DataType.FLOAT
    if isinstance(arg, NumericType):
        return arg
    if isinstance(arg, ListWrapper):
        return ListType(type=arg.type)
    if isinstance(arg, DataTyped):
        return arg.output_datatype
    if isinstance(arg, TupleWrapper):
        return ListType(type=arg.type)
    if isinstance(arg, list):
        wrapped = list_to_wrapper(arg)
        return ListType(type=wrapped.type)
    if isinstance(arg, MapWrapper):
        return MapType(key_type=arg.key_type, value_type=arg.value_type)
    # datetime is a subclass of date, so it has to be tested first.
    if isinstance(arg, datetime):
        return DataType.DATETIME
    if isinstance(arg, date):
        return DataType.DATE
    raise ValueError(
        f"Cannot parse arg datatype for arg of raw type {type(arg)} value {arg}"
    )