pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (76) hide show
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1859 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +701 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +178 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +55 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +18 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +70 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +148 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/__init__.py +4 -0
  65. trilogy/hooks/base_hook.py +6 -4
  66. trilogy/hooks/query_debugger.py +113 -97
  67. trilogy/parser.py +1 -1
  68. trilogy/parsing/common.py +307 -64
  69. trilogy/parsing/parse_engine.py +277 -618
  70. trilogy/parsing/render.py +50 -26
  71. trilogy/scripts/trilogy.py +2 -1
  72. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  73. trilogy/core/models.py +0 -4960
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
  76. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,413 @@
1
+ from functools import cached_property
2
+ from pathlib import Path
3
+ from typing import Annotated, List, Optional, Union
4
+
5
+ from pydantic import BaseModel, Field, computed_field, field_validator
6
+ from pydantic.functional_validators import PlainValidator
7
+
8
+ from trilogy.constants import CONFIG
9
+ from trilogy.core.enums import (
10
+ FunctionClass,
11
+ IOType,
12
+ Modifier,
13
+ ShowCategory,
14
+ )
15
+ from trilogy.core.models.author import (
16
+ AggregateWrapper,
17
+ AlignClause,
18
+ Concept,
19
+ ConceptRef,
20
+ FilterItem,
21
+ Function,
22
+ Grain,
23
+ HasUUID,
24
+ HavingClause,
25
+ Metadata,
26
+ MultiSelectLineage,
27
+ OrderBy,
28
+ SelectLineage,
29
+ UndefinedConcept,
30
+ WhereClause,
31
+ WindowItem,
32
+ )
33
+ from trilogy.core.models.datasource import Address, ColumnAssignment, Datasource
34
+ from trilogy.core.models.environment import (
35
+ Environment,
36
+ EnvironmentConceptDict,
37
+ validate_concepts,
38
+ )
39
+ from trilogy.core.statements.common import SelectTypeMixin
40
+ from trilogy.utility import unique
41
+
42
+
43
class ConceptTransform(BaseModel):
    """A derivation written inline in a select, paired with the concept it outputs."""

    function: Function | FilterItem | WindowItem | AggregateWrapper
    # this has to be a full concept, as it may not exist in environment
    output: Concept
    modifiers: List[Modifier] = Field(default_factory=list)

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a new transform with ``with_merge`` applied to both parts.

        The supplied ``modifiers`` are appended after this transform's own.
        """
        merged_function = self.function.with_merge(source, target, modifiers)
        merged_output = self.output.with_merge(source, target, modifiers)
        return ConceptTransform(
            function=merged_function,
            output=merged_output,
            modifiers=[*self.modifiers, *modifiers],
        )

    def with_namespace(self, namespace: str) -> "ConceptTransform":
        """Return a new transform with function and output moved into ``namespace``."""
        namespaced_function = self.function.with_namespace(namespace)
        namespaced_output = self.output.with_namespace(namespace)
        return ConceptTransform(
            function=namespaced_function,
            output=namespaced_output,
            modifiers=self.modifiers,
        )
61
+
62
+
63
class SelectItem(BaseModel):
    """One entry of a select list: a concept reference or an inline transform."""

    content: Union[ConceptTransform, ConceptRef]
    modifiers: List[Modifier] = Field(default_factory=list)

    @field_validator("content", mode="before")
    def parse_content(cls, v):
        """Swap a full ``Concept`` for its reference before validation runs."""
        return v.reference if isinstance(v, Concept) else v

    @property
    def concept(self) -> ConceptRef:
        """Reference to the concept this item yields."""
        item = self.content
        if isinstance(item, ConceptRef):
            return item
        if isinstance(item, Concept):
            return item.reference
        # remaining case: a ConceptTransform, which exposes its output concept
        return item.output.reference

    @property
    def is_undefined(self) -> bool:
        """True when the content is an ``UndefinedConcept``."""
        return isinstance(self.content, UndefinedConcept)
84
+
85
+
86
class SelectStatement(HasUUID, SelectTypeMixin, BaseModel):
    """A single select statement as written by the author.

    Holds the selected items, optional ordering and limit, the concepts
    resolved locally for this statement, and the statement grain.
    ``where_clause`` and ``having_clause`` are inherited from
    ``SelectTypeMixin``.
    """

    selection: List[SelectItem]
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None
    meta: Metadata = Field(default_factory=lambda: Metadata())
    local_concepts: Annotated[
        EnvironmentConceptDict, PlainValidator(validate_concepts)
    ] = Field(default_factory=EnvironmentConceptDict)
    grain: Grain = Field(default_factory=Grain)

    def as_lineage(self, environment: Environment) -> SelectLineage:
        """Build a ``SelectLineage`` from this statement.

        Each selected concept is looked up in ``environment`` by address and
        its reference is used for the lineage selection.
        """
        return SelectLineage(
            selection=[
                environment.concepts[x.concept.address].reference
                for x in self.selection
            ],
            order_by=self.order_by,
            limit=self.limit,
            where_clause=self.where_clause,
            having_clause=self.having_clause,
            local_concepts=self.local_concepts,
            hidden_components=self.hidden_components,
            grain=self.grain,
        )

    @classmethod
    def from_inputs(
        cls,
        environment: Environment,
        selection: List[SelectItem],
        order_by: OrderBy | None = None,
        limit: int | None = None,
        meta: Metadata | None = None,
        where_clause: WhereClause | None = None,
        having_clause: HavingClause | None = None,
    ) -> "SelectStatement":
        """Construct a statement, compute its grain, and register local concepts.

        Args:
            environment: environment used to resolve concepts and compute grain.
            selection: the items being selected.
            order_by: optional ordering.
            limit: optional row limit.
            meta: optional metadata; a fresh ``Metadata`` is used when omitted.
            where_clause: optional WHERE filter.
            having_clause: optional HAVING filter.

        Returns:
            The validated statement (``validate_syntax`` has been run).
        """
        output = SelectStatement(
            selection=selection,
            where_clause=where_clause,
            having_clause=having_clause,
            limit=limit,
            order_by=order_by,
            meta=meta or Metadata(),
        )
        output.grain = output.calculate_grain(environment)
        for x in selection:
            if x.is_undefined and environment.concepts.fail_on_missing:
                environment.concepts.raise_undefined(
                    x.concept.address, meta.line_number if meta else None
                )
            elif isinstance(x.content, ConceptTransform):
                if isinstance(x.content.output, UndefinedConcept):
                    continue
                # optionally treat the select as a definition of the new concept
                if (
                    CONFIG.select_as_definition
                    and not environment.frozen
                    and x.concept.address not in environment.concepts
                ):
                    environment.add_concept(x.content.output)
                x.content.output = x.content.output.set_select_grain(
                    output.grain, environment
                )
                # we might not need this
                output.local_concepts[x.content.output.address] = x.content.output

            elif isinstance(x.content, ConceptRef):
                output.local_concepts[x.content.address] = environment.concepts[
                    x.content.address
                ]  # .set_select_grain(output.grain, environment)

        output.validate_syntax(environment)
        return output

    def calculate_grain(self, environment: Environment | None = None) -> Grain:
        """Derive the grain from the selected concepts and the where clause."""
        targets = [x.concept for x in self.selection]
        return Grain.from_concepts(
            targets, where_clause=self.where_clause, environment=environment
        )

    def validate_syntax(self, environment: Environment):
        """Check clause/projection consistency.

        Raises:
            SyntaxError: when the WHERE clause references an aggregate derived
                in this select, when HAVING references a concept outside the
                projection, or when ORDER BY references a concept outside the
                projection.

        Undefined concepts in the WHERE clause are reported through
        ``environment.concepts.raise_undefined``.
        """
        if self.where_clause:
            for x in self.where_clause.concept_arguments:
                if isinstance(x, UndefinedConcept):
                    environment.concepts.raise_undefined(
                        x.address, x.metadata.line_number if x.metadata else None
                    )
        # Compare addresses with addresses: build the lookup once instead of
        # testing a str for membership in a list of ConceptRef objects.
        output_addresses = {x.address for x in self.output_components}
        if self.where_clause:
            locally_derived = self.locally_derived
            for cref in self.where_clause.concept_arguments:
                concept = environment.concepts[cref.address]
                if isinstance(concept, UndefinedConcept):
                    continue
                lineage = concept.lineage
                if not lineage:
                    continue
                # plain functions and aggregate wrappers expose the operator differently
                if isinstance(lineage, Function):
                    operator = lineage.operator
                elif isinstance(lineage, AggregateWrapper):
                    operator = lineage.function.operator
                else:
                    continue
                if (
                    operator in FunctionClass.AGGREGATE_FUNCTIONS.value
                    and concept.address in locally_derived
                ):
                    raise SyntaxError(
                        f"Cannot reference an aggregate derived in the select ({concept.address}) in the same statement where clause; move to the HAVING clause instead; Line: {self.meta.line_number}"
                    )
        if self.having_clause:
            for cref in self.having_clause.concept_arguments:
                if cref.address not in output_addresses:
                    raise SyntaxError(
                        f"Cannot reference a column ({cref.address}) that is not in the select projection in the HAVING clause, move to WHERE; Line: {self.meta.line_number}"
                    )
        if self.order_by:
            for cref in self.order_by.concept_arguments:
                if cref.address not in output_addresses:
                    raise SyntaxError(
                        f"Cannot order by column {cref.address} that is not in the output projection; line: {self.meta.line_number}"
                    )

    def __str__(self):
        # function-scope import; presumably avoids a circular import with the renderer
        from trilogy.parsing.render import render_query

        return render_query(self)

    @field_validator("selection", mode="before")
    @classmethod
    def selection_validation(cls, v):
        """Wrap bare ``Concept`` / ``ConceptTransform`` entries in ``SelectItem``."""
        return [
            SelectItem(content=item)
            if isinstance(item, (Concept, ConceptTransform))
            else item
            for item in v
        ]

    @property
    def locally_derived(self) -> set[str]:
        """Addresses of concepts produced by transforms in this select."""
        return {
            item.concept.address
            for item in self.selection
            if isinstance(item.content, ConceptTransform)
        }

    @property
    def output_components(self) -> List[ConceptRef]:
        """References for every selected item, in selection order."""
        return [x.concept for x in self.selection]

    @property
    def hidden_components(self) -> set[str]:
        """Addresses of selected items carrying ``Modifier.HIDDEN``."""
        return {
            x.concept.address for x in self.selection if Modifier.HIDDEN in x.modifiers
        }

    def to_datasource(
        self,
        namespace: str,
        name: str,
        address: Address,
        environment: Environment,
        grain: Grain | None = None,
    ) -> Datasource:
        """Describe the output of this select as a ``Datasource``.

        Args:
            namespace: namespace assigned to the datasource.
            name: datasource name.
            address: physical address of the datasource.
            environment: used to resolve each output concept by address.
            grain: overrides the statement grain when provided.

        Returns:
            A datasource with one column per output component. Columns for
            concepts not derived in this select are marked partial when the
            statement has a WHERE or HAVING clause.
        """
        if self.where_clause or self.having_clause:
            modifiers = [Modifier.PARTIAL]
        else:
            modifiers = []
        # computed once; the property rebuilds the set on every access
        locally_derived = self.locally_derived
        columns = [
            # TODO: replace hardcoded replacement here
            # if the concept is a locally derived concept, it cannot ever be partial
            # but if it's a concept pulled in from upstream and we have a where clause, it should be partial
            ColumnAssignment(
                alias=c.address.replace(".", "_"),
                concept=environment.concepts[c.address].reference,
                modifiers=modifiers if c.address not in locally_derived else [],
            )
            for c in self.output_components
        ]

        condition = None
        if self.where_clause:
            condition = self.where_clause.conditional
        if self.having_clause:
            if condition:
                condition = self.having_clause.conditional + condition
            else:
                condition = self.having_clause.conditional

        new_datasource = Datasource(
            name=name,
            address=address,
            grain=grain or self.grain,
            columns=columns,
            namespace=namespace,
            non_partial_for=WhereClause(conditional=condition) if condition else None,
        )
        return new_datasource
292
+
293
+
294
class RawSQLStatement(BaseModel):
    """Statement holding a text payload and optional metadata."""

    text: str
    meta: Metadata | None = Field(default_factory=lambda: Metadata())
297
+
298
+
299
class CopyStatement(BaseModel):
    """Statement pairing a select with a target and its ``IOType``."""

    target: str
    target_type: IOType
    meta: Metadata | None = Field(default_factory=lambda: Metadata())
    select: SelectStatement
304
+
305
+
306
class MultiSelectStatement(HasUUID, SelectTypeMixin, BaseModel):
    """Several select statements combined through an align clause."""

    selects: List[SelectStatement]
    align: AlignClause
    namespace: str
    derived_concepts: List[Concept]
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None
    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
    local_concepts: Annotated[
        EnvironmentConceptDict, PlainValidator(validate_concepts)
    ] = Field(default_factory=EnvironmentConceptDict)

    def as_lineage(self, environment: Environment):
        """Build a ``MultiSelectLineage`` from the lineage of every child select."""
        child_lineages = [child.as_lineage(environment) for child in self.selects]
        return MultiSelectLineage(
            selects=child_lineages,
            align=self.align,
            namespace=self.namespace,
            # derived_concepts = self.derived_concepts,
            limit=self.limit,
            order_by=self.order_by,
            where_clause=self.where_clause,
            having_clause=self.having_clause,
            hidden_components=self.hidden_components,
        )

    def __repr__(self):
        rendered = " MERGE ".join(map(str, self.selects))
        return f"MultiSelect<{rendered}>"

    @property
    def grain(self):
        """Sum of the grains of all child selects, starting from an empty ``Grain``."""
        combined = Grain()
        for child in self.selects:
            combined += child.grain
        return combined

    @property
    def output_components(self) -> List[ConceptRef]:
        """Derived concept references plus each child's non-hidden outputs, de-duplicated by address."""
        refs = [concept.reference for concept in self.derived_concepts]
        for child in self.selects:
            hidden = child.hidden_components
            refs.extend(
                ref for ref in child.output_components if ref.address not in hidden
            )
        return unique(refs, "address")

    @computed_field  # type: ignore
    @cached_property
    def hidden_components(self) -> set[str]:
        """Union of the hidden component addresses of every child select."""
        merged: set[str] = set()
        return merged.union(*(child.hidden_components for child in self.selects))
359
+
360
+
361
class RowsetDerivationStatement(HasUUID, BaseModel):
    """A named rowset defined by a select or multi-select statement."""

    name: str
    select: Union[SelectStatement, MultiSelectStatement]
    namespace: str

    def __repr__(self):
        return f"RowsetDerivation<{self.select!s}>"

    def __str__(self):
        # string form is intentionally the same as the repr
        return self.__repr__()
371
+
372
+
373
class MergeStatementV2(HasUUID, BaseModel):
    """Merge statement: source concepts, targets keyed by string, optional wildcards, and modifiers."""

    sources: List[Concept]
    targets: dict[str, Concept]
    source_wildcard: Optional[str] = None
    target_wildcard: Optional[str] = None
    modifiers: list[Modifier] = Field(default_factory=list)
379
+
380
+
381
class ImportStatement(HasUUID, BaseModel):
    """Import statement: an alias and the filesystem path it refers to."""

    alias: str
    path: Path
384
+
385
+
386
class PersistStatement(HasUUID, BaseModel):
    """Statement pairing a select with the datasource it is persisted to."""

    datasource: Datasource
    select: SelectStatement
    meta: Metadata | None = Field(default_factory=lambda: Metadata())

    @property
    def identifier(self):
        """Identifier of the wrapped datasource."""
        target = self.datasource
        return target.identifier

    @property
    def address(self):
        """Address of the wrapped datasource."""
        target = self.datasource
        return target.address
398
+
399
+
400
class ShowStatement(BaseModel):
    """Show statement wrapping a select, a persist statement, or a ``ShowCategory``."""

    content: Union[SelectStatement, PersistStatement, ShowCategory]
402
+
403
+
404
class Limit(BaseModel):
    """Limit clause holding an integer count."""

    count: int
406
+
407
+
408
class ConceptDeclarationStatement(HasUUID, BaseModel):
    """Statement wrapping a declared ``Concept``."""

    concept: Concept
410
+
411
+
412
class ConceptDerivationStatement(BaseModel):
    """Statement wrapping a derived ``Concept``."""

    concept: Concept
File without changes
@@ -0,0 +1,30 @@
1
+ from typing import List, Union
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from trilogy.core.enums import IOType
6
+ from trilogy.core.models.author import ConceptRef, HavingClause, WhereClause
7
+ from trilogy.core.models.datasource import Address, Datasource
8
+
9
+
10
class CopyQueryMixin(BaseModel):
    """Mixin fields for copy queries: a target and its ``IOType``."""

    target: str
    target_type: IOType
13
+
14
+
15
class MaterializedDataset(BaseModel):
    """Dataset identified by an ``Address``."""

    address: Address
17
+
18
+
19
class PersistQueryMixin(BaseModel):
    """Mixin fields for persist queries: the output dataset and its datasource."""

    output_to: MaterializedDataset
    datasource: Datasource
22
+
23
+
24
class SelectTypeMixin(BaseModel):
    """Mixin supplying optional WHERE and HAVING clauses for select-like statements."""

    where_clause: WhereClause | None = Field(default=None)
    having_clause: HavingClause | None = Field(default=None)

    @property
    def output_components(self) -> List[ConceptRef]:
        """Output concept references; not implemented on the mixin itself."""
        raise NotImplementedError
@@ -0,0 +1,42 @@
1
+ from typing import Annotated, List, Optional, Union
2
+
3
+ from pydantic import BaseModel, Field
4
+ from pydantic.functional_validators import PlainValidator
5
+
6
+ from trilogy.core.models.build import (
7
+ BuildConcept,
8
+ BuildDatasource,
9
+ BuildOrderBy,
10
+ )
11
+ from trilogy.core.models.environment import EnvironmentConceptDict, validate_concepts
12
+ from trilogy.core.models.execute import CTE, UnionCTE
13
+ from trilogy.core.statements.common import CopyQueryMixin, PersistQueryMixin
14
+
15
+
16
class ProcessedQuery(BaseModel):
    """Processed query: output columns, its CTEs, and the base CTE, plus optional limit/order."""

    output_columns: List[BuildConcept]
    ctes: List[Union[CTE, UnionCTE]]
    base: Union[CTE, UnionCTE]
    hidden_columns: set[str] = Field(default_factory=set)
    limit: int | None = None
    order_by: BuildOrderBy | None = None
    local_concepts: Annotated[
        EnvironmentConceptDict, PlainValidator(validate_concepts)
    ] = Field(default_factory=EnvironmentConceptDict)
26
+
27
+
28
class ProcessedQueryPersist(ProcessedQuery, PersistQueryMixin):
    """A ``ProcessedQuery`` combined with the ``PersistQueryMixin`` fields."""
30
+
31
+
32
class ProcessedCopyStatement(ProcessedQuery, CopyQueryMixin):
    """A ``ProcessedQuery`` combined with the ``CopyQueryMixin`` fields."""
34
+
35
+
36
class ProcessedRawSQLStatement(BaseModel):
    """Processed statement holding only a text payload."""

    text: str
38
+
39
+
40
class ProcessedShowStatement(BaseModel):
    """Processed show statement: output columns and the values to display."""

    output_columns: List[BuildConcept]
    output_values: List[BuildConcept | BuildDatasource | ProcessedQuery]