pytrilogy 0.0.1.102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (77)
  1. pytrilogy-0.0.1.102.dist-info/LICENSE.md +19 -0
  2. pytrilogy-0.0.1.102.dist-info/METADATA +277 -0
  3. pytrilogy-0.0.1.102.dist-info/RECORD +77 -0
  4. pytrilogy-0.0.1.102.dist-info/WHEEL +5 -0
  5. pytrilogy-0.0.1.102.dist-info/entry_points.txt +2 -0
  6. pytrilogy-0.0.1.102.dist-info/top_level.txt +1 -0
  7. trilogy/__init__.py +8 -0
  8. trilogy/compiler.py +0 -0
  9. trilogy/constants.py +30 -0
  10. trilogy/core/__init__.py +0 -0
  11. trilogy/core/constants.py +3 -0
  12. trilogy/core/enums.py +270 -0
  13. trilogy/core/env_processor.py +33 -0
  14. trilogy/core/environment_helpers.py +156 -0
  15. trilogy/core/ergonomics.py +187 -0
  16. trilogy/core/exceptions.py +23 -0
  17. trilogy/core/functions.py +320 -0
  18. trilogy/core/graph_models.py +55 -0
  19. trilogy/core/internal.py +37 -0
  20. trilogy/core/models.py +3145 -0
  21. trilogy/core/processing/__init__.py +0 -0
  22. trilogy/core/processing/concept_strategies_v3.py +603 -0
  23. trilogy/core/processing/graph_utils.py +44 -0
  24. trilogy/core/processing/node_generators/__init__.py +25 -0
  25. trilogy/core/processing/node_generators/basic_node.py +71 -0
  26. trilogy/core/processing/node_generators/common.py +239 -0
  27. trilogy/core/processing/node_generators/concept_merge.py +152 -0
  28. trilogy/core/processing/node_generators/filter_node.py +83 -0
  29. trilogy/core/processing/node_generators/group_node.py +92 -0
  30. trilogy/core/processing/node_generators/group_to_node.py +99 -0
  31. trilogy/core/processing/node_generators/merge_node.py +148 -0
  32. trilogy/core/processing/node_generators/multiselect_node.py +189 -0
  33. trilogy/core/processing/node_generators/rowset_node.py +130 -0
  34. trilogy/core/processing/node_generators/select_node.py +328 -0
  35. trilogy/core/processing/node_generators/unnest_node.py +37 -0
  36. trilogy/core/processing/node_generators/window_node.py +85 -0
  37. trilogy/core/processing/nodes/__init__.py +76 -0
  38. trilogy/core/processing/nodes/base_node.py +251 -0
  39. trilogy/core/processing/nodes/filter_node.py +49 -0
  40. trilogy/core/processing/nodes/group_node.py +110 -0
  41. trilogy/core/processing/nodes/merge_node.py +326 -0
  42. trilogy/core/processing/nodes/select_node_v2.py +198 -0
  43. trilogy/core/processing/nodes/unnest_node.py +54 -0
  44. trilogy/core/processing/nodes/window_node.py +34 -0
  45. trilogy/core/processing/utility.py +278 -0
  46. trilogy/core/query_processor.py +331 -0
  47. trilogy/dialect/__init__.py +0 -0
  48. trilogy/dialect/base.py +679 -0
  49. trilogy/dialect/bigquery.py +80 -0
  50. trilogy/dialect/common.py +43 -0
  51. trilogy/dialect/config.py +55 -0
  52. trilogy/dialect/duckdb.py +83 -0
  53. trilogy/dialect/enums.py +95 -0
  54. trilogy/dialect/postgres.py +86 -0
  55. trilogy/dialect/presto.py +82 -0
  56. trilogy/dialect/snowflake.py +82 -0
  57. trilogy/dialect/sql_server.py +89 -0
  58. trilogy/docs/__init__.py +0 -0
  59. trilogy/engine.py +48 -0
  60. trilogy/executor.py +242 -0
  61. trilogy/hooks/__init__.py +0 -0
  62. trilogy/hooks/base_hook.py +37 -0
  63. trilogy/hooks/graph_hook.py +24 -0
  64. trilogy/hooks/query_debugger.py +133 -0
  65. trilogy/metadata/__init__.py +0 -0
  66. trilogy/parser.py +10 -0
  67. trilogy/parsing/__init__.py +0 -0
  68. trilogy/parsing/common.py +176 -0
  69. trilogy/parsing/config.py +5 -0
  70. trilogy/parsing/exceptions.py +2 -0
  71. trilogy/parsing/helpers.py +1 -0
  72. trilogy/parsing/parse_engine.py +1951 -0
  73. trilogy/parsing/render.py +483 -0
  74. trilogy/py.typed +0 -0
  75. trilogy/scripts/__init__.py +0 -0
  76. trilogy/scripts/trilogy.py +127 -0
  77. trilogy/utility.py +31 -0
@@ -0,0 +1,251 @@
1
+ from typing import List, Optional, Sequence
2
+ from collections import defaultdict
3
+
4
+ from trilogy.core.models import (
5
+ Grain,
6
+ QueryDatasource,
7
+ SourceType,
8
+ Concept,
9
+ Environment,
10
+ Conditional,
11
+ UnnestJoin,
12
+ Datasource,
13
+ Comparison,
14
+ Parenthetical,
15
+ LooseConceptList,
16
+ )
17
+ from trilogy.core.enums import Purpose, JoinType, PurposeLineage, Granularity
18
+ from trilogy.utility import unique
19
+ from dataclasses import dataclass
20
+
21
+
22
def concept_list_to_grain(
    inputs: List[Concept], parent_sources: Sequence[QueryDatasource | Datasource]
) -> Grain:
    """Derive the output Grain for a set of concepts.

    Rules, in order of evaluation:
    - non-single-row KEY concepts always contribute to the grain;
    - single-row concepts never contribute;
    - a PROPERTY contributes only when none of its keys are already candidates;
    - a CONSTANT always contributes;
    - a METRIC contributes only when a parent source has already materialized
      it (a pre-computed metric behaves like a key downstream).

    :param inputs: concepts whose combined grain is being computed
    :param parent_sources: upstream sources checked for pre-computed metrics
    :return: Grain built from the surviving candidate concepts
    """
    candidates = [
        c
        for c in inputs
        if c.purpose == Purpose.KEY and c.granularity != Granularity.SINGLE_ROW
    ]
    for x in inputs:
        if x.granularity == Granularity.SINGLE_ROW:
            continue
        if x.purpose == Purpose.PROPERTY and not any(
            # generator form: no throwaway list, short-circuits on first hit
            key in candidates
            for key in (x.keys or [])
        ):
            candidates.append(x)
        elif x.purpose == Purpose.CONSTANT:
            candidates.append(x)
        elif x.purpose == Purpose.METRIC:
            # metrics that were previously calculated must be included in grain
            if any(x in parent.output_concepts for parent in parent_sources):
                candidates.append(x)

    return Grain(components=candidates)
45
+
46
+
47
def resolve_concept_map(
    inputs: List[QueryDatasource],
    targets: List[Concept],
    inherited_inputs: List[Concept],
    full_joins: List[Concept] | None = None,
) -> dict[str, set[Datasource | QueryDatasource | UnnestJoin]]:
    """Map each concept address to the set of sources that provide it.

    Resolution happens in three passes:
    1. non-partial providers — full-join concepts collect every provider,
       all others keep only the first one seen;
    2. partial providers — used only when pass 1 supplied nothing;
    3. targets not present in the inherited inputs get an empty set,
       meaning they are derived (defined) in this CTE.

    :param inputs: candidate upstream sources
    :param targets: concepts this CTE must output
    :param inherited_inputs: concepts available from upstream
    :param full_joins: concepts joined FULL, which need every provider
    :return: address -> providing sources (empty set = derived here)
    """
    targets = targets or []
    concept_map: dict[str, set[Datasource | QueryDatasource | UnnestJoin]] = (
        defaultdict(set)
    )
    full_addresses = {c.address for c in full_joins} if full_joins else set()
    # hoisted out of the loops: membership is tested by address many times
    inherited_addresses = {t.address for t in inherited_inputs}
    for input in inputs:
        for concept in input.output_concepts:
            if concept.address not in input.non_partial_concept_addresses:
                continue
            if concept.address not in inherited_addresses:
                continue
            if concept.address in full_addresses:
                # full-join concepts retain all providers
                concept_map[concept.address].add(input)
            elif concept.address not in concept_map:
                # otherwise, first non-partial provider wins
                concept_map[concept.address].add(input)

    # second loop, include partials
    for input in inputs:
        for concept in input.output_concepts:
            if concept.address not in inherited_addresses:
                continue
            if not concept_map.get(concept.address):
                concept_map[concept.address].add(input)
    # this adds our new derived metrics, which are not created in this CTE
    for target in targets:
        if target not in inherited_inputs:
            # an empty source means it is defined in this CTE
            concept_map[target.address] = set()
    return concept_map
82
+
83
+
84
def get_all_parent_partial(
    all_concepts: List[Concept], parents: List["StrategyNode"]
) -> List[Concept]:
    """Return the concepts that are partial in EVERY parent node.

    A concept is only considered partial for this node when all parents
    flag it as partial; with no parents, nothing is partial (guard needed
    because all(...) over an empty iterable is vacuously True).

    :param all_concepts: candidate output concepts for this node
    :param parents: upstream strategy nodes to inspect
    :return: subset of all_concepts partial in every parent
    """
    if not parents:
        return []
    # precompute one address set per parent instead of rebuilding a list
    # of addresses for every (concept, parent) pair
    partial_address_sets = [
        {x.address for x in p.partial_concepts} for p in parents
    ]
    return [
        c
        for c in all_concepts
        if all(c.address in addresses for addresses in partial_address_sets)
    ]
92
+
93
+
94
class StrategyNode:
    """Base node in the query-planning graph.

    A StrategyNode declares the concepts it consumes and produces, its
    parent nodes, and how to resolve itself into a QueryDatasource.
    Subclasses override ``source_type`` (and usually ``_resolve``) to
    specialize behavior; resolution results are cached per node.
    """

    source_type = SourceType.ABSTRACT

    def __init__(
        self,
        input_concepts: List[Concept],
        output_concepts: List[Concept],
        environment: Environment,
        g,
        whole_grain: bool = False,
        parents: List["StrategyNode"] | None = None,
        partial_concepts: List[Concept] | None = None,
        depth: int = 0,
        conditions: Conditional | Comparison | Parenthetical | None = None,
        force_group: bool | None = None,
        grain: Optional[Grain] = None,
    ):
        # inputs/outputs are deduplicated by address to keep downstream
        # source maps stable
        self.input_concepts: List[Concept] = (
            unique(input_concepts, "address") if input_concepts else []
        )
        self.input_lcl = LooseConceptList(concepts=self.input_concepts)
        self.output_concepts: List[Concept] = unique(output_concepts, "address")
        self.output_lcl = LooseConceptList(concepts=self.output_concepts)

        self.environment = environment
        # g is presumably the concept graph used by subclasses — TODO confirm
        self.g = g
        self.whole_grain = whole_grain
        self.parents = parents or []
        self.resolution_cache: Optional[QueryDatasource] = None
        # when not given explicitly, a concept is partial here only if it is
        # partial in every parent
        self.partial_concepts = partial_concepts or get_all_parent_partial(
            self.output_concepts, self.parents
        )
        self.partial_lcl = LooseConceptList(concepts=self.partial_concepts)
        self.depth = depth
        self.conditions = conditions
        self.grain = grain
        self.force_group = force_group
        # tainted marks that the node was mutated after a resolve
        self.tainted = False
        for parent in self.parents:
            if not parent:
                raise SyntaxError("Unresolvable parent")

    def add_output_concept(self, concept: Concept):
        """Append an output concept and invalidate the cached resolution."""
        self.output_concepts.append(concept)
        self.output_lcl = LooseConceptList(concepts=self.output_concepts)
        self.rebuild_cache()

    @property
    def logging_prefix(self) -> str:
        # indent log lines by resolution depth for readable traces
        return "\t" * self.depth

    @property
    def all_concepts(self) -> list[Concept]:
        # defensive copy: callers may mutate the returned list
        return [*self.output_concepts]

    @property
    def all_used_concepts(self) -> list[Concept]:
        # defensive copy of inputs, symmetric with all_concepts
        return [*self.input_concepts]

    def __repr__(self):
        concepts = self.all_concepts
        contents = ",".join(sorted([c.address for c in concepts]))
        return f"{self.__class__.__name__}<{contents}>"

    def _resolve(self) -> QueryDatasource:
        """Build this node's QueryDatasource from its resolved parents.

        NOTE(review): the grain is rebuilt from output_concepts here;
        self.grain is not consulted — confirm that is intentional.
        """
        parent_sources = [p.resolve() for p in self.parents]
        grain = Grain(components=self.output_concepts)
        source_map = resolve_concept_map(
            parent_sources, self.output_concepts, self.input_concepts
        )
        return QueryDatasource(
            input_concepts=self.input_concepts,
            output_concepts=self.output_concepts,
            datasources=parent_sources,
            source_type=self.source_type,
            source_map=source_map,
            joins=[],
            grain=grain,
            condition=self.conditions,
            partial_concepts=self.partial_concepts,
            force_group=self.force_group,
        )

    def rebuild_cache(self) -> QueryDatasource:
        """Drop any cached resolution and re-resolve.

        Marks the node tainted so consumers can tell it changed after an
        earlier resolve. (Clearing a None cache is a no-op, so the previous
        'cache empty' special case was redundant and has been removed.)
        """
        self.tainted = True
        self.resolution_cache = None
        return self.resolve()

    def resolve(self) -> QueryDatasource:
        """Return the cached QueryDatasource, resolving on first call."""
        if self.resolution_cache:
            return self.resolution_cache
        qds = self._resolve()
        self.resolution_cache = qds
        return qds
194
+
195
+
196
@dataclass
class NodeJoin:
    """A join between two StrategyNodes on a set of shared concepts.

    Validation happens in __post_init__: each join concept must exist on
    both sides unless filter_to_mutual is set, in which case missing
    concepts are silently dropped.
    """

    left_node: StrategyNode
    right_node: StrategyNode
    concepts: List[Concept]
    join_type: JoinType
    # when True, drop join concepts missing from either side instead of raising
    filter_to_mutual: bool = False

    def __post_init__(self):
        # keep only concepts present (by address) on both nodes
        final_concepts = []
        for concept in self.concepts:
            include = True
            for ds in [self.left_node, self.right_node]:
                if concept.address not in [c.address for c in ds.all_concepts]:
                    if self.filter_to_mutual:
                        include = False
                    else:
                        raise SyntaxError(
                            f"Invalid join, missing {concept} on {str(ds)}, have"
                            f" {[c.address for c in ds.all_concepts]}"
                        )
            if include:
                final_concepts.append(concept)
        if not final_concepts and self.concepts:
            # if one datasource only has constants
            # we can join on 1=1
            for ds in [self.left_node, self.right_node]:
                if all(
                    [c.derivation == PurposeLineage.CONSTANT for c in ds.all_concepts]
                ):
                    # early return leaves self.concepts empty: cross join
                    self.concepts = []
                    return

            # join concepts were requested but none survived and neither
            # side is constant-only: this join cannot be expressed
            left_keys = [c.address for c in self.left_node.all_concepts]
            right_keys = [c.address for c in self.right_node.all_concepts]
            match_concepts = [c.address for c in self.concepts]
            raise SyntaxError(
                "No mutual join keys found between"
                f" {self.left_node} and"
                f" {self.right_node}, left_keys {left_keys},"
                f" right_keys {right_keys},"
                f" provided join concepts {match_concepts}"
            )
        self.concepts = final_concepts

    @property
    def unique_id(self) -> str:
        # order-independent identity: nodes sorted by their repr, plus join type
        nodes = sorted([self.left_node, self.right_node], key=lambda x: str(x))
        return str(nodes) + self.join_type.value

    def __str__(self):
        return (
            f"{self.join_type.value} JOIN {self.left_node} and"
            f" {self.right_node} on"
            f" {','.join([str(k) for k in self.concepts])}"
        )
@@ -0,0 +1,49 @@
1
+ from typing import List
2
+
3
+
4
+ from trilogy.core.models import (
5
+ SourceType,
6
+ Concept,
7
+ Conditional,
8
+ Comparison,
9
+ Parenthetical,
10
+ )
11
+ from trilogy.core.processing.nodes.base_node import StrategyNode
12
+
13
+
14
class FilterNode(StrategyNode):
    """Strategy node for a row-restriction operation.

    A filter derives a new concept from an existing one by restricting
    rows. The node's outputs should be limited to the base concept and
    its filtered derivation; its parents are expected to supply every
    concept the filter condition itself requires.
    """

    source_type = SourceType.FILTER

    def __init__(
        self,
        input_concepts: List[Concept],
        output_concepts: List[Concept],
        environment,
        g,
        whole_grain: bool = False,
        parents: List["StrategyNode"] | None = None,
        depth: int = 0,
        conditions: Conditional | Comparison | Parenthetical | None = None,
        partial_concepts: List[Concept] | None = None,
        force_group: bool = False,
    ):
        # Pure pass-through: all behavior lives in StrategyNode; this
        # subclass only fixes the source type.
        base_kwargs = dict(
            input_concepts=input_concepts,
            output_concepts=output_concepts,
            environment=environment,
            g=g,
            whole_grain=whole_grain,
            parents=parents,
            depth=depth,
            conditions=conditions,
            partial_concepts=partial_concepts,
            force_group=force_group,
        )
        super().__init__(**base_kwargs)
@@ -0,0 +1,110 @@
1
+ from typing import List, Optional
2
+
3
+ from trilogy.constants import logger
4
+ from trilogy.core.models import (
5
+ Grain,
6
+ QueryDatasource,
7
+ SourceType,
8
+ Concept,
9
+ Environment,
10
+ LooseConceptList,
11
+ )
12
+ from trilogy.core.processing.nodes.base_node import (
13
+ StrategyNode,
14
+ resolve_concept_map,
15
+ concept_list_to_grain,
16
+ )
17
+
18
+
19
# Tag prepended to every GroupNode log message for easy grep/filtering.
LOGGER_PREFIX = "[CONCEPT DETAIL - GROUP NODE]"
20
+
21
+
22
class GroupNode(StrategyNode):
    """Strategy node that reduces parent output to a target grain.

    At resolve time it compares the combined grain of its parents to the
    grain implied by its outputs, and only emits a GROUP when an actual
    reduction is needed; otherwise it degrades to a SELECT or returns the
    single parent datasource unchanged.
    """

    source_type = SourceType.GROUP

    def __init__(
        self,
        output_concepts: List[Concept],
        input_concepts: List[Concept],
        environment: Environment,
        g,
        whole_grain: bool = False,
        parents: List["StrategyNode"] | None = None,
        depth: int = 0,
        partial_concepts: Optional[List[Concept]] = None,
    ):
        super().__init__(
            input_concepts=input_concepts,
            output_concepts=output_concepts,
            environment=environment,
            g=g,
            whole_grain=whole_grain,
            parents=parents,
            depth=depth,
            partial_concepts=partial_concepts,
        )

    def _resolve(self) -> QueryDatasource:
        """Resolve parents, then decide whether a group-by is required.

        Returns either a parent datasource directly (no-op group), or a
        QueryDatasource with source_type SELECT (same grain, no reduction)
        or GROUP (reduction required).
        """
        parent_sources: list[QueryDatasource] = [p.resolve() for p in self.parents]

        # target grain from this node's outputs; NOTE(review): the empty
        # parent_sources argument means pre-computed metrics never join the
        # grain here — confirm that is intentional
        grain = concept_list_to_grain(self.output_concepts, [])
        # combined grain actually delivered by the parents
        comp_grain = Grain()
        for source in parent_sources:
            comp_grain += source.grain

        # dynamically select if we need to group
        # because sometimes, we are already at required grain
        if comp_grain == grain and self.output_lcl == self.input_lcl:
            # if there is no group by, and inputs equal outputs
            # return the parent
            logger.info(
                f"{self.logging_prefix}{LOGGER_PREFIX} Output of group by node equals input of group by node"
                f" {self.output_lcl}"
                f" grains {comp_grain} and {grain}"
            )
            if (
                len(parent_sources) == 1
                and LooseConceptList(concepts=parent_sources[0].output_concepts)
                == self.output_lcl
            ):
                # single parent with identical outputs: this node adds nothing
                logger.info(
                    f"{self.logging_prefix}{LOGGER_PREFIX} No group by required, returning parent node"
                )
                return parent_sources[0]
            # otherwise if no group by, just treat it as a select
            source_type = SourceType.SELECT
        else:

            logger.info(
                f"{self.logging_prefix}{LOGGER_PREFIX} Group node has different output than input, forcing group"
                f" {self.input_lcl}"
                " vs"
                f" {self.output_lcl}"
                " and"
                f" upstream grains {[str(source.grain) for source in parent_sources]}"
                " vs"
                f" target grain {grain}"
            )
            for parent in parent_sources:
                logger.info(
                    f"{self.logging_prefix}{LOGGER_PREFIX} Parent node"
                    f" {[c.address for c in parent.output_concepts]}"
                    " grain"
                    f" {parent.grain}"
                )
            source_type = SourceType.GROUP
        return QueryDatasource(
            input_concepts=self.input_concepts,
            output_concepts=self.output_concepts,
            datasources=parent_sources,
            source_type=source_type,
            source_map=resolve_concept_map(
                parent_sources,
                targets=self.output_concepts,
                inherited_inputs=self.input_concepts,
            ),
            joins=[],
            grain=grain,
            partial_concepts=self.partial_concepts,
            condition=self.conditions,
        )