pytrilogy 0.0.1.110__py3-none-any.whl → 0.0.1.111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (33)
  1. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/RECORD +33 -33
  3. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +1 -1
  6. trilogy/core/models.py +85 -67
  7. trilogy/core/optimization.py +23 -8
  8. trilogy/core/processing/concept_strategies_v3.py +44 -19
  9. trilogy/core/processing/node_generators/basic_node.py +2 -0
  10. trilogy/core/processing/node_generators/common.py +3 -1
  11. trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
  12. trilogy/core/processing/node_generators/filter_node.py +36 -6
  13. trilogy/core/processing/node_generators/node_merge_node.py +34 -23
  14. trilogy/core/processing/node_generators/rowset_node.py +30 -6
  15. trilogy/core/processing/node_generators/select_node.py +23 -9
  16. trilogy/core/processing/node_generators/unnest_node.py +24 -3
  17. trilogy/core/processing/node_generators/window_node.py +4 -2
  18. trilogy/core/processing/nodes/__init__.py +7 -6
  19. trilogy/core/processing/nodes/base_node.py +40 -6
  20. trilogy/core/processing/nodes/filter_node.py +15 -1
  21. trilogy/core/processing/nodes/group_node.py +20 -1
  22. trilogy/core/processing/nodes/merge_node.py +36 -7
  23. trilogy/core/processing/nodes/select_node_v2.py +34 -39
  24. trilogy/core/processing/nodes/unnest_node.py +12 -0
  25. trilogy/core/processing/nodes/window_node.py +11 -0
  26. trilogy/core/processing/utility.py +0 -14
  27. trilogy/core/query_processor.py +125 -29
  28. trilogy/dialect/base.py +45 -40
  29. trilogy/executor.py +31 -3
  30. trilogy/parsing/parse_engine.py +49 -17
  31. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/LICENSE.md +0 -0
  32. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/entry_points.txt +0 -0
  33. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,11 @@ from typing import List
2
2
 
3
3
 
4
4
  from trilogy.core.models import Concept, Function
5
- from trilogy.core.processing.nodes import UnnestNode, History
5
+ from trilogy.core.processing.nodes import SelectNode, UnnestNode, History, StrategyNode
6
+ from trilogy.core.processing.utility import padding
7
+ from trilogy.constants import logger
8
+
9
+ LOGGER_PREFIX = "[GEN_ROWSET_NODE]"
6
10
 
7
11
 
8
12
  def gen_unnest_node(
@@ -13,7 +17,7 @@ def gen_unnest_node(
13
17
  depth: int,
14
18
  source_concepts,
15
19
  history: History | None = None,
16
- ) -> UnnestNode | None:
20
+ ) -> StrategyNode | None:
17
21
  arguments = []
18
22
  if isinstance(concept.lineage, Function):
19
23
  arguments = concept.lineage.concept_arguments
@@ -26,8 +30,12 @@ def gen_unnest_node(
26
30
  history=history,
27
31
  )
28
32
  if not parent:
33
+ logger.info(
34
+ f"{padding(depth)}{LOGGER_PREFIX} could not find unnest node parents"
35
+ )
29
36
  return None
30
- return UnnestNode(
37
+
38
+ base = UnnestNode(
31
39
  unnest_concept=concept,
32
40
  input_concepts=arguments + local_optional,
33
41
  output_concepts=[concept] + local_optional,
@@ -35,3 +43,16 @@ def gen_unnest_node(
35
43
  g=g,
36
44
  parents=([parent] if (arguments or local_optional) else []),
37
45
  )
46
+ # we need to sometimes nest an unnest node,
47
+ # as unnest operations are not valid in all situations
48
+ # TODO: inline this node when we can detect it's safe
49
+ new = SelectNode(
50
+ input_concepts=[concept] + local_optional,
51
+ output_concepts=[concept] + local_optional,
52
+ environment=environment,
53
+ g=g,
54
+ parents=[base],
55
+ )
56
+ qds = new.resolve()
57
+ assert qds.source_map[concept.address] == {base.resolve()}
58
+ return new
@@ -59,19 +59,21 @@ def gen_window_node(
59
59
  parents=[
60
60
  parent_node,
61
61
  ],
62
+ depth=depth,
62
63
  )
63
64
  window_node = MergeNode(
64
65
  parents=[_window_node],
65
66
  environment=environment,
66
67
  g=g,
67
- input_concepts=_window_node.input_concepts,
68
+ input_concepts=[concept] + _window_node.input_concepts,
68
69
  output_concepts=_window_node.output_concepts,
69
70
  grain=_window_node.grain,
70
71
  force_group=False,
72
+ depth=depth,
71
73
  )
72
74
  if not local_optional:
73
75
  return window_node
74
- logger.info(f"{padding(depth)}{LOGGER_PREFIX} group node requires enrichment")
76
+ logger.info(f"{padding(depth)}{LOGGER_PREFIX} window node requires enrichment")
75
77
  return gen_enrichment_node(
76
78
  window_node,
77
79
  join_keys=concept_to_relevant_joins(parent_concepts),
@@ -1,7 +1,7 @@
1
1
  from .filter_node import FilterNode
2
2
  from .group_node import GroupNode
3
3
  from .merge_node import MergeNode
4
- from .select_node_v2 import SelectNode, StaticSelectNode, ConstantNode
4
+ from .select_node_v2 import SelectNode, ConstantNode
5
5
  from .window_node import WindowNode
6
6
  from .base_node import StrategyNode, NodeJoin
7
7
  from .unnest_node import UnnestNode
@@ -37,10 +37,12 @@ class History(BaseModel):
37
37
  raise ValueError(
38
38
  f"Parent key {parent_key} is the same as the current key {key}"
39
39
  )
40
- return self.history.get(
41
- key,
42
- False,
43
- )
40
+ if key in self.history:
41
+ node = self.history[key]
42
+ if node:
43
+ return node.copy()
44
+ return node
45
+ return False
44
46
 
45
47
  def log_start(
46
48
  self,
@@ -125,7 +127,6 @@ __all__ = [
125
127
  "GroupNode",
126
128
  "MergeNode",
127
129
  "SelectNode",
128
- "StaticSelectNode",
129
130
  "WindowNode",
130
131
  "StrategyNode",
131
132
  "NodeJoin",
@@ -17,6 +17,7 @@ from trilogy.core.models import (
17
17
  from trilogy.core.enums import Purpose, JoinType, PurposeLineage, Granularity
18
18
  from trilogy.utility import unique
19
19
  from dataclasses import dataclass
20
+ from trilogy.constants import logger
20
21
 
21
22
 
22
23
  def concept_list_to_grain(
@@ -55,11 +56,18 @@ def resolve_concept_map(
55
56
  defaultdict(set)
56
57
  )
57
58
  full_addresses = {c.address for c in full_joins} if full_joins else set()
59
+ inherited = set([t.address for t in inherited_inputs])
58
60
  for input in inputs:
59
61
  for concept in input.output_concepts:
62
+ logger.info(concept.address)
60
63
  if concept.address not in input.non_partial_concept_addresses:
61
64
  continue
62
- if concept.address not in [t.address for t in inherited_inputs]:
65
+ if concept.address not in inherited:
66
+ continue
67
+ if (
68
+ isinstance(input, QueryDatasource)
69
+ and concept.address in input.hidden_concepts
70
+ ):
63
71
  continue
64
72
  if concept.address in full_addresses:
65
73
  concept_map[concept.address].add(input)
@@ -71,11 +79,16 @@ def resolve_concept_map(
71
79
  for concept in input.output_concepts:
72
80
  if concept.address not in [t.address for t in inherited_inputs]:
73
81
  continue
82
+ if (
83
+ isinstance(input, QueryDatasource)
84
+ and concept.address in input.hidden_concepts
85
+ ):
86
+ continue
74
87
  if len(concept_map.get(concept.address, [])) == 0:
75
88
  concept_map[concept.address].add(input)
76
89
  # this adds our new derived metrics, which are not created in this CTE
77
90
  for target in targets:
78
- if target not in inherited_inputs:
91
+ if target.address not in inherited:
79
92
  # an empty source means it is defined in this CTE
80
93
  concept_map[target.address] = set()
81
94
  return concept_map
@@ -108,6 +121,8 @@ class StrategyNode:
108
121
  force_group: bool | None = None,
109
122
  grain: Optional[Grain] = None,
110
123
  hidden_concepts: List[Concept] | None = None,
124
+ existence_concepts: List[Concept] | None = None,
125
+ virtual_output_concepts: List[Concept] | None = None,
111
126
  ):
112
127
  self.input_concepts: List[Concept] = (
113
128
  unique(input_concepts, "address") if input_concepts else []
@@ -131,6 +146,8 @@ class StrategyNode:
131
146
  self.force_group = force_group
132
147
  self.tainted = False
133
148
  self.hidden_concepts = hidden_concepts or []
149
+ self.existence_concepts = existence_concepts or []
150
+ self.virtual_output_concepts = virtual_output_concepts or []
134
151
  for parent in self.parents:
135
152
  if not parent:
136
153
  raise SyntaxError("Unresolvable parent")
@@ -162,12 +179,11 @@ class StrategyNode:
162
179
  p.resolve() for p in self.parents
163
180
  ]
164
181
 
165
- # if conditional:
166
- # for condition in conditions[1:]:
167
- # conditional += condition
168
182
  grain = Grain(components=self.output_concepts)
169
183
  source_map = resolve_concept_map(
170
- parent_sources, self.output_concepts, self.input_concepts
184
+ parent_sources,
185
+ self.output_concepts,
186
+ self.input_concepts + self.existence_concepts,
171
187
  )
172
188
  return QueryDatasource(
173
189
  input_concepts=self.input_concepts,
@@ -197,6 +213,24 @@ class StrategyNode:
197
213
  self.resolution_cache = qds
198
214
  return qds
199
215
 
216
+ def copy(self) -> "StrategyNode":
217
+ return self.__class__(
218
+ input_concepts=list(self.input_concepts),
219
+ output_concepts=list(self.output_concepts),
220
+ environment=self.environment,
221
+ g=self.g,
222
+ whole_grain=self.whole_grain,
223
+ parents=list(self.parents),
224
+ partial_concepts=list(self.partial_concepts),
225
+ depth=self.depth,
226
+ conditions=self.conditions,
227
+ force_group=self.force_group,
228
+ grain=self.grain,
229
+ hidden_concepts=list(self.hidden_concepts),
230
+ existence_concepts=list(self.existence_concepts),
231
+ virtual_output_concepts=list(self.virtual_output_concepts),
232
+ )
233
+
200
234
 
201
235
  @dataclass
202
236
  class NodeJoin:
@@ -33,7 +33,7 @@ class FilterNode(StrategyNode):
33
33
  depth: int = 0,
34
34
  conditions: Conditional | Comparison | Parenthetical | None = None,
35
35
  partial_concepts: List[Concept] | None = None,
36
- force_group: bool = False,
36
+ force_group: bool | None = False,
37
37
  ):
38
38
  super().__init__(
39
39
  output_concepts=output_concepts,
@@ -47,3 +47,17 @@ class FilterNode(StrategyNode):
47
47
  partial_concepts=partial_concepts,
48
48
  force_group=force_group,
49
49
  )
50
+
51
+ def copy(self) -> "FilterNode":
52
+ return FilterNode(
53
+ input_concepts=list(self.input_concepts),
54
+ output_concepts=list(self.output_concepts),
55
+ environment=self.environment,
56
+ g=self.g,
57
+ whole_grain=self.whole_grain,
58
+ parents=self.parents,
59
+ depth=self.depth,
60
+ conditions=self.conditions,
61
+ partial_concepts=list(self.partial_concepts),
62
+ force_group=self.force_group,
63
+ )
@@ -33,6 +33,7 @@ class GroupNode(StrategyNode):
33
33
  parents: List["StrategyNode"] | None = None,
34
34
  depth: int = 0,
35
35
  partial_concepts: Optional[List[Concept]] = None,
36
+ force_group: bool | None = None,
36
37
  ):
37
38
  super().__init__(
38
39
  input_concepts=input_concepts,
@@ -43,6 +44,7 @@ class GroupNode(StrategyNode):
43
44
  parents=parents,
44
45
  depth=depth,
45
46
  partial_concepts=partial_concepts,
47
+ force_group=force_group,
46
48
  )
47
49
 
48
50
  def _resolve(self) -> QueryDatasource:
@@ -57,7 +59,11 @@ class GroupNode(StrategyNode):
57
59
 
58
60
  # dynamically select if we need to group
59
61
  # because sometimes, we are already at required grain
60
- if comp_grain == grain and self.output_lcl == self.input_lcl:
62
+ if (
63
+ comp_grain == grain
64
+ and self.output_lcl == self.input_lcl
65
+ and self.force_group is not True
66
+ ):
61
67
  # if there is no group by, and inputs equal outputs
62
68
  # return the parent
63
69
  logger.info(
@@ -111,3 +117,16 @@ class GroupNode(StrategyNode):
111
117
  partial_concepts=self.partial_concepts,
112
118
  condition=self.conditions,
113
119
  )
120
+
121
+ def copy(self) -> "GroupNode":
122
+ return GroupNode(
123
+ input_concepts=list(self.input_concepts),
124
+ output_concepts=list(self.output_concepts),
125
+ environment=self.environment,
126
+ g=self.g,
127
+ whole_grain=self.whole_grain,
128
+ parents=self.parents,
129
+ depth=self.depth,
130
+ partial_concepts=list(self.partial_concepts),
131
+ force_group=self.force_group,
132
+ )
@@ -12,6 +12,8 @@ from trilogy.core.models import (
12
12
  Concept,
13
13
  UnnestJoin,
14
14
  Conditional,
15
+ Comparison,
16
+ Parenthetical,
15
17
  )
16
18
  from trilogy.utility import unique
17
19
  from trilogy.core.processing.nodes.base_node import (
@@ -103,8 +105,9 @@ class MergeNode(StrategyNode):
103
105
  force_group: bool | None = None,
104
106
  depth: int = 0,
105
107
  grain: Grain | None = None,
106
- conditions: Conditional | None = None,
108
+ conditions: Conditional | Comparison | Parenthetical | None = None,
107
109
  hidden_concepts: List[Concept] | None = None,
110
+ virtual_output_concepts: List[Concept] | None = None,
108
111
  ):
109
112
  super().__init__(
110
113
  input_concepts=input_concepts,
@@ -119,10 +122,12 @@ class MergeNode(StrategyNode):
119
122
  grain=grain,
120
123
  conditions=conditions,
121
124
  hidden_concepts=hidden_concepts,
125
+ virtual_output_concepts=virtual_output_concepts,
122
126
  )
123
127
  self.join_concepts = join_concepts
124
128
  self.force_join_type = force_join_type
125
129
  self.node_joins = node_joins
130
+
126
131
  final_joins = []
127
132
  if self.node_joins:
128
133
  for join in self.node_joins:
@@ -312,17 +317,21 @@ class MergeNode(StrategyNode):
312
317
  force_group = None
313
318
 
314
319
  qd_joins: List[BaseJoin | UnnestJoin] = [*joins]
320
+ source_map = resolve_concept_map(
321
+ parent_sources,
322
+ targets=self.output_concepts,
323
+ inherited_inputs=self.input_concepts + self.existence_concepts,
324
+ full_joins=full_join_concepts,
325
+ )
326
+ logger.info(
327
+ f"{self.logging_prefix}{LOGGER_PREFIX} source_map {str(source_map)}"
328
+ )
315
329
  qds = QueryDatasource(
316
330
  input_concepts=unique(self.input_concepts, "address"),
317
331
  output_concepts=unique(self.output_concepts, "address"),
318
332
  datasources=final_datasets,
319
333
  source_type=self.source_type,
320
- source_map=resolve_concept_map(
321
- parent_sources,
322
- self.output_concepts,
323
- self.input_concepts,
324
- full_joins=full_join_concepts,
325
- ),
334
+ source_map=source_map,
326
335
  joins=qd_joins,
327
336
  grain=grain,
328
337
  partial_concepts=self.partial_concepts,
@@ -331,3 +340,23 @@ class MergeNode(StrategyNode):
331
340
  hidden_concepts=self.hidden_concepts,
332
341
  )
333
342
  return qds
343
+
344
+ def copy(self) -> "MergeNode":
345
+ return MergeNode(
346
+ input_concepts=list(self.input_concepts),
347
+ output_concepts=list(self.output_concepts),
348
+ environment=self.environment,
349
+ g=self.g,
350
+ whole_grain=self.whole_grain,
351
+ parents=self.parents,
352
+ depth=self.depth,
353
+ partial_concepts=list(self.partial_concepts),
354
+ force_group=self.force_group,
355
+ grain=self.grain,
356
+ conditions=self.conditions,
357
+ hidden_concepts=list(self.hidden_concepts),
358
+ virtual_output_concepts=list(self.virtual_output_concepts),
359
+ node_joins=self.node_joins,
360
+ join_concepts=list(self.join_concepts) if self.join_concepts else None,
361
+ force_join_type=self.force_join_type,
362
+ )
@@ -5,14 +5,14 @@ from trilogy.constants import logger
5
5
  from trilogy.core.constants import CONSTANT_DATASET
6
6
  from trilogy.core.enums import Purpose, PurposeLineage
7
7
  from trilogy.core.models import (
8
- Datasource,
8
+ Function,
9
+ Grain,
9
10
  QueryDatasource,
10
11
  SourceType,
11
- Environment,
12
12
  Concept,
13
- Grain,
14
- Function,
13
+ Environment,
15
14
  UnnestJoin,
15
+ Datasource,
16
16
  )
17
17
  from trilogy.utility import unique
18
18
  from trilogy.core.processing.nodes.base_node import StrategyNode
@@ -22,39 +22,6 @@ from trilogy.core.exceptions import NoDatasourceException
22
22
  LOGGER_PREFIX = "[CONCEPT DETAIL - SELECT NODE]"
23
23
 
24
24
 
25
- class StaticSelectNode(StrategyNode):
26
- """Static select nodes."""
27
-
28
- source_type = SourceType.SELECT
29
-
30
- def __init__(
31
- self,
32
- input_concepts: List[Concept],
33
- output_concepts: List[Concept],
34
- environment: Environment,
35
- g,
36
- datasource: QueryDatasource,
37
- depth: int = 0,
38
- partial_concepts: List[Concept] | None = None,
39
- ):
40
- super().__init__(
41
- input_concepts=input_concepts,
42
- output_concepts=output_concepts,
43
- environment=environment,
44
- g=g,
45
- whole_grain=True,
46
- parents=[],
47
- depth=depth,
48
- partial_concepts=partial_concepts,
49
- )
50
- self.datasource = datasource
51
-
52
- def _resolve(self):
53
- if self.datasource.grain == Grain():
54
- raise NotImplementedError
55
- return self.datasource
56
-
57
-
58
25
  class SelectNode(StrategyNode):
59
26
  """Select nodes actually fetch raw data from a table
60
27
  Responsible for selecting the cheapest option from which to select.
@@ -75,7 +42,7 @@ class SelectNode(StrategyNode):
75
42
  partial_concepts: List[Concept] | None = None,
76
43
  accept_partial: bool = False,
77
44
  grain: Optional[Grain] = None,
78
- force_group: bool = False,
45
+ force_group: bool | None = False,
79
46
  ):
80
47
  super().__init__(
81
48
  input_concepts=input_concepts,
@@ -119,6 +86,7 @@ class SelectNode(StrategyNode):
119
86
  if x.address not in source_map and x.derivation in (
120
87
  PurposeLineage.MULTISELECT,
121
88
  PurposeLineage.MERGE,
89
+ PurposeLineage.FILTER,
122
90
  ):
123
91
  source_map[x.address] = set()
124
92
 
@@ -140,6 +108,7 @@ class SelectNode(StrategyNode):
140
108
  c.concept for c in datasource.columns if not c.is_complete
141
109
  ],
142
110
  source_type=SourceType.DIRECT_SELECT,
111
+ condition=self.conditions,
143
112
  # select nodes should never group
144
113
  force_group=self.force_group,
145
114
  )
@@ -154,6 +123,7 @@ class SelectNode(StrategyNode):
154
123
  source_map={concept.address: set() for concept in self.all_concepts},
155
124
  datasources=[datasource],
156
125
  grain=datasource.grain,
126
+ condition=self.conditions,
157
127
  joins=[],
158
128
  partial_concepts=[],
159
129
  source_type=SourceType.CONSTANT,
@@ -191,8 +161,33 @@ class SelectNode(StrategyNode):
191
161
  f"Could not find any way to associate required concepts {required}"
192
162
  )
193
163
 
164
+ def copy(self) -> "SelectNode":
165
+ return SelectNode(
166
+ input_concepts=list(self.input_concepts),
167
+ output_concepts=list(self.output_concepts),
168
+ environment=self.environment,
169
+ g=self.g,
170
+ datasource=self.datasource,
171
+ depth=self.depth,
172
+ parents=self.parents,
173
+ whole_grain=self.whole_grain,
174
+ partial_concepts=list(self.partial_concepts),
175
+ accept_partial=self.accept_partial,
176
+ grain=self.grain,
177
+ force_group=self.force_group,
178
+ )
179
+
194
180
 
195
181
  class ConstantNode(SelectNode):
196
182
  """Represents a constant value."""
197
183
 
198
- pass
184
+ def copy(self) -> "ConstantNode":
185
+ return ConstantNode(
186
+ input_concepts=list(self.input_concepts),
187
+ output_concepts=list(self.output_concepts),
188
+ environment=self.environment,
189
+ g=self.g,
190
+ datasource=self.datasource,
191
+ depth=self.depth,
192
+ partial_concepts=list(self.partial_concepts),
193
+ )
@@ -52,3 +52,15 @@ class UnnestNode(StrategyNode):
52
52
  base.source_map[self.unnest_concept.address] = {unnest}
53
53
  base.join_derived_concepts = [self.unnest_concept]
54
54
  return base
55
+
56
+ def copy(self) -> "UnnestNode":
57
+ return UnnestNode(
58
+ unnest_concept=self.unnest_concept,
59
+ input_concepts=list(self.input_concepts),
60
+ output_concepts=list(self.output_concepts),
61
+ environment=self.environment,
62
+ g=self.g,
63
+ whole_grain=self.whole_grain,
64
+ parents=self.parents,
65
+ depth=self.depth,
66
+ )
@@ -32,3 +32,14 @@ class WindowNode(StrategyNode):
32
32
  base = super()._resolve()
33
33
  base.grain = Grain(components=self.input_concepts)
34
34
  return base
35
+
36
+ def copy(self) -> "WindowNode":
37
+ return WindowNode(
38
+ input_concepts=list(self.input_concepts),
39
+ output_concepts=list(self.output_concepts),
40
+ environment=self.environment,
41
+ g=self.g,
42
+ whole_grain=self.whole_grain,
43
+ parents=self.parents,
44
+ depth=self.depth,
45
+ )
@@ -232,20 +232,6 @@ def get_node_joins(
232
232
  raise SyntaxError(
233
233
  f"Could not find join for {x.identifier} with output {[c.address for c in x.output_concepts]}, all {[z.identifier for z in datasources]}"
234
234
  )
235
- single_row = [x for x in datasources if x.grain.abstract]
236
- for x in single_row:
237
- for join in final_joins:
238
- found = False
239
- for join in final_joins:
240
- if (
241
- join.left_datasource.identifier == x.identifier
242
- or join.right_datasource.identifier == x.identifier
243
- ):
244
- found = True
245
- if not found:
246
- raise SyntaxError(
247
- f"Could not find join for {x.identifier} with output {[c.address for c in x.output_concepts]}, all {[z.identifier for z in datasources]}"
248
- )
249
235
  return final_joins
250
236
 
251
237