pytrilogy 0.0.2.25__py3-none-any.whl → 0.0.2.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.


This version of pytrilogy might be problematic; refer to the registry's advisory page for more details.

@@ -63,14 +63,14 @@ class InlineDatasource(OptimizationRule):
63
63
  for replaceable in to_inline:
64
64
  if replaceable.name not in self.candidates[cte.name]:
65
65
  self.candidates[cte.name].add(replaceable.name)
66
- self.count[replaceable.source.name] += 1
66
+ self.count[replaceable.source.identifier] += 1
67
67
  return True
68
68
  if (
69
- self.count[replaceable.source.name]
69
+ self.count[replaceable.source.identifier]
70
70
  > CONFIG.optimizations.constant_inline_cutoff
71
71
  ):
72
72
  self.log(
73
- f"Skipping inlining raw datasource {replaceable.source.name} ({replaceable.name}) due to multiple references"
73
+ f"Skipping inlining raw datasource {replaceable.source.identifier} ({replaceable.name}) due to multiple references"
74
74
  )
75
75
  continue
76
76
  if not replaceable.source.datasources[0].grain.issubset(replaceable.grain):
@@ -81,7 +81,7 @@ class InlineDatasource(OptimizationRule):
81
81
  result = cte.inline_parent_datasource(replaceable, force_group=force_group)
82
82
  if result:
83
83
  self.log(
84
- f"Inlined parent {replaceable.name} with {replaceable.source.name}"
84
+ f"Inlined parent {replaceable.name} with {replaceable.source.identifier}"
85
85
  )
86
86
  optimized = True
87
87
  else:
@@ -1,6 +1,5 @@
1
1
  from typing import List, Tuple, Callable
2
2
 
3
-
4
3
  from trilogy.core.enums import PurposeLineage, Purpose
5
4
  from trilogy.core.models import (
6
5
  Concept,
@@ -28,14 +28,18 @@ def extract_address(node: str):
28
28
  return node.split("~")[1].split("@")[0]
29
29
 
30
30
 
31
- def get_graph_partial_nodes(g: nx.DiGraph) -> dict[str, list[str]]:
31
+ def get_graph_partial_nodes(
32
+ g: nx.DiGraph, conditions: WhereClause | None
33
+ ) -> dict[str, list[str]]:
32
34
  datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
33
35
  partial: dict[str, list[str]] = {}
34
36
  for node in g.nodes:
35
37
  if node in datasources:
36
- partial[node] = [
37
- concept_to_node(c) for c in datasources[node].partial_concepts
38
- ]
38
+ ds = datasources[node]
39
+ partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
40
+ if ds.non_partial_for and conditions == ds.non_partial_for:
41
+ partial[node] = []
42
+
39
43
  return partial
40
44
 
41
45
 
@@ -49,7 +53,10 @@ def get_graph_grain_length(g: nx.DiGraph) -> dict[str, int]:
49
53
 
50
54
 
51
55
  def create_pruned_concept_graph(
52
- g: nx.DiGraph, all_concepts: List[Concept], accept_partial: bool = False
56
+ g: nx.DiGraph,
57
+ all_concepts: List[Concept],
58
+ accept_partial: bool = False,
59
+ conditions: WhereClause | None = None,
53
60
  ) -> nx.DiGraph:
54
61
  orig_g = g
55
62
  g = g.copy()
@@ -66,11 +73,7 @@ def create_pruned_concept_graph(
66
73
  relevent_datasets: list[str] = []
67
74
  if not accept_partial:
68
75
  partial = {}
69
- for node in g.nodes:
70
- if node in datasources:
71
- partial[node] = [
72
- concept_to_node(c) for c in datasources[node].partial_concepts
73
- ]
76
+ partial = get_graph_partial_nodes(g, conditions)
74
77
  to_remove = []
75
78
  for edge in g.edges:
76
79
  if (
@@ -133,31 +136,53 @@ def create_pruned_concept_graph(
133
136
  return g
134
137
 
135
138
 
136
- def resolve_subgraphs(g: nx.DiGraph) -> dict[str, list[str]]:
139
+ def resolve_subgraphs(
140
+ g: nx.DiGraph, conditions: WhereClause | None
141
+ ) -> dict[str, list[str]]:
137
142
  datasources = [n for n in g.nodes if n.startswith("ds~")]
138
- subgraphs = {ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources}
139
- partial_map = get_graph_partial_nodes(g)
143
+ subgraphs: dict[str, list[str]] = {
144
+ ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources
145
+ }
146
+ partial_map = get_graph_partial_nodes(g, conditions)
140
147
  grain_length = get_graph_grain_length(g)
141
- non_partial = {
142
- ds: [c for c in subgraphs[ds] if c not in partial_map[ds]] for ds in datasources
148
+ concepts: dict[str, Concept] = nx.get_node_attributes(g, "concept")
149
+ non_partial_map = {
150
+ ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
151
+ for ds in datasources
152
+ }
153
+ concept_map = {
154
+ ds: [concepts[c].address for c in subgraphs[ds]] for ds in datasources
143
155
  }
144
156
  pruned_subgraphs = {}
145
- for key, value in subgraphs.items():
157
+ for key, nodes in subgraphs.items():
158
+ value = non_partial_map[key]
159
+ all_concepts = concept_map[key]
146
160
  is_subset = False
147
161
  matches = set()
148
162
  # Compare current list with other lists
149
- for other_key, other_value in non_partial.items():
150
- if key != other_key and set(value).issubset(set(other_value)):
163
+ for other_key, other_all_concepts in concept_map.items():
164
+ other_value = non_partial_map[other_key]
165
+ # needs to be a subset of non partial and a subset of all
166
+ if (
167
+ key != other_key
168
+ and set(value).issubset(set(other_value))
169
+ and set(all_concepts).issubset(set(other_all_concepts))
170
+ ):
151
171
  if len(value) < len(other_value):
152
172
  is_subset = True
173
+ logger.debug(
174
+ f"Dropping subgraph {key} with {value} as it is a subset of {other_key} with {other_value}"
175
+ )
153
176
  break
154
- elif len(value) == len(other_value):
177
+ elif len(value) == len(other_value) and len(all_concepts) == len(
178
+ other_all_concepts
179
+ ):
155
180
  matches.add(other_key)
156
181
  matches.add(key)
157
182
  if matches:
158
183
  is_subset = key is not min(matches, key=lambda x: (grain_length[x], x))
159
184
  if not is_subset:
160
- pruned_subgraphs[key] = value
185
+ pruned_subgraphs[key] = nodes
161
186
  return pruned_subgraphs
162
187
 
163
188
 
@@ -168,6 +193,7 @@ def create_select_node(
168
193
  g,
169
194
  environment: Environment,
170
195
  depth: int,
196
+ conditions: WhereClause | None = None,
171
197
  ) -> StrategyNode:
172
198
  ds_name = ds_name.split("~")[1]
173
199
  all_concepts = [
@@ -206,6 +232,7 @@ def create_select_node(
206
232
  c.concept for c in datasource.columns if c.is_nullable and c.concept in all_lcl
207
233
  ]
208
234
  nullable_lcl = LooseConceptList(concepts=nullable_concepts)
235
+ partial_is_full = conditions and (conditions == datasource.non_partial_for)
209
236
 
210
237
  bcandidate: StrategyNode = SelectNode(
211
238
  input_concepts=[c.concept for c in datasource.columns],
@@ -214,12 +241,15 @@ def create_select_node(
214
241
  g=g,
215
242
  parents=[],
216
243
  depth=depth,
217
- partial_concepts=[c for c in all_concepts if c in partial_lcl],
244
+ partial_concepts=(
245
+ [] if partial_is_full else [c for c in all_concepts if c in partial_lcl]
246
+ ),
218
247
  nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
219
248
  accept_partial=accept_partial,
220
249
  datasource=datasource,
221
250
  grain=Grain(components=all_concepts),
222
251
  conditions=datasource.where.conditional if datasource.where else None,
252
+ render_condition=not partial_is_full,
223
253
  )
224
254
 
225
255
  # we need to nest the group node one further
@@ -261,7 +291,9 @@ def gen_select_merge_node(
261
291
  force_group=False,
262
292
  )
263
293
  for attempt in [False, True]:
264
- pruned_concept_graph = create_pruned_concept_graph(g, non_constant, attempt)
294
+ pruned_concept_graph = create_pruned_concept_graph(
295
+ g, non_constant, attempt, conditions
296
+ )
265
297
  if pruned_concept_graph:
266
298
  logger.info(
267
299
  f"{padding(depth)}{LOGGER_PREFIX} found covering graph w/ partial flag {attempt}"
@@ -274,7 +306,7 @@ def gen_select_merge_node(
274
306
  )
275
307
  return None
276
308
 
277
- sub_nodes = resolve_subgraphs(pruned_concept_graph)
309
+ sub_nodes = resolve_subgraphs(pruned_concept_graph, conditions)
278
310
 
279
311
  logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
280
312
  parents = [
@@ -285,6 +317,7 @@ def gen_select_merge_node(
285
317
  accept_partial=accept_partial,
286
318
  environment=environment,
287
319
  depth=depth,
320
+ conditions=conditions,
288
321
  )
289
322
  for k, subgraph in sub_nodes.items()
290
323
  ]
@@ -165,6 +165,7 @@ class StrategyNode:
165
165
  hidden_concepts: List[Concept] | None = None,
166
166
  existence_concepts: List[Concept] | None = None,
167
167
  virtual_output_concepts: List[Concept] | None = None,
168
+ render_condition: bool = True,
168
169
  ):
169
170
  self.input_concepts: List[Concept] = (
170
171
  unique(input_concepts, "address") if input_concepts else []
@@ -208,6 +209,7 @@ class StrategyNode:
208
209
  )
209
210
  self.validate_parents()
210
211
  self.log = True
212
+ self.render_condition = render_condition
211
213
 
212
214
  def add_parents(self, parents: list["StrategyNode"]):
213
215
  self.parents += parents
@@ -380,6 +382,7 @@ class StrategyNode:
380
382
  hidden_concepts=list(self.hidden_concepts),
381
383
  existence_concepts=list(self.existence_concepts),
382
384
  virtual_output_concepts=list(self.virtual_output_concepts),
385
+ render_condition=self.render_condition,
383
386
  )
384
387
 
385
388
 
@@ -89,8 +89,8 @@ def deduplicate_nodes_and_joins(
89
89
  joins = [
90
90
  j
91
91
  for j in joins
92
- if j.left_node.resolve().full_name not in removed
93
- and j.right_node.resolve().full_name not in removed
92
+ if j.left_node.resolve().identifier not in removed
93
+ and j.right_node.resolve().identifier not in removed
94
94
  ]
95
95
  return joins, merged
96
96
 
@@ -155,8 +155,8 @@ class MergeNode(StrategyNode):
155
155
  for join in node_joins:
156
156
  left = join.left_node.resolve()
157
157
  right = join.right_node.resolve()
158
- if left.full_name == right.full_name:
159
- raise SyntaxError(f"Cannot join node {left.full_name} to itself")
158
+ if left.identifier == right.identifier:
159
+ raise SyntaxError(f"Cannot join node {left.identifier} to itself")
160
160
  joins.append(
161
161
  BaseJoin(
162
162
  left_datasource=left,
@@ -168,7 +168,7 @@ class MergeNode(StrategyNode):
168
168
  )
169
169
  return joins
170
170
 
171
- def create_full_joins(self, dataset_list: List[QueryDatasource]):
171
+ def create_full_joins(self, dataset_list: List[QueryDatasource | Datasource]):
172
172
  joins = []
173
173
  seen = set()
174
174
  for left_value in dataset_list:
@@ -198,7 +198,7 @@ class MergeNode(StrategyNode):
198
198
  environment: Environment,
199
199
  ) -> List[BaseJoin | UnnestJoin]:
200
200
  # only finally, join between them for unique values
201
- dataset_list: List[QueryDatasource] = sorted(
201
+ dataset_list: List[QueryDatasource | Datasource] = sorted(
202
202
  final_datasets, key=lambda x: -len(x.grain.components_copy)
203
203
  )
204
204
 
@@ -215,7 +215,7 @@ class MergeNode(StrategyNode):
215
215
  logger.info(
216
216
  f"{self.logging_prefix}{LOGGER_PREFIX} inferring node joins to target grain {str(grain)}"
217
217
  )
218
- joins = get_node_joins(dataset_list, grain.components, environment)
218
+ joins = get_node_joins(dataset_list, environment=environment)
219
219
  elif final_joins:
220
220
  logger.info(
221
221
  f"{self.logging_prefix}{LOGGER_PREFIX} translating provided node joins {len(final_joins)}"
@@ -238,13 +238,13 @@ class MergeNode(StrategyNode):
238
238
  merged: dict[str, QueryDatasource | Datasource] = {}
239
239
  final_joins: List[NodeJoin] | None = self.node_joins
240
240
  for source in parent_sources:
241
- if source.full_name in merged:
241
+ if source.identifier in merged:
242
242
  logger.info(
243
- f"{self.logging_prefix}{LOGGER_PREFIX} merging parent node with {source.full_name} into existing"
243
+ f"{self.logging_prefix}{LOGGER_PREFIX} merging parent node with {source.identifier} into existing"
244
244
  )
245
- merged[source.full_name] = merged[source.full_name] + source
245
+ merged[source.identifier] = merged[source.identifier] + source
246
246
  else:
247
- merged[source.full_name] = source
247
+ merged[source.identifier] = source
248
248
 
249
249
  # it's possible that we have more sources than we need
250
250
  final_joins, merged = deduplicate_nodes_and_joins(
@@ -314,7 +314,7 @@ class MergeNode(StrategyNode):
314
314
  full_join_concepts = []
315
315
  for join in joins:
316
316
  if isinstance(join, BaseJoin) and join.join_type == JoinType.FULL:
317
- full_join_concepts += join.concepts
317
+ full_join_concepts += join.input_concepts
318
318
  if self.whole_grain:
319
319
  force_group = False
320
320
  elif self.force_group is False:
@@ -49,6 +49,7 @@ class SelectNode(StrategyNode):
49
49
  conditions: Conditional | Comparison | Parenthetical | None = None,
50
50
  preexisting_conditions: Conditional | Comparison | Parenthetical | None = None,
51
51
  hidden_concepts: List[Concept] | None = None,
52
+ render_condition: bool = True,
52
53
  ):
53
54
  super().__init__(
54
55
  input_concepts=input_concepts,
@@ -65,6 +66,7 @@ class SelectNode(StrategyNode):
65
66
  conditions=conditions,
66
67
  preexisting_conditions=preexisting_conditions,
67
68
  hidden_concepts=hidden_concepts,
69
+ render_condition=render_condition,
68
70
  )
69
71
  self.accept_partial = accept_partial
70
72
  self.datasource = datasource
@@ -120,7 +122,8 @@ class SelectNode(StrategyNode):
120
122
  ],
121
123
  nullable_concepts=[c.concept for c in datasource.columns if c.is_nullable],
122
124
  source_type=SourceType.DIRECT_SELECT,
123
- condition=self.conditions,
125
+ # we can skip rendering conditions
126
+ condition=self.conditions if self.render_condition else None,
124
127
  # select nodes should never group
125
128
  force_group=self.force_group,
126
129
  hidden_concepts=self.hidden_concepts,
@@ -128,7 +131,7 @@ class SelectNode(StrategyNode):
128
131
 
129
132
  def resolve_from_constant_datasources(self) -> QueryDatasource:
130
133
  datasource = Datasource(
131
- identifier=CONSTANT_DATASET, address=CONSTANT_DATASET, columns=[]
134
+ name=CONSTANT_DATASET, address=CONSTANT_DATASET, columns=[]
132
135
  )
133
136
  return QueryDatasource(
134
137
  input_concepts=[],
@@ -205,6 +208,7 @@ class SelectNode(StrategyNode):
205
208
  conditions=self.conditions,
206
209
  preexisting_conditions=self.preexisting_conditions,
207
210
  hidden_concepts=self.hidden_concepts,
211
+ render_condition=self.render_condition,
208
212
  )
209
213
 
210
214