pytrilogy 0.0.3.92__py3-none-any.whl → 0.0.3.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of pytrilogy has been flagged as potentially problematic.

@@ -90,9 +90,7 @@ def validate_concepts(v) -> BuildEnvironmentConceptDict:
     if isinstance(v, BuildEnvironmentConceptDict):
         return v
     elif isinstance(v, dict):
-        return BuildEnvironmentConceptDict(
-            **{x: BuildConcept.model_validate(y) for x, y in v.items()}
-        )
+        return BuildEnvironmentConceptDict(**{x: y for x, y in v.items()})
     raise ValueError
 
 
@@ -100,9 +98,7 @@ def validate_datasources(v) -> BuildEnvironmentDatasourceDict:
     if isinstance(v, BuildEnvironmentDatasourceDict):
         return v
     elif isinstance(v, dict):
-        return BuildEnvironmentDatasourceDict(
-            **{x: BuildDatasource.model_validate(y) for x, y in v.items()}
-        )
+        return BuildEnvironmentDatasourceDict(**{x: y for x, y in v.items()})
     raise ValueError
 
 
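
Note: the two validator hunks above stop re-validating each entry with `model_validate` and pass the mapping through as-is, presumably because the values are already built objects by the time the validator runs. A minimal sketch of the pattern with a stand-in pydantic model (not the real trilogy classes):

    from pydantic import BaseModel, field_validator


    class Concept(BaseModel):  # stand-in for BuildConcept
        name: str


    class Environment(BaseModel):  # stand-in for the build environment model
        concepts: dict[str, Concept]

        @field_validator("concepts", mode="before")
        @classmethod
        def validate_concepts(cls, v):
            if isinstance(v, dict):
                # pass values through untouched; callers supply already-built objects
                return {k: val for k, val in v.items()}
            raise ValueError("expected a dict of concepts")


    env = Environment(concepts={"a": Concept(name="a")})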
@@ -859,7 +859,7 @@ class RecursiveCTE(CTE):
                 ),
                 BuildCaseElse(expr=False),
             ],
-            output_datatype=DataType.BOOL,
+            output_data_type=DataType.BOOL,
             output_purpose=Purpose.KEY,
         ),
     )
@@ -884,7 +884,7 @@ class RecursiveCTE(CTE):
                 ),
                 BuildCaseElse(expr=False),
             ],
-            output_datatype=DataType.BOOL,
+            output_data_type=DataType.BOOL,
             output_purpose=Purpose.KEY,
         ),
     )
@@ -909,7 +909,7 @@ class RecursiveCTE(CTE):
                 ),
                 BuildCaseElse(expr=right_recurse_concept),
             ],
-            output_datatype=recursive_derived.datatype,
+            output_data_type=recursive_derived.datatype,
             output_purpose=recursive_derived.purpose,
         ),
     )
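
Note: the three `RecursiveCTE` hunks rename a keyword argument from `output_datatype` to `output_data_type`; the receiving constructor is not shown in the hunks. If these build models are pydantic models, as the `model_validate`/`model_construct` usage elsewhere in this diff suggests, a stale keyword is only rejected loudly when extra fields are forbidden. A stand-in illustration:

    from pydantic import BaseModel, ConfigDict, ValidationError


    class Fn(BaseModel):  # stand-in, not the real trilogy class
        model_config = ConfigDict(extra="forbid")
        output_data_type: str


    try:
        Fn(output_datatype="bool")  # stale keyword name
    except ValidationError as e:
        print(e)  # rejected: unknown field under extra="forbid", and the renamed field is missing

    print(Fn(output_data_type="bool"))  # accepted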
@@ -61,13 +61,13 @@ def gen_group_node(
 
     # if the aggregation has a grain, we need to ensure these are the ONLY optional in the output of the select
     output_concepts = [concept]
-
+    grain_components = [environment.concepts[c] for c in concept.grain.components]
     if (
         concept.grain
         and len(concept.grain.components) > 0
         and not concept.grain.abstract
     ):
-        grain_components = [environment.concepts[c] for c in concept.grain.components]
+
         parent_concepts += grain_components
         build_grain_parents = get_aggregate_grain(concept, environment)
         output_concepts += grain_components
@@ -131,10 +131,6 @@ def gen_group_node(
     else:
         parents = []
 
-    # the keys we group by
-    # are what we can use for enrichment
-    group_key_parents = [environment.concepts[c] for c in concept.grain.components]
-
     group_node = GroupNode(
         output_concepts=output_concepts,
         input_concepts=parent_concepts,
@@ -164,7 +160,7 @@ def gen_group_node(
     )
     return gen_enrichment_node(
         group_node,
-        join_keys=group_key_parents,
+        join_keys=grain_components,
         local_optional=local_optional,
         environment=environment,
         g=g,
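
Note: in `gen_group_node` the grain-component lookup is now computed once, before the `if`, and the same list is reused both for the group output and as the enrichment `join_keys`, replacing the separate `group_key_parents` lookup that rebuilt the identical list from `concept.grain.components`. The refactor is a hoist of a repeated comprehension; a toy sketch with stand-in values:

    # stand-ins for the trilogy environment/grain objects
    env = {"k1": "concept:k1", "k2": "concept:k2"}
    grain_component_keys = ["k1", "k2"]

    parent_concepts: list[str] = []
    output_concepts: list[str] = ["agg_metric"]

    # compute once, reuse for both the group output and the enrichment join keys
    grain_components = [env[c] for c in grain_component_keys]
    if grain_component_keys:
        parent_concepts += grain_components
        output_concepts += grain_components

    join_keys = grain_components  # previously rebuilt as a separate group_key_parents list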
@@ -6,7 +6,11 @@ from networkx.algorithms import approximation as ax
 from trilogy.constants import logger
 from trilogy.core.enums import Derivation
 from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
-from trilogy.core.graph_models import concept_to_node, prune_sources_for_conditions
+from trilogy.core.graph_models import (
+    ReferenceGraph,
+    concept_to_node,
+    prune_sources_for_conditions,
+)
 from trilogy.core.models.build import BuildConcept, BuildConditional, BuildWhereClause
 from trilogy.core.models.build_environment import BuildEnvironment
 from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
@@ -17,11 +21,12 @@ LOGGER_PREFIX = "[GEN_MERGE_NODE]"
 AMBIGUITY_CHECK_LIMIT = 20
 
 
-def filter_pseudonyms_for_source(ds_graph: nx.DiGraph, node: str):
+def filter_pseudonyms_for_source(
+    ds_graph: nx.DiGraph, node: str, pseudonyms: set[tuple[str, str]]
+):
     to_remove = set()
-
     for edge in ds_graph.edges:
-        if ds_graph.edges[edge].get("pseudonym", False):
+        if edge in pseudonyms:
             lengths = {}
             for n in edge:
                 lengths[n] = nx.shortest_path_length(ds_graph, node, n)
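
Note: `filter_pseudonyms_for_source` now receives the pseudonym edges as an explicit `set[tuple[str, str]]` and tests membership, instead of reading a `pseudonym` attribute off every edge. A small networkx sketch of the two lookups on a toy graph (the real edge set presumably comes from `ReferenceGraph.pseudonyms`):

    import networkx as nx

    g = nx.DiGraph()
    g.add_edge("c~a", "c~b", pseudonym=True)
    g.add_edge("c~b", "c~c")

    # old style: consult the attribute on each edge while iterating
    attr_hits = [e for e in g.edges if g.edges[e].get("pseudonym", False)]

    # new style: membership test against a precomputed set of edge tuples
    pseudonyms = {("c~a", "c~b")}
    set_hits = [e for e in g.edges if e in pseudonyms]

    assert attr_hits == set_hits == [("c~a", "c~b")]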
@@ -52,12 +57,14 @@ def filter_unique_graphs(graphs: list[list[str]]) -> list[list[str]]:
     return [list(x) for x in unique_graphs]
 
 
-def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]:
+def extract_ds_components(
+    g: nx.DiGraph, nodelist: list[str], pseudonyms: set[tuple[str, str]]
+) -> list[list[str]]:
     graphs = []
     for node in g.nodes:
         if node.startswith("ds~"):
             local = g.copy()
-            filter_pseudonyms_for_source(local, node)
+            filter_pseudonyms_for_source(local, node, pseudonyms)
             ds_graph: nx.DiGraph = nx.ego_graph(local, node, radius=10).copy()
             graphs.append(
                 [
@@ -78,7 +85,7 @@ def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]
 
 
 def determine_induced_minimal_nodes(
-    G: nx.DiGraph,
+    G: ReferenceGraph,
     nodelist: list[str],
     environment: BuildEnvironment,
     filter_downstream: bool,
@@ -86,23 +93,19 @@ def determine_induced_minimal_nodes(
 ) -> nx.DiGraph | None:
     H: nx.Graph = nx.to_undirected(G).copy()
     nodes_to_remove = []
-    concepts = nx.get_node_attributes(G, "concept")
-
-    for node in G.nodes:
-        if concepts.get(node):
-            lookup: BuildConcept = concepts[node]
-            # inclusion of aggregates can create ambiguous node relation chains
-            # there may be a better way to handle this
-            # can be revisited if we need to connect a derived synonym based on an aggregate
-            if lookup.derivation in (
-                Derivation.CONSTANT,
-                Derivation.AGGREGATE,
-                Derivation.FILTER,
-            ):
-                nodes_to_remove.append(node)
-            # purge a node if we're already looking for all it's parents
-            if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
-                nodes_to_remove.append(node)
+    for node, lookup in G.concepts.items():
+        # inclusion of aggregates can create ambiguous node relation chains
+        # there may be a better way to handle this
+        # can be revisited if we need to connect a derived synonym based on an aggregate
+        if lookup.derivation in (
+            Derivation.CONSTANT,
+            Derivation.AGGREGATE,
+            Derivation.FILTER,
+        ):
+            nodes_to_remove.append(node)
+        # purge a node if we're already looking for all it's parents
+        if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
+            nodes_to_remove.append(node)
     if nodes_to_remove:
         # logger.debug(f"Removing nodes {nodes_to_remove} from graph")
         H.remove_nodes_from(nodes_to_remove)
@@ -259,7 +262,7 @@ def filter_duplicate_subgraphs(
 def resolve_weak_components(
     all_concepts: List[BuildConcept],
     environment: BuildEnvironment,
-    environment_graph: nx.DiGraph,
+    environment_graph: ReferenceGraph,
     filter_downstream: bool = True,
     accept_partial: bool = False,
     search_conditions: BuildWhereClause | None = None,
@@ -316,8 +319,6 @@ def resolve_weak_components(
            ]
            new = [x for x in all_graph_concepts if x.address not in all_concepts]
 
-            new_addresses = set([x.address for x in new if x.address not in synonyms])
-
            if not new:
                break_flag = True
            # remove our new nodes for the next search path
@@ -329,6 +330,7 @@
            # from trilogy.hooks.graph_hook import GraphHook
            # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
            found.append(g)
+            new_addresses = set([x.address for x in new if x.address not in synonyms])
            reduced_concept_sets.append(new_addresses)
 
        except nx.exception.NetworkXNoPath:
@@ -346,7 +348,7 @@ def resolve_weak_components(
     subgraphs: list[list[BuildConcept]] = []
     # components = nx.strongly_connected_components(g)
     node_list = [x for x in g.nodes if x.startswith("c~")]
-    components = extract_ds_components(g, node_list)
+    components = extract_ds_components(g, node_list, environment_graph.pseudonyms)
     logger.debug(f"Extracted components {components} from {node_list}")
     for component in components:
         # we need to take unique again as different addresses may map to the same concept
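
Note: `extract_ds_components` keeps its core approach of pruning pseudonym edges per datasource and then taking the neighborhood of each `ds~` node with `nx.ego_graph`. A quick illustration of that call on toy node names:

    import networkx as nx

    g = nx.DiGraph()
    g.add_edge("ds~orders", "c~order_id")
    g.add_edge("c~order_id", "c~customer_id")
    g.add_edge("c~unrelated", "c~also_unrelated")

    # all nodes reachable within `radius` hops of the datasource node
    local = nx.ego_graph(g, "ds~orders", radius=10)
    assert set(local.nodes) == {"ds~orders", "c~order_id", "c~customer_id"}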
@@ -128,17 +128,32 @@ def simplify_conditions(
     for condition in conditions:
         if not isinstance(condition, BuildComparison):
             return False
-        if not isinstance(condition.left, REDUCABLE_TYPES) and not isinstance(
-            condition.right, REDUCABLE_TYPES
-        ):
-            return False
-        if not isinstance(condition.left, BuildConcept) and not isinstance(
-            condition.right, BuildConcept
+        left_is_concept = False
+        left_is_reducable = False
+        right_is_concept = False
+        right_is_reducable = False
+        if isinstance(condition.left, BuildConcept):
+            left_is_concept = True
+        elif isinstance(condition.left, REDUCABLE_TYPES):
+            left_is_reducable = True
+
+        if isinstance(condition.right, BuildConcept):
+            right_is_concept = True
+        elif isinstance(condition.right, REDUCABLE_TYPES):
+            right_is_reducable = True
+
+        if not (
+            (left_is_concept and right_is_reducable)
+            or (right_is_concept and left_is_reducable)
         ):
             return False
-        vars = [condition.left, condition.right]
-        concept = [x for x in vars if isinstance(x, BuildConcept)][0]
-        raw_comparison = [x for x in vars if not isinstance(x, BuildConcept)][0]
+        if left_is_concept:
+            concept = condition.left
+            raw_comparison = condition.right
+        else:
+            concept = condition.right
+            raw_comparison = condition.left
+
         if isinstance(raw_comparison, BuildFunction):
             if not raw_comparison.operator == FunctionType.CONSTANT:
                 return False
@@ -154,7 +169,7 @@ def simplify_conditions(
         if not isinstance(comparison, REDUCABLE_TYPES):
             return False
 
-        var = concept
+        var: BuildConcept = concept  # type: ignore
         op = condition.operator
         grouped[var].append((op, comparison))
 
@@ -240,7 +255,6 @@ def get_union_sources(
         assocs[merge_key.address].append(x)
     final: list[list[BuildDatasource]] = []
     for _, dses in assocs.items():
-
         conditions = [c.non_partial_for.conditional for c in dses if c.non_partial_for]
         if simplify_conditions(conditions):
             final.append(dses)
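
Note: the `simplify_conditions` rewrite above replaces the paired `isinstance` negations with explicit flags, so a comparison is only treated as reducible when exactly one side is a concept and the other is a reducible literal. The accepted shapes, expressed as a standalone predicate with stand-in types rather than the trilogy classes:

    REDUCABLE_TYPES = (int, str, float, bool)  # stand-in for the real constant


    class Concept:  # stand-in for BuildConcept
        pass


    def is_reducible_pair(left, right) -> bool:
        left_is_concept = isinstance(left, Concept)
        left_is_reducable = not left_is_concept and isinstance(left, REDUCABLE_TYPES)
        right_is_concept = isinstance(right, Concept)
        right_is_reducable = not right_is_concept and isinstance(right, REDUCABLE_TYPES)
        return (left_is_concept and right_is_reducable) or (
            right_is_concept and left_is_reducable
        )


    assert is_reducible_pair(Concept(), 5)              # concept vs literal: reducible
    assert is_reducible_pair(5, Concept())              # literal vs concept: reducible
    assert not is_reducible_pair(5, 5)                  # literal vs literal: not
    assert not is_reducible_pair(Concept(), Concept())  # concept vs concept: not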
@@ -6,6 +6,7 @@ import networkx as nx
 from trilogy.constants import logger
 from trilogy.core.enums import Derivation
 from trilogy.core.graph_models import (
+    ReferenceGraph,
     concept_to_node,
     get_graph_exact_match,
     prune_sources_for_conditions,
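
Note: the functions below now take a `ReferenceGraph` and read `.concepts`, `.datasources`, and `.pseudonyms` directly instead of calling `nx.get_node_attributes` on every lookup. The class itself lives in `trilogy.core.graph_models` and is not part of this diff; the following is only a sketch of the shape the new call sites imply, and the real implementation may differ:

    import networkx as nx


    class ReferenceGraph(nx.DiGraph):  # illustrative sketch only, not the real class
        def __init__(self, *args, **kwargs):
            # plain lookup structures maintained alongside the graph
            self.concepts = {}       # node id -> BuildConcept
            self.datasources = {}    # node id -> BuildDatasource (or list, for unions)
            self.pseudonyms = set()  # set[tuple[str, str]] of pseudonym edges
            super().__init__(*args, **kwargs)

        def add_node(self, node, **attrs):
            super().add_node(node, **attrs)
            if "concept" in attrs:
                self.concepts[node] = attrs["concept"]
            if "datasource" in attrs:
                self.datasources[node] = attrs["datasource"]

        def add_edge(self, u, v, **attrs):
            super().add_edge(u, v, **attrs)
            if attrs.get("pseudonym"):
                self.pseudonyms.add((u, v))

The payoff is that callers can iterate `g.datasources.items()` or test `edge in g.pseudonyms` without rebuilding attribute dicts each time.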
@@ -41,77 +42,68 @@ def extract_address(node: str):
 
 
 def get_graph_partial_nodes(
-    g: nx.DiGraph, conditions: BuildWhereClause | None
+    g: ReferenceGraph, conditions: BuildWhereClause | None
 ) -> dict[str, list[str]]:
-    datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(g, "datasource")
-    )
     partial: dict[str, list[str]] = {}
-    for node in g.nodes:
-        if node in datasources:
-            ds = datasources[node]
-            if not isinstance(ds, list):
-
-                if ds.non_partial_for and conditions == ds.non_partial_for:
-                    partial[node] = []
-                    continue
-                partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
-                ds = [ds]
-            # assume union sources have no partial
-            else:
-                partial[node] = []
+    for node, ds in g.datasources.items():
 
+        if not isinstance(ds, list):
+
+            if ds.non_partial_for and conditions == ds.non_partial_for:
+                partial[node] = []
+                continue
+            partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
+        # assume union sources have no partial
+        else:
+            partial[node] = []
     return partial
 
 
-def get_graph_grains(g: nx.DiGraph) -> dict[str, list[str]]:
-    datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(g, "datasource")
-    )
+def get_graph_grains(g: ReferenceGraph) -> dict[str, list[str]]:
     grain_length: dict[str, list[str]] = {}
-    for node in g.nodes:
-        if node in datasources:
-            base: set[str] = set()
-            lookup = datasources[node]
-            if not isinstance(lookup, list):
-                lookup = [lookup]
-            assert isinstance(lookup, list)
-            grain_length[node] = reduce(
-                lambda x, y: x.union(y.grain.components), lookup, base  # type: ignore
-            )
+    for node, lookup in g.datasources.items():
+
+        base: set[str] = set()
+        if not isinstance(lookup, list):
+            flookup = [lookup]
+        else:
+            flookup = lookup
+        assert isinstance(flookup, list)
+        grain_length[node] = reduce(
+            lambda x, y: x.union(y.grain.components), flookup, base  # type: ignore
+        )
     return grain_length
 
 
 def subgraph_is_complete(
     nodes: list[str], targets: set[str], mapping: dict[str, str], g: nx.DiGraph
 ) -> bool:
-    mapped = set([mapping.get(n, n) for n in nodes])
-    passed = all([t in mapped for t in targets])
-    if not passed:
+    # Check if all targets are present in mapped nodes
+    mapped = {mapping.get(n, n) for n in nodes}
+    if not targets.issubset(mapped):
        # logger.info(
        #     f"Subgraph {nodes} is not complete, missing targets {targets} - mapped {mapped}"
        # )
        return False
-    # check if all concepts have a datasource edge
-    has_ds_edge = {
-        mapping.get(n, n): any(x.startswith("ds~") for x in nx.neighbors(g, n))
-        for n in nodes
-        if n.startswith("c~")
-    }
-    has_ds_edge = {k: False for k in targets}
-    # check at least one instance of concept has a datasource edge
-    for n in nodes:
-        if n.startswith("c~"):
-            neighbors = nx.neighbors(g, n)
-            for neighbor in neighbors:
-                if neighbor.startswith("ds~"):
-                    has_ds_edge[mapping.get(n, n)] = True
-                    break
-    return all(has_ds_edge.values()) and passed
+
+    # Check if at least one concept node has a datasource edge
+    has_ds_edge = {target: False for target in targets}
+
+    for node in nodes:
+        if node.startswith("c~"):
+            mapped_node = mapping.get(node, node)
+            if mapped_node in targets and not has_ds_edge[mapped_node]:
+                # Only check neighbors if we haven't found a ds edge for this mapped node yet
+                if any(
+                    neighbor.startswith("ds~") for neighbor in nx.neighbors(g, node)
+                ):
+                    has_ds_edge[mapped_node] = True
+
+    return all(has_ds_edge.values())
 
 
 def create_pruned_concept_graph(
-    g: nx.DiGraph,
+    g: ReferenceGraph,
     all_concepts: List[BuildConcept],
     datasources: list[BuildDatasource],
     accept_partial: bool = False,
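
Note: `subgraph_is_complete` now short-circuits with `targets.issubset(mapped)` and tracks datasource edges only for the target concepts, rather than building a dict comprehension over every node and then overwriting it. A toy run of the same two checks (edges added in both directions, as the datasource/concept edges are elsewhere in this file, so `nx.neighbors` sees the `ds~` node):

    import networkx as nx

    g = nx.DiGraph()
    for u, v in [("ds~orders", "c~order_id"), ("ds~orders", "c~customer_id")]:
        g.add_edge(u, v)
        g.add_edge(v, u)

    nodes = ["ds~orders", "c~order_id", "c~customer_id"]
    targets = {"c~order_id", "c~customer_id"}
    mapping: dict[str, str] = {}  # address mapping; identity in this toy case

    mapped = {mapping.get(n, n) for n in nodes}
    assert targets.issubset(mapped)  # first check: every target is covered

    has_ds_edge = {t: False for t in targets}
    for node in nodes:
        if node.startswith("c~"):
            mapped_node = mapping.get(node, node)
            if mapped_node in targets and not has_ds_edge[mapped_node]:
                if any(nb.startswith("ds~") for nb in nx.neighbors(g, node)):
                    has_ds_edge[mapped_node] = True

    assert all(has_ds_edge.values())  # second check: each target touches a datasource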
@@ -133,14 +125,13 @@ def create_pruned_concept_graph(
         )
         g.add_node(node_address, datasource=ds_list)
         for c in common:
-            g.add_edge(node_address, concept_to_node(c))
-            g.add_edge(concept_to_node(c), node_address)
+            cnode = concept_to_node(c)
+            g.add_edge(node_address, cnode)
+            g.add_edge(cnode, node_address)
     prune_sources_for_conditions(g, accept_partial, conditions)
     target_addresses = set([c.address for c in all_concepts])
-    concepts: dict[str, BuildConcept] = nx.get_node_attributes(orig_g, "concept")
-    datasource_map: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(orig_g, "datasource")
-    )
+    concepts: dict[str, BuildConcept] = orig_g.concepts
+    datasource_map: dict[str, BuildDatasource] = orig_g.datasources
     relevant_concepts_pre = {
         n: x.address
         for n in g.nodes()
@@ -170,31 +161,27 @@ def create_pruned_concept_graph(
     for edge in to_remove:
         g.remove_edge(*edge)
 
-    for n in g.nodes():
-        if not n.startswith("ds~"):
-            continue
-        actual_neighbors = [
-            x for x in relevant_concepts if x in (nx.all_neighbors(g, n))
-        ]
-        if actual_neighbors:
+    for n in g.datasources:
+        if any([[n, x] in g.edges for x in relevant_concepts]):
             relevent_datasets.append(n)
+            continue
 
     # for injecting extra join concepts that are shared between datasets
     # use the original graph, pre-partial pruning
-    for n in orig_g.nodes:
+    for n in orig_g.concepts:
         # readd ignoring grain
         # we want to join inclusive of all concepts
-        roots: dict[str, set[str]] = {}
-        if n.startswith("c~") and n not in relevant_concepts:
-            root = n.split("@")[0]
-            neighbors = roots.get(root, set())
-            for neighbor in nx.all_neighbors(orig_g, n):
+        if n not in relevant_concepts:
+            n_neighbors = nx.all_neighbors(orig_g, n)
+            # check if the irrelevant concept is a join between
+            # two relevant datasets
+            neighbors = set()
+            for neighbor in n_neighbors:
                 if neighbor in relevent_datasets:
                     neighbors.add(neighbor)
-                if len(neighbors) > 1:
-                    relevant_concepts.append(n)
-                    roots[root] = set()
-
+            if len(neighbors) > 1:
+                relevant_concepts.append(n)
+                continue
     g.remove_nodes_from(
         [
             n
@@ -238,7 +225,7 @@
 
 
 def resolve_subgraphs(
-    g: nx.DiGraph,
+    g: ReferenceGraph,
     relevant: list[BuildConcept],
     accept_partial: bool,
     conditions: BuildWhereClause | None,
@@ -261,7 +248,7 @@
     partial_map = get_graph_partial_nodes(g, conditions)
     exact_map = get_graph_exact_match(g, accept_partial, conditions)
     grain_length = get_graph_grains(g)
-    concepts: dict[str, BuildConcept] = nx.get_node_attributes(g, "concept")
+    concepts: dict[str, BuildConcept] = g.concepts
     non_partial_map = {
         ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
        for ds in datasources
@@ -460,7 +447,7 @@ def create_select_node(
     ds_name: str,
     subgraph: list[str],
     accept_partial: bool,
-    g,
+    g: ReferenceGraph,
     environment: BuildEnvironment,
     depth: int,
     conditions: BuildWhereClause | None = None,
@@ -486,9 +473,7 @@
         preexisting_conditions=conditions.conditional if conditions else None,
     )
 
-    datasource: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(g, "datasource")[ds_name]
-    )
+    datasource: BuildDatasource = g.datasources[ds_name]
     if isinstance(datasource, BuildDatasource):
         bcandidate, force_group = create_datasource_node(
             datasource,
@@ -581,6 +566,7 @@ def gen_select_merge_node(
     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} searching for root source graph for concepts {[c.address for c in all_concepts]} and conditions {conditions}"
     )
+    pruned_concept_graph = None
     for attempt in attempts:
         pruned_concept_graph = create_pruned_concept_graph(
             g,
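
Note: `gen_select_merge_node` now initializes `pruned_concept_graph = None` before the `for attempt in attempts:` loop, so the name is always bound even when `attempts` is empty; otherwise any use of the variable after the loop would raise a NameError in that case. The general pattern:

    def first_match(attempts: list[int]) -> int | None:
        result = None  # default so the name is bound even if the loop body never runs
        for attempt in attempts:
            result = attempt * 2
            if result > 10:
                break
        return result


    assert first_match([]) is None   # no NameError on the empty case
    assert first_match([3, 7]) == 14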
@@ -244,7 +244,7 @@ def unwrap_transformation(
     elif isinstance(input, Parenthetical):
         return unwrap_transformation(input.content, environment)
     else:
-        return Function(
+        return Function.model_construct(
             operator=FunctionType.CONSTANT,
             output_datatype=arg_to_datatype(input),
             output_purpose=Purpose.CONSTANT,
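
Note: the final hunk swaps `Function(...)` for `Function.model_construct(...)`. In pydantic v2, `model_construct` builds an instance without running field validation, which is faster but trusts the caller to pass already-correct values. A stand-in illustration (not the trilogy `Function` class):

    from pydantic import BaseModel


    class Fn(BaseModel):  # stand-in model
        operator: str
        output_datatype: str


    # __init__ validates and coerces every field
    validated = Fn(operator="CONSTANT", output_datatype="int")

    # model_construct skips validation entirely and just sets the fields
    trusted = Fn.model_construct(operator="CONSTANT", output_datatype="int")

    assert trusted.operator == validated.operator == "CONSTANT"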