pytrilogy 0.0.3.93__py3-none-any.whl → 0.0.3.94__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

@@ -6,6 +6,7 @@ import networkx as nx
6
6
  from trilogy.constants import logger
7
7
  from trilogy.core.enums import Derivation
8
8
  from trilogy.core.graph_models import (
9
+ ReferenceGraph,
9
10
  concept_to_node,
10
11
  get_graph_exact_match,
11
12
  prune_sources_for_conditions,
@@ -41,77 +42,68 @@ def extract_address(node: str):
41
42
 
42
43
 
43
44
  def get_graph_partial_nodes(
44
- g: nx.DiGraph, conditions: BuildWhereClause | None
45
+ g: ReferenceGraph, conditions: BuildWhereClause | None
45
46
  ) -> dict[str, list[str]]:
46
- datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
47
- nx.get_node_attributes(g, "datasource")
48
- )
49
47
  partial: dict[str, list[str]] = {}
50
- for node in g.nodes:
51
- if node in datasources:
52
- ds = datasources[node]
53
- if not isinstance(ds, list):
54
-
55
- if ds.non_partial_for and conditions == ds.non_partial_for:
56
- partial[node] = []
57
- continue
58
- partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
59
- ds = [ds]
60
- # assume union sources have no partial
61
- else:
62
- partial[node] = []
48
+ for node, ds in g.datasources.items():
63
49
 
50
+ if not isinstance(ds, list):
51
+
52
+ if ds.non_partial_for and conditions == ds.non_partial_for:
53
+ partial[node] = []
54
+ continue
55
+ partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
56
+ # assume union sources have no partial
57
+ else:
58
+ partial[node] = []
64
59
  return partial
65
60
 
66
61
 
67
- def get_graph_grains(g: nx.DiGraph) -> dict[str, list[str]]:
68
- datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
69
- nx.get_node_attributes(g, "datasource")
70
- )
62
+ def get_graph_grains(g: ReferenceGraph) -> dict[str, list[str]]:
71
63
  grain_length: dict[str, list[str]] = {}
72
- for node in g.nodes:
73
- if node in datasources:
74
- base: set[str] = set()
75
- lookup = datasources[node]
76
- if not isinstance(lookup, list):
77
- lookup = [lookup]
78
- assert isinstance(lookup, list)
79
- grain_length[node] = reduce(
80
- lambda x, y: x.union(y.grain.components), lookup, base # type: ignore
81
- )
64
+ for node, lookup in g.datasources.items():
65
+
66
+ base: set[str] = set()
67
+ if not isinstance(lookup, list):
68
+ flookup = [lookup]
69
+ else:
70
+ flookup = lookup
71
+ assert isinstance(flookup, list)
72
+ grain_length[node] = reduce(
73
+ lambda x, y: x.union(y.grain.components), flookup, base # type: ignore
74
+ )
82
75
  return grain_length
83
76
 
84
77
 
85
78
  def subgraph_is_complete(
86
79
  nodes: list[str], targets: set[str], mapping: dict[str, str], g: nx.DiGraph
87
80
  ) -> bool:
88
- mapped = set([mapping.get(n, n) for n in nodes])
89
- passed = all([t in mapped for t in targets])
90
- if not passed:
81
+ # Check if all targets are present in mapped nodes
82
+ mapped = {mapping.get(n, n) for n in nodes}
83
+ if not targets.issubset(mapped):
91
84
  # logger.info(
92
85
  # f"Subgraph {nodes} is not complete, missing targets {targets} - mapped {mapped}"
93
86
  # )
94
87
  return False
95
- # check if all concepts have a datasource edge
96
- has_ds_edge = {
97
- mapping.get(n, n): any(x.startswith("ds~") for x in nx.neighbors(g, n))
98
- for n in nodes
99
- if n.startswith("c~")
100
- }
101
- has_ds_edge = {k: False for k in targets}
102
- # check at least one instance of concept has a datasource edge
103
- for n in nodes:
104
- if n.startswith("c~"):
105
- neighbors = nx.neighbors(g, n)
106
- for neighbor in neighbors:
107
- if neighbor.startswith("ds~"):
108
- has_ds_edge[mapping.get(n, n)] = True
109
- break
110
- return all(has_ds_edge.values()) and passed
88
+
89
+ # Check if at least one concept node has a datasource edge
90
+ has_ds_edge = {target: False for target in targets}
91
+
92
+ for node in nodes:
93
+ if node.startswith("c~"):
94
+ mapped_node = mapping.get(node, node)
95
+ if mapped_node in targets and not has_ds_edge[mapped_node]:
96
+ # Only check neighbors if we haven't found a ds edge for this mapped node yet
97
+ if any(
98
+ neighbor.startswith("ds~") for neighbor in nx.neighbors(g, node)
99
+ ):
100
+ has_ds_edge[mapped_node] = True
101
+
102
+ return all(has_ds_edge.values())
111
103
 
112
104
 
113
105
  def create_pruned_concept_graph(
114
- g: nx.DiGraph,
106
+ g: ReferenceGraph,
115
107
  all_concepts: List[BuildConcept],
116
108
  datasources: list[BuildDatasource],
117
109
  accept_partial: bool = False,
@@ -133,14 +125,13 @@ def create_pruned_concept_graph(
133
125
  )
134
126
  g.add_node(node_address, datasource=ds_list)
135
127
  for c in common:
136
- g.add_edge(node_address, concept_to_node(c))
137
- g.add_edge(concept_to_node(c), node_address)
128
+ cnode = concept_to_node(c)
129
+ g.add_edge(node_address, cnode)
130
+ g.add_edge(cnode, node_address)
138
131
  prune_sources_for_conditions(g, accept_partial, conditions)
139
132
  target_addresses = set([c.address for c in all_concepts])
140
- concepts: dict[str, BuildConcept] = nx.get_node_attributes(orig_g, "concept")
141
- datasource_map: dict[str, BuildDatasource | list[BuildDatasource]] = (
142
- nx.get_node_attributes(orig_g, "datasource")
143
- )
133
+ concepts: dict[str, BuildConcept] = orig_g.concepts
134
+ datasource_map: dict[str, BuildDatasource] = orig_g.datasources
144
135
  relevant_concepts_pre = {
145
136
  n: x.address
146
137
  for n in g.nodes()
@@ -170,31 +161,27 @@ def create_pruned_concept_graph(
170
161
  for edge in to_remove:
171
162
  g.remove_edge(*edge)
172
163
 
173
- for n in g.nodes():
174
- if not n.startswith("ds~"):
175
- continue
176
- actual_neighbors = [
177
- x for x in relevant_concepts if x in (nx.all_neighbors(g, n))
178
- ]
179
- if actual_neighbors:
164
+ for n in g.datasources:
165
+ if any([[n, x] in g.edges for x in relevant_concepts]):
180
166
  relevent_datasets.append(n)
167
+ continue
181
168
 
182
169
  # for injecting extra join concepts that are shared between datasets
183
170
  # use the original graph, pre-partial pruning
184
- for n in orig_g.nodes:
171
+ for n in orig_g.concepts:
185
172
  # readd ignoring grain
186
173
  # we want to join inclusive of all concepts
187
- roots: dict[str, set[str]] = {}
188
- if n.startswith("c~") and n not in relevant_concepts:
189
- root = n.split("@")[0]
190
- neighbors = roots.get(root, set())
191
- for neighbor in nx.all_neighbors(orig_g, n):
174
+ if n not in relevant_concepts:
175
+ n_neighbors = nx.all_neighbors(orig_g, n)
176
+ # check if the irrelevant concept is a join between
177
+ # two relevant datasets
178
+ neighbors = set()
179
+ for neighbor in n_neighbors:
192
180
  if neighbor in relevent_datasets:
193
181
  neighbors.add(neighbor)
194
- if len(neighbors) > 1:
195
- relevant_concepts.append(n)
196
- roots[root] = set()
197
-
182
+ if len(neighbors) > 1:
183
+ relevant_concepts.append(n)
184
+ continue
198
185
  g.remove_nodes_from(
199
186
  [
200
187
  n
@@ -238,7 +225,7 @@ def create_pruned_concept_graph(
238
225
 
239
226
 
240
227
  def resolve_subgraphs(
241
- g: nx.DiGraph,
228
+ g: ReferenceGraph,
242
229
  relevant: list[BuildConcept],
243
230
  accept_partial: bool,
244
231
  conditions: BuildWhereClause | None,
@@ -261,7 +248,7 @@ def resolve_subgraphs(
261
248
  partial_map = get_graph_partial_nodes(g, conditions)
262
249
  exact_map = get_graph_exact_match(g, accept_partial, conditions)
263
250
  grain_length = get_graph_grains(g)
264
- concepts: dict[str, BuildConcept] = nx.get_node_attributes(g, "concept")
251
+ concepts: dict[str, BuildConcept] = g.concepts
265
252
  non_partial_map = {
266
253
  ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
267
254
  for ds in datasources
@@ -460,7 +447,7 @@ def create_select_node(
460
447
  ds_name: str,
461
448
  subgraph: list[str],
462
449
  accept_partial: bool,
463
- g,
450
+ g: ReferenceGraph,
464
451
  environment: BuildEnvironment,
465
452
  depth: int,
466
453
  conditions: BuildWhereClause | None = None,
@@ -486,9 +473,7 @@ def create_select_node(
486
473
  preexisting_conditions=conditions.conditional if conditions else None,
487
474
  )
488
475
 
489
- datasource: dict[str, BuildDatasource | list[BuildDatasource]] = (
490
- nx.get_node_attributes(g, "datasource")[ds_name]
491
- )
476
+ datasource: BuildDatasource = g.datasources[ds_name]
492
477
  if isinstance(datasource, BuildDatasource):
493
478
  bcandidate, force_group = create_datasource_node(
494
479
  datasource,
@@ -581,6 +566,7 @@ def gen_select_merge_node(
581
566
  logger.info(
582
567
  f"{padding(depth)}{LOGGER_PREFIX} searching for root source graph for concepts {[c.address for c in all_concepts]} and conditions {conditions}"
583
568
  )
569
+ pruned_concept_graph = None
584
570
  for attempt in attempts:
585
571
  pruned_concept_graph = create_pruned_concept_graph(
586
572
  g,
@@ -244,7 +244,7 @@ def unwrap_transformation(
244
244
  elif isinstance(input, Parenthetical):
245
245
  return unwrap_transformation(input.content, environment)
246
246
  else:
247
- return Function(
247
+ return Function.model_construct(
248
248
  operator=FunctionType.CONSTANT,
249
249
  output_datatype=arg_to_datatype(input),
250
250
  output_purpose=Purpose.CONSTANT,