pytrilogy 0.0.3.93__py3-none-any.whl → 0.0.3.95__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pytrilogy might be problematic.

Files changed (39)
  1. {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/METADATA +170 -145
  2. {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/RECORD +38 -34
  3. trilogy/__init__.py +1 -1
  4. trilogy/authoring/__init__.py +4 -0
  5. trilogy/core/enums.py +13 -0
  6. trilogy/core/env_processor.py +21 -10
  7. trilogy/core/environment_helpers.py +111 -0
  8. trilogy/core/exceptions.py +21 -1
  9. trilogy/core/functions.py +6 -1
  10. trilogy/core/graph_models.py +60 -67
  11. trilogy/core/internal.py +18 -0
  12. trilogy/core/models/author.py +16 -25
  13. trilogy/core/models/build.py +5 -4
  14. trilogy/core/models/core.py +3 -0
  15. trilogy/core/models/environment.py +28 -0
  16. trilogy/core/models/execute.py +7 -0
  17. trilogy/core/processing/node_generators/node_merge_node.py +30 -28
  18. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +25 -11
  19. trilogy/core/processing/node_generators/select_merge_node.py +68 -82
  20. trilogy/core/query_processor.py +2 -1
  21. trilogy/core/statements/author.py +18 -3
  22. trilogy/core/statements/common.py +0 -10
  23. trilogy/core/statements/execute.py +71 -16
  24. trilogy/core/validation/__init__.py +0 -0
  25. trilogy/core/validation/common.py +109 -0
  26. trilogy/core/validation/concept.py +122 -0
  27. trilogy/core/validation/datasource.py +192 -0
  28. trilogy/core/validation/environment.py +71 -0
  29. trilogy/dialect/base.py +40 -21
  30. trilogy/dialect/sql_server.py +3 -1
  31. trilogy/engine.py +25 -7
  32. trilogy/executor.py +145 -83
  33. trilogy/parsing/parse_engine.py +35 -4
  34. trilogy/parsing/trilogy.lark +11 -5
  35. trilogy/core/processing/node_generators/select_merge_node_v2.py +0 -792
  36. {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/WHEEL +0 -0
  37. {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/entry_points.txt +0 -0
  38. {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/licenses/LICENSE.md +0 -0
  39. {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/top_level.txt +0 -0
trilogy/core/processing/node_generators/node_merge_node.py

@@ -6,7 +6,11 @@ from networkx.algorithms import approximation as ax
 from trilogy.constants import logger
 from trilogy.core.enums import Derivation
 from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
-from trilogy.core.graph_models import concept_to_node, prune_sources_for_conditions
+from trilogy.core.graph_models import (
+    ReferenceGraph,
+    concept_to_node,
+    prune_sources_for_conditions,
+)
 from trilogy.core.models.build import BuildConcept, BuildConditional, BuildWhereClause
 from trilogy.core.models.build_environment import BuildEnvironment
 from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
@@ -17,11 +21,12 @@ LOGGER_PREFIX = "[GEN_MERGE_NODE]"
 AMBIGUITY_CHECK_LIMIT = 20


-def filter_pseudonyms_for_source(ds_graph: nx.DiGraph, node: str):
+def filter_pseudonyms_for_source(
+    ds_graph: nx.DiGraph, node: str, pseudonyms: set[tuple[str, str]]
+):
     to_remove = set()
-
     for edge in ds_graph.edges:
-        if ds_graph.edges[edge].get("pseudonym", False):
+        if edge in pseudonyms:
             lengths = {}
             for n in edge:
                 lengths[n] = nx.shortest_path_length(ds_graph, node, n)
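The hunks above change pseudonym handling from a per-edge attribute read to membership in an explicit edge set that callers now pass in. A minimal sketch of the difference, assuming a plain networkx graph (the ReferenceGraph internals are not visible in this diff):

    import networkx as nx

    g = nx.DiGraph()
    g.add_edge("c~a", "c~b", pseudonym=True)
    g.add_edge("c~b", "c~c")

    # Old approach: attribute-dict lookup on every edge, on every call.
    old = [e for e in g.edges if g.edges[e].get("pseudonym", False)]

    # New approach: one precomputed set owned by the graph; each check
    # is a plain hash lookup with no per-edge attribute indirection.
    pseudonyms = {("c~a", "c~b")}
    new = [e for e in g.edges if e in pseudonyms]

    assert old == new == [("c~a", "c~b")]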
@@ -52,12 +57,14 @@ def filter_unique_graphs(graphs: list[list[str]]) -> list[list[str]]:
     return [list(x) for x in unique_graphs]


-def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]:
+def extract_ds_components(
+    g: nx.DiGraph, nodelist: list[str], pseudonyms: set[tuple[str, str]]
+) -> list[list[str]]:
     graphs = []
     for node in g.nodes:
         if node.startswith("ds~"):
             local = g.copy()
-            filter_pseudonyms_for_source(local, node)
+            filter_pseudonyms_for_source(local, node, pseudonyms)
             ds_graph: nx.DiGraph = nx.ego_graph(local, node, radius=10).copy()
             graphs.append(
                 [
@@ -78,7 +85,7 @@ def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]:


 def determine_induced_minimal_nodes(
-    G: nx.DiGraph,
+    G: ReferenceGraph,
     nodelist: list[str],
     environment: BuildEnvironment,
     filter_downstream: bool,
@@ -86,23 +93,19 @@ def determine_induced_minimal_nodes(
 ) -> nx.DiGraph | None:
     H: nx.Graph = nx.to_undirected(G).copy()
     nodes_to_remove = []
-    concepts = nx.get_node_attributes(G, "concept")
-
-    for node in G.nodes:
-        if concepts.get(node):
-            lookup: BuildConcept = concepts[node]
-            # inclusion of aggregates can create ambiguous node relation chains
-            # there may be a better way to handle this
-            # can be revisited if we need to connect a derived synonym based on an aggregate
-            if lookup.derivation in (
-                Derivation.CONSTANT,
-                Derivation.AGGREGATE,
-                Derivation.FILTER,
-            ):
-                nodes_to_remove.append(node)
-            # purge a node if we're already looking for all it's parents
-            if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
-                nodes_to_remove.append(node)
+    for node, lookup in G.concepts.items():
+        # inclusion of aggregates can create ambiguous node relation chains
+        # there may be a better way to handle this
+        # can be revisited if we need to connect a derived synonym based on an aggregate
+        if lookup.derivation in (
+            Derivation.CONSTANT,
+            Derivation.AGGREGATE,
+            Derivation.FILTER,
+        ):
+            nodes_to_remove.append(node)
+        # purge a node if we're already looking for all it's parents
+        if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
+            nodes_to_remove.append(node)
     if nodes_to_remove:
         # logger.debug(f"Removing nodes {nodes_to_remove} from graph")
         H.remove_nodes_from(nodes_to_remove)
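The same pattern repeats throughout this release: full-graph scans over nx.get_node_attributes are replaced with direct iteration over typed side tables (G.concepts here; g.datasources and pseudonyms in later hunks). A rough sketch of the shape this implies for ReferenceGraph; the real class lives in trilogy/core/graph_models.py and its definition is not part of this diff, so treat the details below as assumptions:

    import networkx as nx

    class ReferenceGraph(nx.DiGraph):
        """Hypothetical sketch: a DiGraph that also keeps typed side tables."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.concepts: dict[str, object] = {}
            self.datasources: dict[str, object] = {}
            self.pseudonyms: set[tuple[str, str]] = set()

        def add_concept_node(self, node: str, concept: object) -> None:
            # Register once at build time; consumers then iterate
            # self.concepts.items() instead of scanning every node.
            self.add_node(node)
            self.concepts[node] = concept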
@@ -259,7 +262,7 @@ def filter_duplicate_subgraphs(
 def resolve_weak_components(
     all_concepts: List[BuildConcept],
     environment: BuildEnvironment,
-    environment_graph: nx.DiGraph,
+    environment_graph: ReferenceGraph,
     filter_downstream: bool = True,
     accept_partial: bool = False,
     search_conditions: BuildWhereClause | None = None,
@@ -316,8 +319,6 @@ def resolve_weak_components(
        ]
        new = [x for x in all_graph_concepts if x.address not in all_concepts]

-        new_addresses = set([x.address for x in new if x.address not in synonyms])
-
        if not new:
            break_flag = True
        # remove our new nodes for the next search path
@@ -329,6 +330,7 @@ def resolve_weak_components(
        # from trilogy.hooks.graph_hook import GraphHook
        # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
        found.append(g)
+        new_addresses = set([x.address for x in new if x.address not in synonyms])
        reduced_concept_sets.append(new_addresses)

    except nx.exception.NetworkXNoPath:
@@ -346,7 +348,7 @@ def resolve_weak_components(
     subgraphs: list[list[BuildConcept]] = []
     # components = nx.strongly_connected_components(g)
     node_list = [x for x in g.nodes if x.startswith("c~")]
-    components = extract_ds_components(g, node_list)
+    components = extract_ds_components(g, node_list, environment_graph.pseudonyms)
     logger.debug(f"Extracted components {components} from {node_list}")
     for component in components:
         # we need to take unique again as different addresses may map to the same concept
trilogy/core/processing/node_generators/select_helpers/datasource_injection.py

@@ -128,17 +128,32 @@ def simplify_conditions(
     for condition in conditions:
         if not isinstance(condition, BuildComparison):
             return False
-        if not isinstance(condition.left, REDUCABLE_TYPES) and not isinstance(
-            condition.right, REDUCABLE_TYPES
-        ):
-            return False
-        if not isinstance(condition.left, BuildConcept) and not isinstance(
-            condition.right, BuildConcept
+        left_is_concept = False
+        left_is_reducable = False
+        right_is_concept = False
+        right_is_reducable = False
+        if isinstance(condition.left, BuildConcept):
+            left_is_concept = True
+        elif isinstance(condition.left, REDUCABLE_TYPES):
+            left_is_reducable = True
+
+        if isinstance(condition.right, BuildConcept):
+            right_is_concept = True
+        elif isinstance(condition.right, REDUCABLE_TYPES):
+            right_is_reducable = True
+
+        if not (
+            (left_is_concept and right_is_reducable)
+            or (right_is_concept and left_is_reducable)
         ):
             return False
-        vars = [condition.left, condition.right]
-        concept = [x for x in vars if isinstance(x, BuildConcept)][0]
-        raw_comparison = [x for x in vars if not isinstance(x, BuildConcept)][0]
+        if left_is_concept:
+            concept = condition.left
+            raw_comparison = condition.right
+        else:
+            concept = condition.right
+            raw_comparison = condition.left
+
         if isinstance(raw_comparison, BuildFunction):
             if not raw_comparison.operator == FunctionType.CONSTANT:
                 return False
@@ -154,7 +169,7 @@ def simplify_conditions(
         if not isinstance(comparison, REDUCABLE_TYPES):
             return False

-        var = concept
+        var: BuildConcept = concept  # type: ignore
         op = condition.operator
         grouped[var].append((op, comparison))
@@ -240,7 +255,6 @@ def get_union_sources(
         assocs[merge_key.address].append(x)
     final: list[list[BuildDatasource]] = []
     for _, dses in assocs.items():
-
         conditions = [c.non_partial_for.conditional for c in dses if c.non_partial_for]
         if simplify_conditions(conditions):
             final.append(dses)
trilogy/core/processing/node_generators/select_merge_node.py

@@ -6,6 +6,7 @@ import networkx as nx
 from trilogy.constants import logger
 from trilogy.core.enums import Derivation
 from trilogy.core.graph_models import (
+    ReferenceGraph,
     concept_to_node,
     get_graph_exact_match,
     prune_sources_for_conditions,
@@ -41,77 +42,68 @@ def extract_address(node: str):


 def get_graph_partial_nodes(
-    g: nx.DiGraph, conditions: BuildWhereClause | None
+    g: ReferenceGraph, conditions: BuildWhereClause | None
 ) -> dict[str, list[str]]:
-    datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(g, "datasource")
-    )
     partial: dict[str, list[str]] = {}
-    for node in g.nodes:
-        if node in datasources:
-            ds = datasources[node]
-            if not isinstance(ds, list):
-
-                if ds.non_partial_for and conditions == ds.non_partial_for:
-                    partial[node] = []
-                    continue
-                partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
-                ds = [ds]
-            # assume union sources have no partial
-            else:
-                partial[node] = []
+    for node, ds in g.datasources.items():
+
+        if not isinstance(ds, list):
+
+            if ds.non_partial_for and conditions == ds.non_partial_for:
+                partial[node] = []
+                continue
+            partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
+        # assume union sources have no partial
+        else:
+            partial[node] = []
     return partial


-def get_graph_grains(g: nx.DiGraph) -> dict[str, list[str]]:
-    datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(g, "datasource")
-    )
+def get_graph_grains(g: ReferenceGraph) -> dict[str, list[str]]:
     grain_length: dict[str, list[str]] = {}
-    for node in g.nodes:
-        if node in datasources:
-            base: set[str] = set()
-            lookup = datasources[node]
-            if not isinstance(lookup, list):
-                lookup = [lookup]
-            assert isinstance(lookup, list)
-            grain_length[node] = reduce(
-                lambda x, y: x.union(y.grain.components), lookup, base  # type: ignore
-            )
+    for node, lookup in g.datasources.items():
+
+        base: set[str] = set()
+        if not isinstance(lookup, list):
+            flookup = [lookup]
+        else:
+            flookup = lookup
+        assert isinstance(flookup, list)
+        grain_length[node] = reduce(
+            lambda x, y: x.union(y.grain.components), flookup, base  # type: ignore
+        )
     return grain_length


 def subgraph_is_complete(
     nodes: list[str], targets: set[str], mapping: dict[str, str], g: nx.DiGraph
 ) -> bool:
-    mapped = set([mapping.get(n, n) for n in nodes])
-    passed = all([t in mapped for t in targets])
-    if not passed:
+    # Check if all targets are present in mapped nodes
+    mapped = {mapping.get(n, n) for n in nodes}
+    if not targets.issubset(mapped):
         # logger.info(
         #     f"Subgraph {nodes} is not complete, missing targets {targets} - mapped {mapped}"
         # )
         return False
-    # check if all concepts have a datasource edge
-    has_ds_edge = {
-        mapping.get(n, n): any(x.startswith("ds~") for x in nx.neighbors(g, n))
-        for n in nodes
-        if n.startswith("c~")
-    }
-    has_ds_edge = {k: False for k in targets}
-    # check at least one instance of concept has a datasource edge
-    for n in nodes:
-        if n.startswith("c~"):
-            neighbors = nx.neighbors(g, n)
-            for neighbor in neighbors:
-                if neighbor.startswith("ds~"):
-                    has_ds_edge[mapping.get(n, n)] = True
-                    break
-    return all(has_ds_edge.values()) and passed
+
+    # Check if at least one concept node has a datasource edge
+    has_ds_edge = {target: False for target in targets}
+
+    for node in nodes:
+        if node.startswith("c~"):
+            mapped_node = mapping.get(node, node)
+            if mapped_node in targets and not has_ds_edge[mapped_node]:
+                # Only check neighbors if we haven't found a ds edge for this mapped node yet
+                if any(
+                    neighbor.startswith("ds~") for neighbor in nx.neighbors(g, node)
+                ):
+                    has_ds_edge[mapped_node] = True
+
+    return all(has_ds_edge.values())


 def create_pruned_concept_graph(
-    g: nx.DiGraph,
+    g: ReferenceGraph,
     all_concepts: List[BuildConcept],
     datasources: list[BuildDatasource],
     accept_partial: bool = False,
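The rewritten subgraph_is_complete makes its two phases explicit: a set-containment check for the targets, then a scan that marks a target satisfied once any of its concept nodes has a direct datasource ("ds~") neighbor. A self-contained sketch of the new logic on a toy graph:

    import networkx as nx

    def subgraph_is_complete(nodes, targets, mapping, g):
        # Phase 1: every target must appear among the mapped nodes.
        mapped = {mapping.get(n, n) for n in nodes}
        if not targets.issubset(mapped):
            return False
        # Phase 2: each target needs at least one concept node
        # with a direct datasource neighbor.
        has_ds_edge = {t: False for t in targets}
        for node in nodes:
            if node.startswith("c~"):
                mapped_node = mapping.get(node, node)
                if mapped_node in targets and not has_ds_edge[mapped_node]:
                    if any(nb.startswith("ds~") for nb in nx.neighbors(g, node)):
                        has_ds_edge[mapped_node] = True
        return all(has_ds_edge.values())

    g = nx.Graph()
    g.add_edge("c~a", "ds~orders")
    g.add_edge("c~b", "c~a")  # c~b is connected, but only through another concept
    assert subgraph_is_complete(["c~a"], {"c~a"}, {}, g)
    assert not subgraph_is_complete(["c~b"], {"c~b"}, {}, g)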
@@ -131,16 +123,15 @@ def create_pruned_concept_graph(
         common: set[BuildConcept] = set.intersection(
             *[set(x.output_concepts) for x in ds_list]
         )
-        g.add_node(node_address, datasource=ds_list)
+        g.add_datasource_node(node_address, ds_list)
         for c in common:
-            g.add_edge(node_address, concept_to_node(c))
-            g.add_edge(concept_to_node(c), node_address)
+            cnode = concept_to_node(c)
+            g.add_edge(node_address, cnode)
+            g.add_edge(cnode, node_address)
     prune_sources_for_conditions(g, accept_partial, conditions)
     target_addresses = set([c.address for c in all_concepts])
-    concepts: dict[str, BuildConcept] = nx.get_node_attributes(orig_g, "concept")
-    datasource_map: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(orig_g, "datasource")
-    )
+    concepts: dict[str, BuildConcept] = orig_g.concepts
+    datasource_map: dict[str, BuildDatasource] = orig_g.datasources
     relevant_concepts_pre = {
         n: x.address
         for n in g.nodes()
@@ -170,31 +161,27 @@ def create_pruned_concept_graph(
     for edge in to_remove:
         g.remove_edge(*edge)

-    for n in g.nodes():
-        if not n.startswith("ds~"):
-            continue
-        actual_neighbors = [
-            x for x in relevant_concepts if x in (nx.all_neighbors(g, n))
-        ]
-        if actual_neighbors:
+    for n in g.datasources:
+        if any([[n, x] in g.edges for x in relevant_concepts]):
             relevent_datasets.append(n)
+            continue

     # for injecting extra join concepts that are shared between datasets
     # use the original graph, pre-partial pruning
-    for n in orig_g.nodes:
+    for n in orig_g.concepts:
         # readd ignoring grain
         # we want to join inclusive of all concepts
-        roots: dict[str, set[str]] = {}
-        if n.startswith("c~") and n not in relevant_concepts:
-            root = n.split("@")[0]
-            neighbors = roots.get(root, set())
-            for neighbor in nx.all_neighbors(orig_g, n):
+        if n not in relevant_concepts:
+            n_neighbors = nx.all_neighbors(orig_g, n)
+            # check if the irrelevant concept is a join between
+            # two relevant datasets
+            neighbors = set()
+            for neighbor in n_neighbors:
                 if neighbor in relevent_datasets:
                     neighbors.add(neighbor)
-            if len(neighbors) > 1:
-                relevant_concepts.append(n)
-            roots[root] = set()
-
+            if len(neighbors) > 1:
+                relevant_concepts.append(n)
+                continue
     g.remove_nodes_from(
         [
             n
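The reworked join-injection loop also drops dead bookkeeping: the old version re-created its roots dict on every iteration, so that state never carried across concepts. The surviving rule is simple — an otherwise irrelevant concept is re-added when it neighbors more than one relevant dataset, since it can serve as a join key. A runnable sketch of that rule in isolation:

    import networkx as nx

    def inject_join_concepts(orig_g, relevant_concepts, relevant_datasets):
        # Re-add concepts that bridge two or more relevant datasets.
        for n in list(orig_g.nodes):
            if n.startswith("c~") and n not in relevant_concepts:
                bridges = {
                    nb for nb in nx.all_neighbors(orig_g, n) if nb in relevant_datasets
                }
                if len(bridges) > 1:
                    relevant_concepts.append(n)

    g = nx.DiGraph()
    g.add_edge("ds~a", "c~key")
    g.add_edge("c~key", "ds~a")
    g.add_edge("ds~b", "c~key")
    g.add_edge("c~key", "ds~b")
    rc: list[str] = []
    inject_join_concepts(g, rc, {"ds~a", "ds~b"})
    assert rc == ["c~key"]  # c~key joins two relevant datasets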
@@ -227,7 +214,7 @@ def create_pruned_concept_graph(
     relevant = set(relevant_concepts + relevent_datasets)
     for edge in orig_g.edges():
         if edge[0] in relevant and edge[1] in relevant:
-            g.add_edge(edge[0], edge[1])
+            g.add_edge(edge[0], edge[1], fast=True)
     # if we have no ds nodes at all, for non constant, we can't find it
     if not any([n.startswith("ds~") for n in g.nodes]):
         logger.info(
@@ -238,7 +225,7 @@ def create_pruned_concept_graph(


 def resolve_subgraphs(
-    g: nx.DiGraph,
+    g: ReferenceGraph,
     relevant: list[BuildConcept],
     accept_partial: bool,
     conditions: BuildWhereClause | None,
@@ -261,7 +248,7 @@ def resolve_subgraphs(
     partial_map = get_graph_partial_nodes(g, conditions)
     exact_map = get_graph_exact_match(g, accept_partial, conditions)
     grain_length = get_graph_grains(g)
-    concepts: dict[str, BuildConcept] = nx.get_node_attributes(g, "concept")
+    concepts: dict[str, BuildConcept] = g.concepts
     non_partial_map = {
         ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
         for ds in datasources
@@ -460,7 +447,7 @@ def create_select_node(
     ds_name: str,
     subgraph: list[str],
     accept_partial: bool,
-    g,
+    g: ReferenceGraph,
     environment: BuildEnvironment,
     depth: int,
     conditions: BuildWhereClause | None = None,
@@ -486,9 +473,7 @@ def create_select_node(
         preexisting_conditions=conditions.conditional if conditions else None,
     )

-    datasource: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(g, "datasource")[ds_name]
-    )
+    datasource: BuildDatasource = g.datasources[ds_name]
     if isinstance(datasource, BuildDatasource):
         bcandidate, force_group = create_datasource_node(
             datasource,
@@ -581,6 +566,7 @@ def gen_select_merge_node(
     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} searching for root source graph for concepts {[c.address for c in all_concepts]} and conditions {conditions}"
     )
+    pruned_concept_graph = None
     for attempt in attempts:
         pruned_concept_graph = create_pruned_concept_graph(
             g,
trilogy/core/query_processor.py

@@ -40,8 +40,8 @@ from trilogy.core.statements.author import (
     PersistStatement,
     SelectStatement,
 )
-from trilogy.core.statements.common import MaterializedDataset
 from trilogy.core.statements.execute import (
+    MaterializedDataset,
     ProcessedCopyStatement,
     ProcessedQuery,
     ProcessedQueryPersist,
@@ -567,4 +567,5 @@ def process_query(
         base=root_cte,
         hidden_columns=set([x for x in statement.hidden_components]),
         local_concepts=statement.local_concepts,
+        locally_derived=statement.locally_derived,
     )
trilogy/core/statements/author.py

@@ -12,6 +12,7 @@ from trilogy.core.enums import (
     IOType,
     Modifier,
     ShowCategory,
+    ValidationScope,
 )
 from trilogy.core.models.author import (
     AggregateWrapper,
@@ -147,11 +148,13 @@ class SelectStatement(HasUUID, SelectTypeMixin, BaseModel):
                 continue
             if CONFIG.parsing.select_as_definition and not environment.frozen:
                 if x.concept.address not in environment.concepts:
-                    environment.add_concept(x.content.output)
+                    environment.add_concept(x.content.output, add_derived=False)
                 elif x.concept.address in environment.concepts:
                     version = environment.concepts[x.concept.address]
                     if version.metadata.concept_source == ConceptSource.SELECT:
-                        environment.add_concept(x.content.output, force=True)
+                        environment.add_concept(
+                            x.content.output, force=True, add_derived=False
+                        )
             x.content.output = x.content.output.set_select_grain(
                 output.grain, environment
             )
@@ -378,6 +381,13 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, BaseModel):
             output = output.union(select.hidden_components)
         return output

+    @property
+    def locally_derived(self) -> set[str]:
+        locally_derived: set[str] = set([x.address for x in self.derived_concepts])
+        for select in self.selects:
+            locally_derived = locally_derived.union(select.locally_derived)
+        return locally_derived
+

 class RowsetDerivationStatement(HasUUID, BaseModel):
     name: str
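The new locally_derived property unions the multi-select's own derived concept addresses with each child select's locally_derived set; process_query then threads the result through to ProcessedQuery (see the query_processor.py hunk above). A minimal sketch of the union semantics with stand-in classes:

    from dataclasses import dataclass, field

    @dataclass
    class Select:  # stand-in for SelectStatement
        locally_derived: set[str] = field(default_factory=set)

    @dataclass
    class MultiSelect:  # stand-in for MultiSelectStatement
        derived_addresses: set[str] = field(default_factory=set)
        selects: list[Select] = field(default_factory=list)

        @property
        def locally_derived(self) -> set[str]:
            out = set(self.derived_addresses)
            for select in self.selects:
                out |= select.locally_derived
            return out

    ms = MultiSelect({"merged.key"}, [Select({"a.x"}), Select({"b.y"})])
    assert ms.locally_derived == {"merged.key", "a.x", "b.y"}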
@@ -428,8 +438,13 @@ class PersistStatement(HasUUID, BaseModel):
         return self.datasource.address


+class ValidateStatement(BaseModel):
+    scope: ValidationScope
+    targets: Optional[List[str]] = None  # list of identifiers
+
+
 class ShowStatement(BaseModel):
-    content: SelectStatement | PersistStatement | ShowCategory
+    content: SelectStatement | PersistStatement | ValidateStatement | ShowCategory


 class Limit(BaseModel):
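ValidateStatement itself is a thin pydantic model — a ValidationScope plus optional target identifiers — and it becomes a fourth legal payload for ShowStatement.content. A minimal construction sketch; the real ValidationScope members are not shown in this diff, so the enum values below are guesses suggested by the new trilogy/core/validation modules (concept.py, datasource.py, environment.py):

    from enum import Enum
    from typing import List, Optional

    from pydantic import BaseModel

    class ValidationScope(str, Enum):  # stand-in: real members not visible here
        ENVIRONMENT = "environment"
        DATASOURCE = "datasource"
        CONCEPT = "concept"

    class ValidateStatement(BaseModel):
        scope: ValidationScope
        targets: Optional[List[str]] = None  # list of identifiers

    stmt = ValidateStatement(scope=ValidationScope.DATASOURCE, targets=["orders"])
    assert stmt.targets == ["orders"]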
trilogy/core/statements/common.py

@@ -4,7 +4,6 @@ from pydantic import BaseModel, Field

 from trilogy.core.enums import IOType
 from trilogy.core.models.author import ConceptRef, HavingClause, WhereClause
-from trilogy.core.models.datasource import Address, Datasource


 class CopyQueryMixin(BaseModel):
@@ -12,15 +11,6 @@ class CopyQueryMixin(BaseModel):
     target_type: IOType


-class MaterializedDataset(BaseModel):
-    address: Address
-
-
-class PersistQueryMixin(BaseModel):
-    output_to: MaterializedDataset
-    datasource: Datasource
-
-
 class SelectTypeMixin(BaseModel):
     where_clause: Union["WhereClause", None] = Field(default=None)
     having_clause: Union["HavingClause", None] = Field(default=None)
trilogy/core/statements/execute.py

@@ -1,49 +1,104 @@
-from typing import Annotated, List, Optional, Union
+from dataclasses import dataclass, field
+from typing import List, Optional, Union

-from pydantic import BaseModel, Field
-from pydantic.functional_validators import PlainValidator
-
-from trilogy.core.models.author import ConceptRef
+from trilogy.core.enums import IOType, ValidationScope
+from trilogy.core.models.author import ConceptRef, HavingClause, WhereClause
 from trilogy.core.models.build import (
     BuildConcept,
     BuildDatasource,
     BuildOrderBy,
 )
-from trilogy.core.models.environment import EnvironmentConceptDict, validate_concepts
+from trilogy.core.models.datasource import Address, Datasource
+from trilogy.core.models.environment import EnvironmentConceptDict
 from trilogy.core.models.execute import CTE, UnionCTE
-from trilogy.core.statements.common import CopyQueryMixin, PersistQueryMixin


-class ProcessedQuery(BaseModel):
+@dataclass
+class CopyQueryMixin:
+    target: str
+    target_type: IOType
+
+
+@dataclass
+class MaterializedDataset:
+    address: Address
+
+
+@dataclass
+class PersistQueryMixin:
+    output_to: MaterializedDataset
+    datasource: Datasource
+
+
+@dataclass
+class SelectTypeMixin:
+    where_clause: Union["WhereClause", None] = field(default=None)
+    having_clause: Union["HavingClause", None] = field(default=None)
+
+    @property
+    def output_components(self) -> List[ConceptRef]:
+        raise NotImplementedError
+
+
+@dataclass
+class ProcessedQuery:
     output_columns: List[ConceptRef]
     ctes: List[CTE | UnionCTE]
     base: CTE | UnionCTE
-    hidden_columns: set[str] = Field(default_factory=set)
+    hidden_columns: set[str] = field(default_factory=set)
     limit: Optional[int] = None
     order_by: Optional[BuildOrderBy] = None
-    local_concepts: Annotated[
-        EnvironmentConceptDict, PlainValidator(validate_concepts)
-    ] = Field(default_factory=EnvironmentConceptDict)
+    local_concepts: EnvironmentConceptDict = field(
+        default_factory=EnvironmentConceptDict
+    )
+    locally_derived: set[str] = field(default_factory=set)


+@dataclass
 class ProcessedQueryPersist(ProcessedQuery, PersistQueryMixin):
     pass


+@dataclass
 class ProcessedCopyStatement(ProcessedQuery, CopyQueryMixin):
     pass


-class ProcessedRawSQLStatement(BaseModel):
+@dataclass
+class ProcessedRawSQLStatement:
     text: str


-class ProcessedStaticValueOutput(BaseModel):
+@dataclass
+class ProcessedValidateStatement:
+    scope: ValidationScope
+    targets: Optional[List[str]]
+
+
+@dataclass
+class ProcessedStaticValueOutput:
     values: List[dict]


-class ProcessedShowStatement(BaseModel):
+@dataclass
+class ProcessedShowStatement:
     output_columns: List[ConceptRef]
     output_values: List[
-        Union[BuildConcept, BuildDatasource, ProcessedQuery, ProcessedStaticValueOutput]
+        Union[
+            BuildConcept,
+            BuildDatasource,
+            ProcessedQuery,
+            ProcessedValidateStatement,
+            ProcessedStaticValueOutput,
+        ]
     ]
+
+
+PROCESSED_STATEMENT_TYPES = (
+    ProcessedCopyStatement
+    | ProcessedQuery
+    | ProcessedRawSQLStatement
+    | ProcessedQueryPersist
+    | ProcessedShowStatement
+    | ProcessedValidateStatement
+)
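This module-wide migration swaps pydantic BaseModel for stdlib dataclasses: Field(default_factory=...) becomes field(default_factory=...), the Annotated[..., PlainValidator(...)] coercion disappears (dataclasses perform no runtime validation), and the new PROCESSED_STATEMENT_TYPES union gives callers a single alias to annotate against. A minimal before/after sketch of the pattern:

    from dataclasses import dataclass, field
    from typing import Optional

    # Before (pydantic): fields validated and coerced at construction time.
    #
    # class ProcessedQuery(BaseModel):
    #     hidden_columns: set[str] = Field(default_factory=set)
    #     limit: Optional[int] = None

    # After (dataclass): same shape, but plain attribute assignment,
    # no validation pass, and cheaper construction.
    @dataclass
    class ProcessedQuery:
        hidden_columns: set[str] = field(default_factory=set)
        limit: Optional[int] = None

    q = ProcessedQuery()
    q.hidden_columns.add("internal.id")
    # default_factory still prevents shared mutable defaults:
    assert ProcessedQuery().hidden_columns == set()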