pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (76):
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1859 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +701 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +178 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +55 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +18 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +70 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +148 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/__init__.py +4 -0
  65. trilogy/hooks/base_hook.py +6 -4
  66. trilogy/hooks/query_debugger.py +113 -97
  67. trilogy/parser.py +1 -1
  68. trilogy/parsing/common.py +307 -64
  69. trilogy/parsing/parse_engine.py +277 -618
  70. trilogy/parsing/render.py +50 -26
  71. trilogy/scripts/trilogy.py +2 -1
  72. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  73. trilogy/core/models.py +0 -4960
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
  76. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
@@ -3,17 +3,19 @@ from enum import Enum
3
3
  from typing import List, Optional, Protocol, Union
4
4
 
5
5
  from trilogy.constants import logger
6
- from trilogy.core.enums import FunctionType, Granularity, PurposeLineage
6
+ from trilogy.core.enums import Derivation, FunctionType, Granularity
7
7
  from trilogy.core.env_processor import generate_graph
8
8
  from trilogy.core.graph_models import ReferenceGraph
9
- from trilogy.core.models import (
10
- Concept,
11
- Environment,
12
- Function,
13
- RowsetItem,
9
+ from trilogy.core.models.author import (
14
10
  UndefinedConcept,
15
- WhereClause,
16
11
  )
12
+ from trilogy.core.models.build import (
13
+ BuildConcept,
14
+ BuildFunction,
15
+ BuildRowsetItem,
16
+ BuildWhereClause,
17
+ )
18
+ from trilogy.core.models.build_environment import BuildEnvironment
17
19
  from trilogy.core.processing.node_generators import (
18
20
  gen_basic_node,
19
21
  gen_filter_node,
@@ -52,17 +54,17 @@ LOGGER_PREFIX = "[CONCEPT DETAIL]"
52
54
  class SearchConceptsType(Protocol):
53
55
  def __call__(
54
56
  self,
55
- mandatory_list: List[Concept],
56
- environment: Environment,
57
+ mandatory_list: List[BuildConcept],
58
+ history: History,
59
+ environment: BuildEnvironment,
57
60
  depth: int,
58
61
  g: ReferenceGraph,
59
62
  accept_partial: bool = False,
60
- history: Optional[History] = None,
61
- conditions: Optional[WhereClause] = None,
63
+ conditions: Optional[BuildWhereClause] = None,
62
64
  ) -> Union[StrategyNode, None]: ...
63
65
 
64
66
 
65
- def get_upstream_concepts(base: Concept, nested: bool = False) -> set[str]:
67
+ def get_upstream_concepts(base: BuildConcept, nested: bool = False) -> set[str]:
66
68
  upstream = set()
67
69
  if nested:
68
70
  upstream.add(base.address)
@@ -70,20 +72,42 @@ def get_upstream_concepts(base: Concept, nested: bool = False) -> set[str]:
70
72
  return upstream
71
73
  for x in base.lineage.concept_arguments:
72
74
  # if it's derived from any value in a rowset, ALL rowset items are upstream
73
- if x.derivation == PurposeLineage.ROWSET:
74
- assert isinstance(x.lineage, RowsetItem)
75
- for y in x.lineage.rowset.derived_concepts:
76
- upstream = upstream.union(get_upstream_concepts(y, nested=True))
75
+ if x.derivation == Derivation.ROWSET:
76
+ assert isinstance(x.lineage, BuildRowsetItem), type(x.lineage)
77
+ for y in x.lineage.rowset.select.output_components:
78
+ upstream.add(f"{x.lineage.rowset.name}.{y.address}")
79
+ # upstream = upstream.union(get_upstream_concepts(y, nested=True))
77
80
  upstream = upstream.union(get_upstream_concepts(x, nested=True))
78
81
  return upstream
79
82
 
80
83
 
84
+ def restrict_node_outputs_targets(
85
+ node: StrategyNode, targets: list[BuildConcept], depth: int
86
+ ) -> list[BuildConcept]:
87
+ ex_resolve = node.resolve()
88
+ extra = [
89
+ x
90
+ for x in ex_resolve.output_concepts
91
+ if x.address not in [y.address for y in targets]
92
+ ]
93
+
94
+ base = [x for x in ex_resolve.output_concepts if x.address not in extra]
95
+ logger.info(
96
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} reducing final outputs, was {[c.address for c in ex_resolve.output_concepts]} with extra {[c.address for c in extra]}, remaining {base}"
97
+ )
98
+ for x in targets:
99
+ if x.address not in base:
100
+ base.append(x)
101
+ node.set_output_concepts(base)
102
+ return extra
103
+
104
+
81
105
  def get_priority_concept(
82
- all_concepts: List[Concept],
106
+ all_concepts: List[BuildConcept],
83
107
  attempted_addresses: set[str],
84
108
  found_concepts: set[str],
85
109
  depth: int,
86
- ) -> Concept:
110
+ ) -> BuildConcept:
87
111
  # optimized search for missing concepts
88
112
  pass_one = [
89
113
  c
@@ -99,34 +123,34 @@ def get_priority_concept(
99
123
  [
100
124
  c
101
125
  for c in remaining_concept
102
- if c.derivation == PurposeLineage.CONSTANT
126
+ if c.derivation == Derivation.CONSTANT
103
127
  and c.granularity == Granularity.SINGLE_ROW
104
128
  ]
105
129
  +
106
130
  # then multiselects to remove them from scope
107
- [c for c in remaining_concept if c.derivation == PurposeLineage.MULTISELECT]
131
+ [c for c in remaining_concept if c.derivation == Derivation.MULTISELECT]
108
132
  +
109
133
  # then rowsets to remove them from scope, as they cannot get partials
110
- [c for c in remaining_concept if c.derivation == PurposeLineage.ROWSET]
134
+ [c for c in remaining_concept if c.derivation == Derivation.ROWSET]
111
135
  +
112
136
  # then rowsets to remove them from scope, as they cannot get partials
113
- [c for c in remaining_concept if c.derivation == PurposeLineage.UNION]
137
+ [c for c in remaining_concept if c.derivation == Derivation.UNION]
114
138
  # we should be home-free here
115
139
  +
116
140
  # then aggregates to remove them from scope, as they cannot get partials
117
- [c for c in remaining_concept if c.derivation == PurposeLineage.AGGREGATE]
141
+ [c for c in remaining_concept if c.derivation == Derivation.AGGREGATE]
118
142
  # then windows to remove them from scope, as they cannot get partials
119
- + [c for c in remaining_concept if c.derivation == PurposeLineage.WINDOW]
143
+ + [c for c in remaining_concept if c.derivation == Derivation.WINDOW]
120
144
  # then filters to remove them from scope, also cannot get partials
121
- + [c for c in remaining_concept if c.derivation == PurposeLineage.FILTER]
145
+ + [c for c in remaining_concept if c.derivation == Derivation.FILTER]
122
146
  # unnests are weird?
123
- + [c for c in remaining_concept if c.derivation == PurposeLineage.UNNEST]
124
- + [c for c in remaining_concept if c.derivation == PurposeLineage.BASIC]
147
+ + [c for c in remaining_concept if c.derivation == Derivation.UNNEST]
148
+ + [c for c in remaining_concept if c.derivation == Derivation.BASIC]
125
149
  # finally our plain selects
126
150
  + [
127
- c for c in remaining_concept if c.derivation == PurposeLineage.ROOT
151
+ c for c in remaining_concept if c.derivation == Derivation.ROOT
128
152
  ] # and any non-single row constants
129
- + [c for c in remaining_concept if c.derivation == PurposeLineage.CONSTANT]
153
+ + [c for c in remaining_concept if c.derivation == Derivation.CONSTANT]
130
154
  )
131
155
 
132
156
  priority += [
@@ -139,7 +163,15 @@ def get_priority_concept(
139
163
  # get the derived copy first
140
164
  # as this will usually resolve cleaner
141
165
  for x in priority:
142
- if any([x.address in get_upstream_concepts(c) for c in priority]):
166
+ if any(
167
+ [
168
+ x.address
169
+ in get_upstream_concepts(
170
+ c,
171
+ )
172
+ for c in priority
173
+ ]
174
+ ):
143
175
  logger.info(
144
176
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} delaying fetch of {x.address} as parent of another concept"
145
177
  )
@@ -157,11 +189,12 @@ def get_priority_concept(
157
189
 
158
190
 
159
191
  def generate_candidates_restrictive(
160
- priority_concept: Concept,
161
- candidates: list[Concept],
192
+ priority_concept: BuildConcept,
193
+ candidates: list[BuildConcept],
162
194
  exhausted: set[str],
163
- conditions: WhereClause | None = None,
164
- ) -> List[List[Concept]]:
195
+ depth: int,
196
+ conditions: BuildWhereClause | None = None,
197
+ ) -> List[List[BuildConcept]]:
165
198
  # if it's single row, joins are irrelevant. Fetch without keys.
166
199
  if priority_concept.granularity == Granularity.SINGLE_ROW:
167
200
  return [[]]
@@ -175,26 +208,28 @@ def generate_candidates_restrictive(
175
208
  and priority_concept.address not in x.pseudonyms
176
209
  ]
177
210
  if conditions and priority_concept.derivation in (
178
- PurposeLineage.ROOT,
179
- PurposeLineage.CONSTANT,
211
+ Derivation.ROOT,
212
+ Derivation.CONSTANT,
180
213
  ):
181
- return [unique(conditions.row_arguments + local_candidates, "address")]
214
+ logger.info(
215
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Injecting additional conditional row arguments as all remaining concepts are roots or constant"
216
+ )
217
+ return [unique(list(conditions.row_arguments) + local_candidates, "address")]
182
218
  return [local_candidates]
183
219
 
184
220
 
185
221
  def generate_node(
186
- concept: Concept,
187
- local_optional: List[Concept],
188
- environment: Environment,
222
+ concept: BuildConcept,
223
+ local_optional: List[BuildConcept],
224
+ environment: BuildEnvironment,
189
225
  g: ReferenceGraph,
190
226
  depth: int,
191
227
  source_concepts: SearchConceptsType,
228
+ history: History,
192
229
  accept_partial: bool = False,
193
- history: History | None = None,
194
- conditions: WhereClause | None = None,
230
+ conditions: BuildWhereClause | None = None,
195
231
  ) -> StrategyNode | None:
196
232
  # first check in case there is a materialized_concept
197
- history = history or History()
198
233
  candidate = history.gen_select_node(
199
234
  concept,
200
235
  local_optional,
@@ -211,50 +246,50 @@ def generate_node(
211
246
  if candidate:
212
247
  return candidate
213
248
 
214
- if concept.derivation == PurposeLineage.WINDOW:
249
+ if concept.derivation == Derivation.WINDOW:
215
250
  logger.info(
216
251
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating window node with optional {[x.address for x in local_optional]}"
217
252
  )
218
253
  return gen_window_node(
219
254
  concept,
220
255
  local_optional,
221
- environment,
222
- g,
223
- depth + 1,
224
- source_concepts,
225
- history,
256
+ history=history,
257
+ environment=environment,
258
+ g=g,
259
+ depth=depth + 1,
260
+ source_concepts=source_concepts,
226
261
  conditions=conditions,
227
262
  )
228
263
 
229
- elif concept.derivation == PurposeLineage.FILTER:
264
+ elif concept.derivation == Derivation.FILTER:
230
265
  logger.info(
231
266
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating filter node with optional {[x.address for x in local_optional]}"
232
267
  )
233
268
  return gen_filter_node(
234
269
  concept,
235
270
  local_optional,
236
- environment,
237
- g,
238
- depth + 1,
239
- source_concepts=source_concepts,
240
271
  history=history,
272
+ environment=environment,
273
+ g=g,
274
+ depth=depth + 1,
275
+ source_concepts=source_concepts,
241
276
  conditions=conditions,
242
277
  )
243
- elif concept.derivation == PurposeLineage.UNNEST:
278
+ elif concept.derivation == Derivation.UNNEST:
244
279
  logger.info(
245
280
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]} and condition {conditions}"
246
281
  )
247
282
  return gen_unnest_node(
248
283
  concept,
249
284
  local_optional,
250
- environment,
251
- g,
252
- depth + 1,
253
- source_concepts,
254
- history,
285
+ history=history,
286
+ environment=environment,
287
+ g=g,
288
+ depth=depth + 1,
289
+ source_concepts=source_concepts,
255
290
  conditions=conditions,
256
291
  )
257
- elif concept.derivation == PurposeLineage.UNION:
292
+ elif concept.derivation == Derivation.UNION:
258
293
  logger.info(
259
294
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating union node with optional {[x.address for x in local_optional]} and condition {conditions}"
260
295
  )
@@ -268,7 +303,7 @@ def generate_node(
268
303
  history,
269
304
  conditions=conditions,
270
305
  )
271
- elif concept.derivation == PurposeLineage.AGGREGATE:
306
+ elif concept.derivation == Derivation.AGGREGATE:
272
307
  # don't push constants up before aggregation
273
308
  # if not required
274
309
  # to avoid constants multiplication changing default aggregation results
@@ -284,14 +319,14 @@ def generate_node(
284
319
  return gen_group_node(
285
320
  concept,
286
321
  agg_optional,
287
- environment,
288
- g,
289
- depth + 1,
290
- source_concepts,
291
- history,
322
+ history=history,
323
+ environment=environment,
324
+ g=g,
325
+ depth=depth + 1,
326
+ source_concepts=source_concepts,
292
327
  conditions=conditions,
293
328
  )
294
- elif concept.derivation == PurposeLineage.ROWSET:
329
+ elif concept.derivation == Derivation.ROWSET:
295
330
  logger.info(
296
331
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating rowset node with optional {[x.address for x in local_optional]}"
297
332
  )
@@ -305,7 +340,7 @@ def generate_node(
305
340
  history,
306
341
  conditions=conditions,
307
342
  )
308
- elif concept.derivation == PurposeLineage.MULTISELECT:
343
+ elif concept.derivation == Derivation.MULTISELECT:
309
344
  logger.info(
310
345
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
311
346
  )
@@ -319,16 +354,14 @@ def generate_node(
319
354
  history,
320
355
  conditions=conditions,
321
356
  )
322
- elif concept.derivation == PurposeLineage.CONSTANT:
357
+ elif concept.derivation == Derivation.CONSTANT:
323
358
  constant_targets = [concept] + local_optional
324
359
  logger.info(
325
360
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
326
361
  )
327
- if any([x.derivation != PurposeLineage.CONSTANT for x in local_optional]):
362
+ if any([x.derivation != Derivation.CONSTANT for x in local_optional]):
328
363
  non_root = [
329
- x.address
330
- for x in local_optional
331
- if x.derivation != PurposeLineage.CONSTANT
364
+ x.address for x in local_optional if x.derivation != Derivation.CONSTANT
332
365
  ]
333
366
  logger.info(
334
367
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including filter concepts, there is non root filter requirements {non_root}. Recursing with all of these as mandatory"
@@ -362,10 +395,10 @@ def generate_node(
362
395
  parents=[],
363
396
  depth=depth + 1,
364
397
  )
365
- elif concept.derivation == PurposeLineage.BASIC:
398
+ elif concept.derivation == Derivation.BASIC:
366
399
  # this is special case handling for group bys
367
400
  if (
368
- isinstance(concept.lineage, Function)
401
+ isinstance(concept.lineage, BuildFunction)
369
402
  and concept.lineage.operator == FunctionType.GROUP
370
403
  ):
371
404
  logger.info(
@@ -387,15 +420,15 @@ def generate_node(
387
420
  return gen_basic_node(
388
421
  concept,
389
422
  local_optional,
390
- environment,
391
- g,
392
- depth + 1,
393
- source_concepts,
394
- history,
423
+ history=history,
424
+ environment=environment,
425
+ g=g,
426
+ depth=depth + 1,
427
+ source_concepts=source_concepts,
395
428
  conditions=conditions,
396
429
  )
397
430
 
398
- elif concept.derivation == PurposeLineage.ROOT:
431
+ elif concept.derivation == Derivation.ROOT:
399
432
  logger.info(
400
433
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node with optional including condition inputs {[x.address for x in local_optional]}"
401
434
  )
@@ -405,14 +438,14 @@ def generate_node(
405
438
 
406
439
  if any(
407
440
  [
408
- x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
441
+ x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
409
442
  for x in local_optional
410
443
  ]
411
444
  ):
412
445
  non_root = [
413
446
  x.address
414
447
  for x in local_optional
415
- if x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
448
+ if x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
416
449
  ]
417
450
  logger.info(
418
451
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including filter concepts, there is non root filter requirements {non_root}. Recursing with all of these as mandatory"
@@ -466,29 +499,12 @@ def generate_node(
466
499
  )
467
500
 
468
501
  if expanded:
469
- ex_resolve = expanded.resolve()
470
- extra = [
471
- x
472
- for x in ex_resolve.output_concepts
473
- if x.address not in [y.address for y in root_targets]
474
- ]
475
-
502
+ extra = restrict_node_outputs_targets(expanded, root_targets, depth)
476
503
  pseudonyms = [
477
504
  x
478
505
  for x in extra
479
506
  if any(x.address in y.pseudonyms for y in root_targets)
480
507
  ]
481
- logger.info(
482
- f"{depth_to_prefix(depth)}{LOGGER_PREFIX} reducing final outputs, was {[c.address for c in ex_resolve.output_concepts]} with extra {[c.address for c in extra]}"
483
- )
484
- base = [
485
- x for x in ex_resolve.output_concepts if x.address not in extra
486
- ]
487
- for x in root_targets:
488
- if x.address not in base:
489
- base.append(x)
490
- expanded.set_output_concepts(base)
491
- # but hide them
492
508
  if pseudonyms:
493
509
  expanded.add_output_concepts(pseudonyms)
494
510
  logger.info(
@@ -505,21 +521,21 @@ def generate_node(
505
521
  )
506
522
  return None
507
523
  else:
508
- raise ValueError(f"Unknown derivation {concept.derivation}")
524
+ raise ValueError(f"Unknown derivation {concept.derivation} on {concept}")
509
525
  return None
510
526
 
511
527
 
512
528
  def validate_concept(
513
- concept: Concept,
529
+ concept: BuildConcept,
514
530
  node: StrategyNode,
515
531
  found_addresses: set[str],
516
532
  non_partial_addresses: set[str],
517
533
  partial_addresses: set[str],
518
534
  virtual_addresses: set[str],
519
- found_map: dict[str, set[Concept]],
535
+ found_map: dict[str, set[BuildConcept]],
520
536
  accept_partial: bool,
521
537
  seen: set[str],
522
- environment: Environment,
538
+ environment: BuildEnvironment,
523
539
  ):
524
540
  found_map[str(node)].add(concept)
525
541
  seen.add(concept.address)
@@ -546,7 +562,6 @@ def validate_concept(
546
562
  return
547
563
  if v.address == concept.address:
548
564
  return
549
-
550
565
  validate_concept(
551
566
  v,
552
567
  node,
@@ -562,14 +577,14 @@ def validate_concept(
562
577
 
563
578
 
564
579
  def validate_stack(
565
- environment: Environment,
580
+ environment: BuildEnvironment,
566
581
  stack: List[StrategyNode],
567
- concepts: List[Concept],
568
- mandatory_with_filter: List[Concept],
569
- conditions: WhereClause | None = None,
582
+ concepts: List[BuildConcept],
583
+ mandatory_with_filter: List[BuildConcept],
584
+ conditions: BuildWhereClause | None = None,
570
585
  accept_partial: bool = False,
571
586
  ) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
572
- found_map: dict[str, set[Concept]] = defaultdict(set)
587
+ found_map: dict[str, set[BuildConcept]] = defaultdict(set)
573
588
  found_addresses: set[str] = set()
574
589
  non_partial_addresses: set[str] = set()
575
590
  partial_addresses: set[str] = set()
@@ -648,10 +663,10 @@ def depth_to_prefix(depth: int) -> str:
648
663
 
649
664
  def append_existence_check(
650
665
  node: StrategyNode,
651
- environment: Environment,
666
+ environment: BuildEnvironment,
652
667
  graph: ReferenceGraph,
653
- where: WhereClause,
654
- history: History | None = None,
668
+ where: BuildWhereClause,
669
+ history: History,
655
670
  ):
656
671
  # we if we have a where clause doing an existence check
657
672
  # treat that as separate subquery
@@ -668,7 +683,10 @@ def append_existence_check(
668
683
  f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
669
684
  )
670
685
  parent = source_query_concepts(
671
- [*subselect], environment=environment, g=graph, history=history
686
+ [*subselect],
687
+ history=history,
688
+ environment=environment,
689
+ g=graph,
672
690
  )
673
691
  assert parent, "Could not resolve existence clause"
674
692
  node.add_parents([parent])
@@ -679,15 +697,14 @@ def append_existence_check(
679
697
 
680
698
 
681
699
  def search_concepts(
682
- mandatory_list: List[Concept],
683
- environment: Environment,
700
+ mandatory_list: List[BuildConcept],
701
+ history: History,
702
+ environment: BuildEnvironment,
684
703
  depth: int,
685
704
  g: ReferenceGraph,
686
705
  accept_partial: bool = False,
687
- history: History | None = None,
688
- conditions: WhereClause | None = None,
706
+ conditions: BuildWhereClause | None = None,
689
707
  ) -> StrategyNode | None:
690
- history = history or History()
691
708
  hist = history.get_history(
692
709
  search=mandatory_list, accept_partial=accept_partial, conditions=conditions
693
710
  )
@@ -718,13 +735,13 @@ def search_concepts(
718
735
 
719
736
 
720
737
  def _search_concepts(
721
- mandatory_list: List[Concept],
722
- environment: Environment,
738
+ mandatory_list: List[BuildConcept],
739
+ environment: BuildEnvironment,
723
740
  depth: int,
724
741
  g: ReferenceGraph,
725
742
  history: History,
726
743
  accept_partial: bool = False,
727
- conditions: WhereClause | None = None,
744
+ conditions: BuildWhereClause | None = None,
728
745
  ) -> StrategyNode | None:
729
746
  # these are the concepts we need in the output projection
730
747
  mandatory_list = unique(mandatory_list, "address")
@@ -738,22 +755,30 @@ def _search_concepts(
738
755
  # if we have a filter, we may need to get more values to support that.
739
756
  if conditions:
740
757
  completion_mandatory = unique(
741
- mandatory_list + conditions.row_arguments, "address"
758
+ mandatory_list + list(conditions.row_arguments), "address"
742
759
  )
743
760
  # if anything we need to get is in the filter set and it's a computed value
744
761
  # we need to get _everything_ in this loop
762
+ logger.info(f"{[x.address for x in conditions.row_arguments]}")
745
763
  if any(
746
764
  [
747
- x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
765
+ x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
748
766
  and x.address in conditions.row_arguments
749
767
  for x in mandatory_list
750
768
  ]
751
769
  ):
770
+ logger.info(
771
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} derived condition row input present in mandatory list, forcing condition evaluation at this level. "
772
+ )
752
773
  mandatory_list = completion_mandatory
753
774
  must_evaluate_condition_on_this_level_not_push_down = True
775
+ else:
776
+ logger.info(
777
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Do not need to evaluate conditions yet."
778
+ )
754
779
  else:
755
- completion_mandatory = mandatory_list
756
780
 
781
+ completion_mandatory = mandatory_list
757
782
  attempted: set[str] = set()
758
783
 
759
784
  found: set[str] = set()
@@ -777,7 +802,7 @@ def _search_concepts(
777
802
  [x.address in mandatory_list for x in conditions.row_arguments]
778
803
  ) and not any(
779
804
  [
780
- x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
805
+ x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
781
806
  for x in mandatory_list
782
807
  if x.address not in conditions.row_arguments
783
808
  ]
@@ -797,7 +822,7 @@ def _search_concepts(
797
822
  if (
798
823
  conditions
799
824
  and priority_concept.derivation
800
- not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
825
+ not in (Derivation.ROOT, Derivation.CONSTANT)
801
826
  and priority_concept.address not in conditions.row_arguments
802
827
  ):
803
828
  logger.info(
@@ -806,14 +831,14 @@ def _search_concepts(
806
831
  local_conditions = conditions
807
832
 
808
833
  logger.info(
809
- f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} with conditions {local_conditions}"
834
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} granularity {priority_concept.granularity} with conditions {local_conditions}"
810
835
  )
811
836
 
812
837
  candidates = [
813
838
  c for c in mandatory_list if c.address != priority_concept.address
814
839
  ]
815
840
  candidate_lists = generate_candidates_restrictive(
816
- priority_concept, candidates, skip, conditions=conditions
841
+ priority_concept, candidates, skip, depth=depth, conditions=conditions
817
842
  )
818
843
  for clist in candidate_lists:
819
844
  logger.info(
@@ -842,14 +867,14 @@ def _search_concepts(
842
867
  # these concepts should not be attempted to be sourced again
843
868
  # as fetching them requires operating on a subset of concepts
844
869
  if priority_concept.derivation in [
845
- PurposeLineage.AGGREGATE,
846
- PurposeLineage.FILTER,
847
- PurposeLineage.WINDOW,
848
- PurposeLineage.UNNEST,
849
- PurposeLineage.ROWSET,
850
- PurposeLineage.BASIC,
851
- PurposeLineage.MULTISELECT,
852
- PurposeLineage.UNION,
870
+ Derivation.AGGREGATE,
871
+ Derivation.FILTER,
872
+ Derivation.WINDOW,
873
+ Derivation.UNNEST,
874
+ Derivation.ROWSET,
875
+ Derivation.BASIC,
876
+ Derivation.MULTISELECT,
877
+ Derivation.UNION,
853
878
  ]:
854
879
  skip.add(priority_concept.address)
855
880
  break
@@ -880,7 +905,7 @@ def _search_concepts(
880
905
  break
881
906
  # if we have attempted on root node, we've tried them all.
882
907
  # inject in another search with filter concepts
883
- if priority_concept.derivation == PurposeLineage.ROOT:
908
+ if priority_concept.derivation == Derivation.ROOT:
884
909
  logger.info(
885
910
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Breaking as attempted root with no results"
886
911
  )
@@ -910,10 +935,14 @@ def _search_concepts(
910
935
  )
911
936
  if len(stack) == 1:
912
937
  output: StrategyNode = stack[0]
938
+ # _ = restrict_node_outputs_targets(output, mandatory_list, depth)
913
939
  logger.info(
914
940
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
915
941
  )
916
942
  else:
943
+ logger.info(
944
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} wrapping multiple parent nodes {[type(x) for x in stack]} in merge node"
945
+ )
917
946
  output = MergeNode(
918
947
  input_concepts=non_virtual,
919
948
  output_concepts=non_virtual,
@@ -930,6 +959,8 @@ def _search_concepts(
930
959
  append_existence_check(
931
960
  output, environment, g, where=conditions, history=history
932
961
  )
962
+ elif conditions:
963
+ output.preexisting_conditions = conditions.conditional
933
964
  logger.info(
934
965
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning {type(output)} node partial {[c.address for c in output.partial_concepts]}"
935
966
  )
@@ -963,18 +994,17 @@ def _search_concepts(
963
994
 
964
995
 
965
996
  def source_query_concepts(
966
- output_concepts: List[Concept],
967
- environment: Environment,
997
+ output_concepts: List[BuildConcept],
998
+ history: History,
999
+ environment: BuildEnvironment,
968
1000
  g: Optional[ReferenceGraph] = None,
969
- conditions: Optional[WhereClause] = None,
970
- history: Optional[History] = None,
1001
+ conditions: Optional[BuildWhereClause] = None,
971
1002
  ):
972
1003
  if not output_concepts:
973
1004
  raise ValueError(f"No output concepts provided {output_concepts}")
974
1005
  if not g:
975
1006
  g = generate_graph(environment)
976
1007
 
977
- history = history or History()
978
1008
  root = search_concepts(
979
1009
  mandatory_list=output_concepts,
980
1010
  environment=environment,
@@ -992,6 +1022,9 @@ def source_query_concepts(
992
1022
  f"Could not resolve conections between {error_strings} from environment graph."
993
1023
  )
994
1024
  final = [x for x in root.output_concepts if x.address not in root.hidden_concepts]
1025
+ logger.info(
1026
+ f"{depth_to_prefix(0)}{LOGGER_PREFIX} final concepts are {[x.address for x in final]}"
1027
+ )
995
1028
  if GroupNode.check_if_required(
996
1029
  downstream_concepts=final,
997
1030
  parents=[root.resolve()],