pytrilogy 0.0.2.17__py3-none-any.whl → 0.0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)
  1. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/METADATA +12 -8
  2. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/RECORD +40 -39
  3. trilogy/__init__.py +1 -1
  4. trilogy/constants.py +1 -1
  5. trilogy/core/enums.py +1 -0
  6. trilogy/core/functions.py +11 -0
  7. trilogy/core/models.py +89 -47
  8. trilogy/core/optimization.py +15 -9
  9. trilogy/core/processing/concept_strategies_v3.py +372 -145
  10. trilogy/core/processing/node_generators/basic_node.py +27 -55
  11. trilogy/core/processing/node_generators/common.py +6 -7
  12. trilogy/core/processing/node_generators/filter_node.py +28 -31
  13. trilogy/core/processing/node_generators/group_node.py +14 -2
  14. trilogy/core/processing/node_generators/group_to_node.py +3 -1
  15. trilogy/core/processing/node_generators/multiselect_node.py +3 -0
  16. trilogy/core/processing/node_generators/node_merge_node.py +14 -9
  17. trilogy/core/processing/node_generators/rowset_node.py +12 -12
  18. trilogy/core/processing/node_generators/select_merge_node.py +302 -0
  19. trilogy/core/processing/node_generators/select_node.py +7 -511
  20. trilogy/core/processing/node_generators/unnest_node.py +4 -3
  21. trilogy/core/processing/node_generators/window_node.py +12 -37
  22. trilogy/core/processing/nodes/__init__.py +0 -2
  23. trilogy/core/processing/nodes/base_node.py +69 -20
  24. trilogy/core/processing/nodes/filter_node.py +3 -0
  25. trilogy/core/processing/nodes/group_node.py +18 -17
  26. trilogy/core/processing/nodes/merge_node.py +4 -10
  27. trilogy/core/processing/nodes/select_node_v2.py +28 -14
  28. trilogy/core/processing/nodes/window_node.py +1 -2
  29. trilogy/core/processing/utility.py +51 -3
  30. trilogy/core/query_processor.py +17 -73
  31. trilogy/dialect/base.py +7 -3
  32. trilogy/dialect/duckdb.py +4 -1
  33. trilogy/dialect/sql_server.py +3 -3
  34. trilogy/hooks/query_debugger.py +5 -3
  35. trilogy/parsing/parse_engine.py +66 -38
  36. trilogy/parsing/trilogy.lark +2 -1
  37. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/LICENSE.md +0 -0
  38. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/WHEEL +0 -0
  39. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/entry_points.txt +0 -0
  40. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,17 @@
 from collections import defaultdict
-from typing import List, Optional, Callable
+from typing import List, Optional, Protocol, Union

 from trilogy.constants import logger
 from trilogy.core.enums import PurposeLineage, Granularity, FunctionType
 from trilogy.core.env_processor import generate_graph
 from trilogy.core.graph_models import ReferenceGraph
-from trilogy.core.models import Concept, Environment, Function, Grain, WhereClause
+from trilogy.core.models import (
+    Concept,
+    Environment,
+    Function,
+    WhereClause,
+    RowsetItem,
+)
 from trilogy.core.processing.utility import (
     get_disconnected_components,
 )
@@ -36,11 +42,25 @@ class ValidationResult(Enum):
     COMPLETE = 1
     DISCONNECTED = 2
     INCOMPLETE = 3
+    INCOMPLETE_CONDITION = 4


 LOGGER_PREFIX = "[CONCEPT DETAIL]"


+class SearchConceptsType(Protocol):
+    def __call__(
+        self,
+        mandatory_list: List[Concept],
+        environment: Environment,
+        depth: int,
+        g: ReferenceGraph,
+        accept_partial: bool = False,
+        history: Optional[History] = None,
+        conditions: Optional[WhereClause] = None,
+    ) -> Union[StrategyNode, None]: ...
+
+
 def get_upstream_concepts(base: Concept, nested: bool = False) -> set[str]:
     upstream = set()
     if nested:
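Note: the `Callable` import above is replaced by a structural `SearchConceptsType` protocol, so type checkers can validate the full keyword signature of the recursive search callback instead of an opaque callable. A minimal sketch of the pattern, with stand-in types rather than the real trilogy classes:

```python
# Sketch of the Callable -> Protocol change; Concept/Node are stand-ins.
from typing import List, Optional, Protocol


class Concept: ...


class Node: ...


class SearchFn(Protocol):
    def __call__(
        self, mandatory_list: List[Concept], depth: int, accept_partial: bool = False
    ) -> Optional[Node]: ...


def search(
    mandatory_list: List[Concept], depth: int, accept_partial: bool = False
) -> Optional[Node]:
    return Node()  # a real implementation would recurse here


fn: SearchFn = search  # checked structurally: names, defaults, and return type
```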
@@ -48,6 +68,11 @@ def get_upstream_concepts(base: Concept, nested: bool = False) -> set[str]:
     if not base.lineage:
         return upstream
     for x in base.lineage.concept_arguments:
+        # if it's derived from any value in a rowset, ALL rowset items are upstream
+        if x.derivation == PurposeLineage.ROWSET:
+            assert isinstance(x.lineage, RowsetItem)
+            for y in x.lineage.rowset.derived_concepts:
+                upstream = upstream.union(get_upstream_concepts(y, nested=True))
         upstream = upstream.union(get_upstream_concepts(x, nested=True))
     return upstream

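Note: the new `ROWSET` branch widens the upstream set: a dependency on any one member of a rowset pulls in every concept derived from that rowset. A toy illustration of the rule, using hypothetical stand-in classes rather than the trilogy models:

```python
# Toy version of the rowset rule; Node/Rowset are stand-ins, not trilogy types.
from __future__ import annotations
from dataclasses import dataclass, field


@dataclass
class Rowset:
    derived: list[Node] = field(default_factory=list)


@dataclass
class Node:
    name: str
    parents: list[Node] = field(default_factory=list)
    rowset: Rowset | None = None


def upstream(node: Node) -> set[str]:
    out: set[str] = set()
    for p in node.parents:
        out.add(p.name)
        if p.rowset:  # one rowset member pulls in all members
            for sibling in p.rowset.derived:
                out |= {sibling.name} | upstream(sibling)
        out |= upstream(p)
    return out


rs = Rowset()
a, b = Node("rs.a", rowset=rs), Node("rs.b", rowset=rs)
rs.derived = [a, b]
query = Node("query", parents=[a])
assert upstream(query) == {"rs.a", "rs.b"}
```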
@@ -64,10 +89,10 @@ def get_priority_concept(
         for c in all_concepts
         if c.address not in attempted_addresses and c.address not in found_concepts
     ]
-    # sometimes we need to scan intermediate concepts to get merge keys, so fall back
-    # to exhaustive search
-    pass_two = [c for c in all_concepts if c.address not in attempted_addresses]
-    for remaining_concept in (pass_one, pass_two):
+    # sometimes we need to scan intermediate concepts to get merge keys or filter keys,
+    # so do an exhaustive search
+    # pass_two = [c for c in all_concepts+filter_only if c.address not in attempted_addresses]
+    for remaining_concept in (pass_one,):
         priority = (
             # find anything that needs no joins first, so we can exit early
             [
@@ -85,60 +110,19 @@ def get_priority_concept(
             # we should be home-free here
             +
             # then aggregates to remove them from scope, as they cannot get partials
-            [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.AGGREGATE
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
+            [c for c in remaining_concept if c.derivation == PurposeLineage.AGGREGATE]
             # then windows to remove them from scope, as they cannot get partials
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.WINDOW
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.WINDOW]
             # then filters to remove them from scope, also cannot get partials
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.FILTER
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.FILTER]
             # unnests are weird?
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.UNNEST
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.BASIC
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.UNNEST]
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.BASIC]
             # finally our plain selects
             + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.ROOT
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
-            # and any non-single row constants
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.CONSTANT
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
-            # catch all
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation != PurposeLineage.CONSTANT
-                and c.granularity == Granularity.SINGLE_ROW
-            ]
+                c for c in remaining_concept if c.derivation == PurposeLineage.ROOT
+            ]  # and any non-single row constants
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.CONSTANT]
         )

         priority += [
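Note: the collapsed comprehensions drop the per-bucket `SINGLE_ROW` granularity splits, leaving a straight precedence over derivation types. That precedence can be read as a rank table; a sketch with a stand-in enum in place of `PurposeLineage`:

```python
# Sketch of the derivation precedence after this change; Derivation is a
# stand-in for PurposeLineage, and concepts are assumed to expose .derivation.
from enum import Enum


class Derivation(Enum):
    AGGREGATE = 1  # first: cannot tolerate partial sources
    WINDOW = 2
    FILTER = 3
    UNNEST = 4
    BASIC = 5
    ROOT = 6  # plain selects
    CONSTANT = 7  # constants last


RANK = {d: i for i, d in enumerate(Derivation)}


def prioritize(concepts: list) -> list:
    # stable sort preserves input order within each derivation bucket,
    # matching the concatenated list comprehensions above
    return sorted(concepts, key=lambda c: RANK[c.derivation])
```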
@@ -172,6 +156,7 @@ def generate_candidates_restrictive(
     priority_concept: Concept,
     candidates: list[Concept],
     exhausted: set[str],
+    conditions: WhereClause | None = None,
 ) -> List[List[Concept]]:
     # if it's single row, joins are irrelevant. Fetch without keys.
     if priority_concept.granularity == Granularity.SINGLE_ROW:
@@ -185,19 +170,12 @@ def generate_candidates_restrictive(
         and x.address not in priority_concept.pseudonyms
         and priority_concept.address not in x.pseudonyms
     ]
-    combos: list[list[Concept]] = []
-    grain_check = Grain(components=[*local_candidates]).components_copy
-    # for simple operations these, fetch as much as possible.
-    if priority_concept.derivation in (PurposeLineage.BASIC, PurposeLineage.ROOT):
-        if set([x.address for x in grain_check]) != set(
-            [x.address for x in local_candidates]
-        ):
-            combos.append(local_candidates)
-        combos.append(grain_check)
-    # combos.append(local_candidates)
-    # append the empty set for sourcing concept by itself last
-    combos.append([])
-    return combos
+    if conditions and priority_concept.derivation in (
+        PurposeLineage.ROOT,
+        PurposeLineage.CONSTANT,
+    ):
+        return [unique(conditions.row_arguments + local_candidates, "address")]
+    return [local_candidates]


 def generate_node(
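Note: `generate_candidates_restrictive` now returns a single candidate list, folding the filter's `row_arguments` into the optional set for root and constant concepts. `unique` deduplicates by the `address` attribute; an assumed equivalent of that helper (the real one lives in trilogy's utilities):

```python
# Assumed shape of unique(items, "address"): order-preserving dedupe by key.
from typing import Any, List


def unique(items: List[Any], attribute: str) -> List[Any]:
    seen: set = set()
    out: List[Any] = []
    for item in items:
        key = getattr(item, attribute)
        if key not in seen:
            seen.add(key)
            out.append(item)
    return out
```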
@@ -206,7 +184,7 @@ def generate_node(
     environment: Environment,
     g: ReferenceGraph,
     depth: int,
-    source_concepts: Callable,
+    source_concepts: SearchConceptsType,
     accept_partial: bool = False,
     history: History | None = None,
     conditions: WhereClause | None = None,
@@ -260,7 +238,7 @@ def generate_node(
         )
     elif concept.derivation == PurposeLineage.UNNEST:
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]} and condition {conditions}"
         )
         return gen_unnest_node(
             concept,
@@ -300,22 +278,68 @@ def generate_node(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating rowset node with optional {[x.address for x in local_optional]}"
         )
         return gen_rowset_node(
-            concept, local_optional, environment, g, depth + 1, source_concepts, history
+            concept,
+            local_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts,
+            history,
+            conditions=conditions,
         )
     elif concept.derivation == PurposeLineage.MULTISELECT:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
         )
         return gen_multiselect_node(
-            concept, local_optional, environment, g, depth + 1, source_concepts, history
+            concept,
+            local_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts,
+            history,
+            conditions=conditions,
         )
     elif concept.derivation == PurposeLineage.CONSTANT:
+        constant_targets = [concept] + local_optional
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
         )
+        if any([x.derivation != PurposeLineage.CONSTANT for x in local_optional]):
+            non_root = [
+                x.address
+                for x in local_optional
+                if x.derivation != PurposeLineage.CONSTANT
+            ]
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including filter concepts, there is non root filter requirements {non_root}. Recursing with all of these as mandatory"
+            )
+
+            if not history.check_started(
+                constant_targets, accept_partial=accept_partial, conditions=conditions
+            ):
+                history.log_start(
+                    constant_targets,
+                    accept_partial=accept_partial,
+                    conditions=conditions,
+                )
+                return source_concepts(
+                    mandatory_list=constant_targets,
+                    environment=environment,
+                    g=g,
+                    depth=depth + 1,
+                    accept_partial=accept_partial,
+                    history=history,
+                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
+                    # which we do whenever we hit a root node
+                    # conditions=conditions,
+                )
+            else:
+                return None
         return ConstantNode(
             input_concepts=[],
-            output_concepts=[concept],
+            output_concepts=constant_targets,
             environment=environment,
             g=g,
             parents=[],
@@ -338,19 +362,64 @@ def generate_node(
             depth + 1,
             source_concepts,
             history,
+            conditions=conditions,
         )
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating basic node with optional {[x.address for x in local_optional]}"
         )
         return gen_basic_node(
-            concept, local_optional, environment, g, depth + 1, source_concepts, history
+            concept,
+            local_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts,
+            history,
+            conditions=conditions,
         )

     elif concept.derivation == PurposeLineage.ROOT:
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node with optional {[x.address for x in local_optional]}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node with optional including condition inputs {[x.address for x in local_optional]}"
         )
-        return history.gen_select_node(
+        # we've injected in any conditional concepts that may exist
+        # so if we don't still have just roots, we need to go up
+        root_targets = [concept] + local_optional
+
+        if any(
+            [
+                x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+                for x in local_optional
+            ]
+        ):
+            non_root = [
+                x.address
+                for x in local_optional
+                if x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+            ]
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including filter concepts, there is non root filter requirements {non_root}. Recursing with all of these as mandatory"
+            )
+
+            if not history.check_started(
+                root_targets, accept_partial=accept_partial, conditions=conditions
+            ):
+                history.log_start(
+                    root_targets, accept_partial=accept_partial, conditions=conditions
+                )
+                return source_concepts(
+                    mandatory_list=root_targets,
+                    environment=environment,
+                    g=g,
+                    depth=depth + 1,
+                    accept_partial=accept_partial,
+                    history=history,
+                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
+                    # which we do whenever we hit a root node
+                    # conditions=conditions,
+                )
+
+        check = history.gen_select_node(
             concept,
             local_optional,
             environment,
@@ -358,12 +427,48 @@ def generate_node(
             depth + 1,
             fail_if_not_found=False,
             accept_partial=accept_partial,
-            accept_partial_optional=True,
+            accept_partial_optional=False,
             source_concepts=source_concepts,
             conditions=conditions,
         )
+        if not check:
+
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for expanded concepts"
+            )
+            for accept_partial in [False, True]:
+                expanded = gen_merge_node(
+                    all_concepts=root_targets,
+                    environment=environment,
+                    g=g,
+                    depth=depth,
+                    source_concepts=source_concepts,
+                    history=history,
+                    search_conditions=conditions,
+                    accept_partial=accept_partial,
+                )
+
+                if expanded:
+                    ex_resolve = expanded.resolve()
+                    extra = [
+                        x
+                        for x in ex_resolve.output_concepts
+                        if x.address not in [y.address for y in root_targets]
+                        and x not in ex_resolve.grain.components
+                    ]
+                    expanded.set_output_concepts(root_targets)
+
+                    logger.info(
+                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in root_targets]} via concept addition; removing extra {[c.address for c in extra]}"
+                    )
+                    return expanded
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} could not find additional concept to inject"
+            )
+            return None
     else:
         raise ValueError(f"Unknown derivation {concept.derivation}")
+    return None


 def validate_concept(
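Note: the constant and root branches both recurse through `source_concepts` with filter inputs promoted to mandatory, fenced by `history.check_started`/`log_start` so the same (targets, accept_partial, conditions) search is never re-entered. A minimal sketch of that fence; the key construction here is hypothetical, not trilogy's actual bookkeeping:

```python
# Minimal sketch of the check_started/log_start recursion fence.
from typing import Optional


class History:
    def __init__(self) -> None:
        self._started: set[tuple] = set()

    def _key(
        self, targets: list[str], accept_partial: bool, conditions: Optional[str]
    ) -> tuple:
        return (tuple(sorted(targets)), accept_partial, conditions)

    def check_started(self, targets, accept_partial=False, conditions=None) -> bool:
        return self._key(targets, accept_partial, conditions) in self._started

    def log_start(self, targets, accept_partial=False, conditions=None) -> None:
        self._started.add(self._key(targets, accept_partial, conditions))


def search(targets: list[str], history: History) -> Optional[str]:
    if history.check_started(targets):
        return None  # this exact search is already in flight; bail out
    history.log_start(targets)
    # ...recurse with an expanded mandatory list here...
    return "node"
```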
@@ -417,6 +522,8 @@ def validate_concept(
 def validate_stack(
     stack: List[StrategyNode],
     concepts: List[Concept],
+    mandatory_with_filter: List[Concept],
+    conditions: WhereClause | None = None,
     accept_partial: bool = False,
 ) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
     found_map: dict[str, set[Concept]] = defaultdict(set)
@@ -425,6 +532,7 @@ def validate_stack(
     partial_addresses: set[str] = set()
     virtual_addresses: set[str] = set()
     seen: set[str] = set()
+
     for node in stack:
         resolved = node.resolve()

@@ -445,13 +553,26 @@ def validate_stack(
                 continue
             found_addresses.add(concept.address)
             virtual_addresses.add(concept.address)
-
+    if not conditions:
+        conditions_met = True
+    else:
+        conditions_met = all(
+            [node.preexisting_conditions == conditions.conditional for node in stack]
+        ) or all([c.address in found_addresses for c in mandatory_with_filter])
     # zip in those we know we found
-    if not all([c.address in found_addresses for c in concepts]):
+    if not all([c.address in found_addresses for c in concepts]) or not conditions_met:
+        if not all([c.address in found_addresses for c in concepts]):
+            return (
+                ValidationResult.INCOMPLETE,
+                found_addresses,
+                {c.address for c in concepts if c.address not in found_addresses},
+                partial_addresses,
+                virtual_addresses,
+            )
         return (
-            ValidationResult.INCOMPLETE,
+            ValidationResult.INCOMPLETE_CONDITION,
             found_addresses,
-            {c.address for c in concepts if c.address not in found_addresses},
+            {c.address for c in concepts if c.address not in mandatory_with_filter},
             partial_addresses,
             virtual_addresses,
         )
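Note: the new `conditions_met` check treats a filter as satisfied in two cases: every node in the stack was built with the same preexisting conditions, or every concept the filter needs was materialized so the filter can be applied at this level. Restated as a standalone predicate over simplified inputs, a sketch:

```python
# Sketch of the conditions_met rule above with simplified inputs.
from typing import Optional


def conditions_met(
    required: Optional[object],
    stack_conditions: list,
    found: set[str],
    filter_inputs: set[str],
) -> bool:
    if required is None:
        return True
    # every parent node already applied this exact filter...
    if stack_conditions and all(c == required for c in stack_conditions):
        return True
    # ...or all filter inputs were fetched, so the filter can run here
    return filter_inputs <= found
```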
@@ -479,6 +600,38 @@ def depth_to_prefix(depth: int) -> str:
     return "\t" * depth


+def append_existence_check(
+    node: StrategyNode,
+    environment: Environment,
+    graph: ReferenceGraph,
+    where: WhereClause,
+    history: History | None = None,
+):
+    # we if we have a where clause doing an existence check
+    # treat that as separate subquery
+    if where.existence_arguments:
+        for subselect in where.existence_arguments:
+            if not subselect:
+                continue
+            if all([x.address in node.input_concepts for x in subselect]):
+                logger.info(
+                    f"{LOGGER_PREFIX} existance clause inputs already found {[str(c) for c in subselect]}"
+                )
+                continue
+            logger.info(
+                f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
+            )
+            parent = source_query_concepts(
+                [*subselect], environment=environment, g=graph, history=history
+            )
+            assert parent, "Could not resolve existence clause"
+            node.add_parents([parent])
+            logger.info(
+                f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
+            )
+            node.add_existence_concepts([*subselect])
+
+
 def search_concepts(
     mandatory_list: List[Concept],
     environment: Environment,
@@ -529,9 +682,32 @@ def _search_concepts(
     conditions: WhereClause | None = None,
 ) -> StrategyNode | None:

+    # these are the concepts we need in the output projection
     mandatory_list = unique(mandatory_list, "address")

     all_mandatory = set(c.address for c in mandatory_list)
+
+    must_evaluate_condition_on_this_level_not_push_down = False
+
+    # if we have a filter, we may need to get more values to support that.
+    if conditions:
+        completion_mandatory = unique(
+            mandatory_list + conditions.row_arguments, "address"
+        )
+        # if anything we need to get is in the filter set and it's a computed value
+        # we need to get _everything_ in this loop
+        if any(
+            [
+                x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+                and x.address in conditions.row_arguments
+                for x in mandatory_list
+            ]
+        ):
+            mandatory_list = completion_mandatory
+            must_evaluate_condition_on_this_level_not_push_down = True
+    else:
+        completion_mandatory = mandatory_list
+
     attempted: set[str] = set()

     found: set[str] = set()
@@ -542,22 +718,60 @@ def _search_concepts(

     while attempted != all_mandatory:
         priority_concept = get_priority_concept(
-            mandatory_list, attempted, found_concepts=found, depth=depth
+            mandatory_list,
+            attempted,
+            found_concepts=found,
+            depth=depth,
         )
+        # filter evaluation
+        # always pass the filter up when we aren't looking at all filter inputs
+        # or there are any non-filter complex types
+        if conditions:
+            should_evaluate_filter_on_this_level_not_push_down = all(
+                [x.address in mandatory_list for x in conditions.row_arguments]
+            ) and not any(
+                [
+                    x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+                    for x in mandatory_list
+                    if x.address not in conditions.row_arguments
+                ]
+            )
+        else:
+            should_evaluate_filter_on_this_level_not_push_down = True
+        local_conditions = (
+            conditions
+            if conditions
+            and not must_evaluate_condition_on_this_level_not_push_down
+            and not should_evaluate_filter_on_this_level_not_push_down
+            else None
+        )
+        # but if it's not basic, and it's not condition;
+        # we do need to push it down (and have another layer of filter evaluation)
+        # to ensure filtering happens before something like a SUM
+        if (
+            conditions
+            and priority_concept.derivation
+            not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+            and priority_concept.address not in conditions.row_arguments
+        ):
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Force including conditions to push filtering above complex condition that is not condition member or parent"
+            )
+            local_conditions = conditions

         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} with conditions {local_conditions}"
         )

         candidates = [
             c for c in mandatory_list if c.address != priority_concept.address
         ]
         candidate_lists = generate_candidates_restrictive(
-            priority_concept, candidates, skip
+            priority_concept, candidates, skip, conditions=conditions
         )
         for clist in candidate_lists:
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {str(priority_concept)}, accept_partial {accept_partial}, optional {[str(v) for v in clist]}, exhausted {[str(c) for c in skip]}"
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {priority_concept.address}, accept_partial {accept_partial}, optional {[v.address for v in clist]}, exhausted {[c for c in skip]}"
             )
             node = generate_node(
                 priority_concept,
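Note: the block above decides, per priority concept, whether the WHERE clause travels into child generation as `local_conditions` or is held for evaluation at the current level. Condensed into a single predicate over boolean summaries of the real checks, a sketch:

```python
# Sketch of the local_conditions decision; inputs summarize the real checks.
def conditions_to_pass_down(
    has_conditions: bool,
    must_evaluate_here: bool,  # a mandatory concept is a computed filter input
    all_filter_inputs_in_scope: bool,  # every row_argument is in mandatory_list
    non_filter_rest_simple: bool,  # everything else is ROOT/CONSTANT
    priority_is_complex_non_filter: bool,  # e.g. an aggregate outside row_arguments
) -> bool:
    if not has_conditions:
        return False
    evaluate_here = all_filter_inputs_in_scope and non_filter_rest_simple
    push_down = not must_evaluate_here and not evaluate_here
    # force push-down so filtering lands below complex nodes like a SUM
    return push_down or priority_is_complex_non_filter
```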
@@ -568,11 +782,17 @@ def _search_concepts(
                 source_concepts=search_concepts,
                 accept_partial=accept_partial,
                 history=history,
-                conditions=conditions,
+                conditions=local_conditions,
             )
             if node:
                 stack.append(node)
-                node.resolve()
+                try:
+                    node.resolve()
+                except Exception as e:
+                    logger.error(
+                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve node {node} {e}"
+                    )
+                    raise e
                 # these concepts should not be attempted to be sourced again
                 # as fetching them requires operating on a subset of concepts
                 if priority_concept.derivation in [
@@ -588,13 +808,21 @@ def _search_concepts(
                     break
         attempted.add(priority_concept.address)
         complete, found, missing, partial, virtual = validate_stack(
-            stack, mandatory_list, accept_partial
+            stack,
+            mandatory_list,
+            completion_mandatory,
+            conditions=conditions,
+            accept_partial=accept_partial,
         )
-
+        mandatory_completion = [c.address for c in completion_mandatory]
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} flag for accepting partial addresses is"
-            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial} virtual {virtual}), attempted {attempted}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} {priority_concept.derivation} condition {conditions} flag for accepting partial addresses is"
+            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial} virtual {virtual}), attempted {attempted}, mandatory w/ filter {mandatory_completion}"
         )
+        if complete == ValidationResult.INCOMPLETE_CONDITION:
+            raise SyntaxError(
+                {str(node): node.preexisting_conditions for node in stack}
+            )
         # early exit if we have a complete stack with one node
         # we can only early exit if we have a complete stack
         # and we are not looking for more non-partial sources
@@ -602,73 +830,65 @@ def _search_concepts(
             not accept_partial or (accept_partial and not partial)
         ):
             break
+        # if we have attempted on root node, we've tried them all.
+        # inject in another search with filter concepts
+        if priority_concept.derivation == PurposeLineage.ROOT:
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Breaking as attempted root with no results"
+            )
+            break

     logger.info(
         f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}, virtual {virtual}"
     )
     if complete == ValidationResult.COMPLETE:
-        non_virtual = [c for c in mandatory_list if c.address not in virtual]
+        condition_required = True
+        non_virtual = [c for c in completion_mandatory if c.address not in virtual]
+        if not conditions:
+            condition_required = False
+            non_virtual = [c for c in mandatory_list if c.address not in virtual]
+
+        elif all([x.preexisting_conditions == conditions.conditional for x in stack]):
+            condition_required = False
+            non_virtual = [c for c in mandatory_list if c.address not in virtual]
+
+        if conditions and not condition_required:
+            parent_map = {
+                str(x): x.preexisting_conditions == conditions.conditional
+                for x in stack
+            }
+            logger.info(
+                f"Condition {conditions} not required, parents included filtering! {parent_map }"
+            )
         if len(stack) == 1:
-            output = stack[0]
+            output: StrategyNode = stack[0]
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)} with output {[x.address for x in output.output_concepts]} and {output.resolve().source_map}"
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
+            )
+        else:
+            output = MergeNode(
+                input_concepts=non_virtual,
+                output_concepts=non_virtual,
+                environment=environment,
+                g=g,
+                parents=stack,
+                depth=depth,
             )
-            return output
-
-        output = MergeNode(
-            input_concepts=non_virtual,
-            output_concepts=non_virtual,
-            environment=environment,
-            g=g,
-            parents=stack,
-            depth=depth,
-        )
-
         # ensure we can resolve our final merge
         output.resolve()
+        if condition_required and conditions:
+            output.add_condition(conditions.conditional)
+            if conditions.existence_arguments:
+                append_existence_check(
+                    output, environment, g, where=conditions, history=history
+                )
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning merge node, partial {[c.address for c in output.partial_concepts]}"
         )
         return output

-    # check that we're not already in a discovery loop
-    if not history.check_started(
-        mandatory_list, accept_partial=accept_partial, conditions=conditions
-    ):
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Stack is not connected graph, flag for accepting partial addresses is {accept_partial}, checking for expanded concepts"
-        )
-        # gate against further recursion into this
-        history.log_start(
-            mandatory_list, accept_partial=accept_partial, conditions=conditions
-        )
-        expanded = gen_merge_node(
-            all_concepts=mandatory_list,
-            environment=environment,
-            g=g,
-            depth=depth,
-            source_concepts=search_concepts,
-            history=history,
-            search_conditions=conditions,
-        )
-
-        if expanded:
-            # we don't need to return the entire list; just the ones we needed pre-expansion
-            ex_resolve = expanded.resolve()
-            extra = [
-                x
-                for x in ex_resolve.output_concepts
-                if x.address not in [y.address for y in mandatory_list]
-                and x not in ex_resolve.grain.components
-            ]
-            expanded.set_output_concepts(mandatory_list)
-
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in mandatory_list]} via concept addition; removing extra {[c.address for c in extra]}"
-            )
-            return expanded
     # if we can't find it after expanding to a merge, then
-    # attempt to accept partials in join paths
+    # accept partials in join paths

     if not accept_partial:
@@ -723,11 +943,18 @@ def source_query_concepts(
         raise ValueError(
             f"Could not resolve conections between {error_strings} from environment graph."
         )
-    return GroupNode(
-        output_concepts=output_concepts,
-        input_concepts=output_concepts,
+    candidate = GroupNode(
+        output_concepts=[
+            x for x in root.output_concepts if x.address not in root.hidden_concepts
+        ],
+        input_concepts=[
+            x for x in root.output_concepts if x.address not in root.hidden_concepts
+        ],
         environment=environment,
         g=g,
         parents=[root],
         partial_concepts=root.partial_concepts,
     )
+    if not candidate.resolve().group_required:
+        return root
+    return candidate
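Note: final assembly now builds the wrapping `GroupNode` as a candidate over the root's non-hidden outputs and drops it when the resolved datasource reports no grouping is required. The shape of that optimization, with a stand-in factory rather than the real node types:

```python
# Sketch of the group-if-required pattern; make_group_node is a stand-in.
def finalize(root, make_group_node):
    candidate = make_group_node(parents=[root])
    if not candidate.resolve().group_required:
        return root  # grouping would be a no-op; skip the extra node
    return candidate
```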