pytrilogy 0.0.3.54__py3-none-any.whl → 0.0.3.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic.

Files changed (37)
  1. {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/RECORD +37 -30
  3. trilogy/__init__.py +1 -1
  4. trilogy/constants.py +2 -0
  5. trilogy/core/enums.py +6 -0
  6. trilogy/core/functions.py +3 -0
  7. trilogy/core/models/author.py +12 -4
  8. trilogy/core/models/execute.py +207 -2
  9. trilogy/core/optimization.py +3 -3
  10. trilogy/core/optimizations/inline_datasource.py +5 -7
  11. trilogy/core/processing/concept_strategies_v3.py +323 -878
  12. trilogy/core/processing/discovery_loop.py +0 -0
  13. trilogy/core/processing/discovery_node_factory.py +469 -0
  14. trilogy/core/processing/discovery_utility.py +123 -0
  15. trilogy/core/processing/discovery_validation.py +155 -0
  16. trilogy/core/processing/node_generators/__init__.py +2 -0
  17. trilogy/core/processing/node_generators/recursive_node.py +87 -0
  18. trilogy/core/processing/node_generators/select_node.py +6 -8
  19. trilogy/core/processing/nodes/__init__.py +4 -4
  20. trilogy/core/processing/nodes/recursive_node.py +46 -0
  21. trilogy/core/query_processor.py +7 -1
  22. trilogy/dialect/base.py +11 -2
  23. trilogy/dialect/bigquery.py +5 -6
  24. trilogy/dialect/common.py +19 -3
  25. trilogy/dialect/duckdb.py +1 -1
  26. trilogy/dialect/snowflake.py +8 -8
  27. trilogy/parsing/common.py +4 -3
  28. trilogy/parsing/parse_engine.py +12 -0
  29. trilogy/parsing/trilogy.lark +3 -1
  30. trilogy/std/date.preql +3 -1
  31. trilogy/std/geography.preql +4 -0
  32. trilogy/std/money.preql +65 -4
  33. trilogy/std/net.preql +8 -0
  34. {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/WHEEL +0 -0
  35. {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/entry_points.txt +0 -0
  36. {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/licenses/LICENSE.md +0 -0
  37. {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,8 @@
-from collections import defaultdict
-from enum import Enum
-from typing import List, Optional, Protocol, Union
+from dataclasses import dataclass
+from typing import List, Optional
 
 from trilogy.constants import logger
-from trilogy.core.enums import Derivation, FunctionType, Granularity
+from trilogy.core.enums import Derivation, Granularity
 from trilogy.core.env_processor import generate_graph
 from trilogy.core.exceptions import UnresolvableQueryException
 from trilogy.core.graph_models import ReferenceGraph
@@ -12,185 +11,41 @@ from trilogy.core.models.author import (
 )
 from trilogy.core.models.build import (
     BuildConcept,
-    BuildFunction,
-    BuildRowsetItem,
     BuildWhereClause,
 )
 from trilogy.core.models.build_environment import BuildEnvironment
-from trilogy.core.processing.node_generators import (
-    gen_basic_node,
-    gen_filter_node,
-    gen_group_node,
-    gen_group_to_node,
-    gen_merge_node,
-    gen_multiselect_node,
-    gen_rowset_node,
-    gen_synonym_node,
-    gen_union_node,
-    gen_unnest_node,
-    gen_window_node,
+from trilogy.core.processing.discovery_node_factory import generate_node
+from trilogy.core.processing.discovery_utility import (
+    LOGGER_PREFIX,
+    depth_to_prefix,
+    get_priority_concept,
+)
+from trilogy.core.processing.discovery_validation import (
+    ValidationResult,
+    validate_stack,
 )
 from trilogy.core.processing.nodes import (
-    ConstantNode,
     GroupNode,
     History,
     MergeNode,
     StrategyNode,
 )
-from trilogy.core.processing.utility import (
-    get_disconnected_components,
-)
 from trilogy.utility import unique
 
+SKIPPED_DERIVATIONS = [
+    Derivation.AGGREGATE,
+    Derivation.FILTER,
+    Derivation.WINDOW,
+    Derivation.UNNEST,
+    Derivation.RECURSIVE,
+    Derivation.ROWSET,
+    Derivation.BASIC,
+    Derivation.GROUP_TO,
+    Derivation.MULTISELECT,
+    Derivation.UNION,
+]
 
-class ValidationResult(Enum):
-    COMPLETE = 1
-    DISCONNECTED = 2
-    INCOMPLETE = 3
-    INCOMPLETE_CONDITION = 4
-
-
-LOGGER_PREFIX = "[CONCEPT DETAIL]"
-
-
-class SearchConceptsType(Protocol):
-    def __call__(
-        self,
-        mandatory_list: List[BuildConcept],
-        history: History,
-        environment: BuildEnvironment,
-        depth: int,
-        g: ReferenceGraph,
-        accept_partial: bool = False,
-        conditions: Optional[BuildWhereClause] = None,
-    ) -> Union[StrategyNode, None]: ...
-
-
-def get_upstream_concepts(base: BuildConcept, nested: bool = False) -> set[str]:
-    upstream = set()
-    if nested:
-        upstream.add(base.address)
-    if not base.lineage:
-        return upstream
-    for x in base.lineage.concept_arguments:
-        # if it's derived from any value in a rowset, ALL rowset items are upstream
-        if x.derivation == Derivation.ROWSET:
-            assert isinstance(x.lineage, BuildRowsetItem), type(x.lineage)
-            for y in x.lineage.rowset.select.output_components:
-                upstream.add(f"{x.lineage.rowset.name}.{y.address}")
-                # upstream = upstream.union(get_upstream_concepts(y, nested=True))
-        upstream = upstream.union(get_upstream_concepts(x, nested=True))
-    return upstream
-
-
-def restrict_node_outputs_targets(
-    node: StrategyNode, targets: list[BuildConcept], depth: int
-) -> list[BuildConcept]:
-    ex_resolve = node.resolve()
-    extra = [
-        x
-        for x in ex_resolve.output_concepts
-        if x.address not in [y.address for y in targets]
-    ]
-
-    base = [x for x in ex_resolve.output_concepts if x.address not in extra]
-    logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} reducing final outputs, was {[c.address for c in ex_resolve.output_concepts]} with extra {[c.address for c in extra]}, remaining {base}"
-    )
-    for x in targets:
-        if x.address not in base:
-            base.append(x)
-    node.set_output_concepts(base)
-    return extra
-
-
-def get_priority_concept(
-    all_concepts: List[BuildConcept],
-    attempted_addresses: set[str],
-    found_concepts: set[str],
-    depth: int,
-) -> BuildConcept:
-    # optimized search for missing concepts
-    pass_one = sorted(
-        [
-            c
-            for c in all_concepts
-            if c.address not in attempted_addresses and c.address not in found_concepts
-        ],
-        key=lambda x: x.address,
-    )
-    # sometimes we need to scan intermediate concepts to get merge keys or filter keys,
-    # so do an exhaustive search
-    # pass_two = [c for c in all_concepts+filter_only if c.address not in attempted_addresses]
-    for remaining_concept in (pass_one,):
-        priority = (
-            # find anything that needs no joins first, so we can exit early
-            [
-                c
-                for c in remaining_concept
-                if c.derivation == Derivation.CONSTANT
-                and c.granularity == Granularity.SINGLE_ROW
-            ]
-            +
-            # then multiselects to remove them from scope
-            [c for c in remaining_concept if c.derivation == Derivation.MULTISELECT]
-            +
-            # then rowsets to remove them from scope, as they cannot get partials
-            [c for c in remaining_concept if c.derivation == Derivation.ROWSET]
-            +
-            # then rowsets to remove them from scope, as they cannot get partials
-            [c for c in remaining_concept if c.derivation == Derivation.UNION]
-            # we should be home-free here
-            +
-            # then aggregates to remove them from scope, as they cannot get partials
-            [c for c in remaining_concept if c.derivation == Derivation.AGGREGATE]
-            # then windows to remove them from scope, as they cannot get partials
-            + [c for c in remaining_concept if c.derivation == Derivation.WINDOW]
-            # then filters to remove them from scope, also cannot get partials
-            + [c for c in remaining_concept if c.derivation == Derivation.FILTER]
-            # unnests are weird?
-            + [c for c in remaining_concept if c.derivation == Derivation.UNNEST]
-            + [c for c in remaining_concept if c.derivation == Derivation.BASIC]
-            # finally our plain selects
-            + [
-                c for c in remaining_concept if c.derivation == Derivation.ROOT
-            ]  # and any non-single row constants
-            + [c for c in remaining_concept if c.derivation == Derivation.CONSTANT]
-        )
-
-        priority += [
-            c
-            for c in remaining_concept
-            if c.address not in [x.address for x in priority]
-        ]
-        final = []
-        # if any thing is derived from another concept
-        # get the derived copy first
-        # as this will usually resolve cleaner
-        for x in priority:
-            if any(
-                [
-                    x.address
-                    in get_upstream_concepts(
-                        c,
-                    )
-                    for c in priority
-                ]
-            ):
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} delaying fetch of {x.address} as parent of another concept"
-                )
-                continue
-            final.append(x)
-        # then append anything we didn't get
-        for x2 in priority:
-            if x2 not in final:
-                final.append(x2)
-    if final:
-        return final[0]
-    raise ValueError(
-        f"Cannot resolve query. No remaining priority concepts, have attempted {attempted_addresses}"
-    )
+ROOT_DERIVATIONS = [Derivation.ROOT, Derivation.CONSTANT]
 
 
 def generate_candidates_restrictive(
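The hunk above swaps inline derivation tuples for two module-level constants (SKIPPED_DERIVATIONS, ROOT_DERIVATIONS) and relocates the priority and validation helpers into dedicated discovery_* modules. A minimal, self-contained sketch of the constant-list pattern, using a stand-in enum rather than trilogy's real Derivation:

from enum import Enum


class Derivation(Enum):  # stand-in for trilogy.core.enums.Derivation
    ROOT = 1
    CONSTANT = 2
    AGGREGATE = 3
    FILTER = 4


# one shared list replaces tuples repeated at each call site
ROOT_DERIVATIONS = [Derivation.ROOT, Derivation.CONSTANT]
SKIPPED_DERIVATIONS = [Derivation.AGGREGATE, Derivation.FILTER]


def is_root(derivation: Derivation) -> bool:
    # was: derivation in (Derivation.ROOT, Derivation.CONSTANT)
    return derivation in ROOT_DERIVATIONS


assert is_root(Derivation.CONSTANT)
assert Derivation.AGGREGATE in SKIPPED_DERIVATIONS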
@@ -199,10 +54,10 @@ def generate_candidates_restrictive(
     exhausted: set[str],
     depth: int,
     conditions: BuildWhereClause | None = None,
-) -> List[List[BuildConcept]]:
+) -> List[BuildConcept]:
     # if it's single row, joins are irrelevant. Fetch without keys.
     if priority_concept.granularity == Granularity.SINGLE_ROW:
-        return [[]]
+        return []
 
     local_candidates = [
         x
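generate_candidates_restrictive now returns a flat List[BuildConcept] rather than a single-element List[List[BuildConcept]], so the caller's one-iteration inner loop (visible later in this diff) disappears. A toy before/after with placeholder types:

from typing import List


def candidates_old(options: List[str]) -> List[List[str]]:
    return [options]  # always exactly one inner list


def candidates_new(options: List[str]) -> List[str]:
    return options


opts = ["a", "b"]
for clist in candidates_old(opts):  # before: a loop that can only run once
    assert clist == ["a", "b"]
assert candidates_new(opts) == ["a", "b"]  # after: the list is used directly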
@@ -212,496 +67,12 @@
         and x.address not in priority_concept.pseudonyms
         and priority_concept.address not in x.pseudonyms
     ]
-    if conditions and priority_concept.derivation in (
-        Derivation.ROOT,
-        Derivation.CONSTANT,
-    ):
+    if conditions and priority_concept.derivation in ROOT_DERIVATIONS:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Injecting additional conditional row arguments as all remaining concepts are roots or constant"
         )
-        return [unique(list(conditions.row_arguments) + local_candidates, "address")]
-    return [local_candidates]
-
-
-def generate_node(
-    concept: BuildConcept,
-    local_optional: List[BuildConcept],
-    environment: BuildEnvironment,
-    g: ReferenceGraph,
-    depth: int,
-    source_concepts: SearchConceptsType,
-    history: History,
-    accept_partial: bool = False,
-    conditions: BuildWhereClause | None = None,
-) -> StrategyNode | None:
-    # first check in case there is a materialized_concept
-    candidate = history.gen_select_node(
-        concept,
-        local_optional,
-        environment,
-        g,
-        depth + 1,
-        fail_if_not_found=False,
-        accept_partial=accept_partial,
-        accept_partial_optional=False,
-        source_concepts=source_concepts,
-        conditions=conditions,
-    )
-
-    if candidate:
-        return candidate
-
-    if concept.derivation == Derivation.WINDOW:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating window node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_window_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-
-    elif concept.derivation == Derivation.FILTER:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating filter node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_filter_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.UNNEST:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]} and condition {conditions}"
-        )
-        return gen_unnest_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.UNION:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating union node with optional {[x.address for x in local_optional]} and condition {conditions}"
-        )
-        return gen_union_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            source_concepts,
-            history,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.AGGREGATE:
-        # don't push constants up before aggregation
-        # if not required
-        # to avoid constants multiplication changing default aggregation results
-        # ex sum(x) * 2 w/ no grain should return sum(x) * 2, not sum(x*2)
-        # these should always be sourceable independently
-        agg_optional = [
-            x for x in local_optional if x.granularity != Granularity.SINGLE_ROW
-        ]
-
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating aggregate node with {[x for x in agg_optional]}"
-        )
-        return gen_group_node(
-            concept,
-            agg_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.ROWSET:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating rowset node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_rowset_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            source_concepts,
-            history,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.MULTISELECT:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_multiselect_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            source_concepts,
-            history,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.CONSTANT:
-        constant_targets = [concept] + local_optional
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
-        )
-        if any([x.derivation != Derivation.CONSTANT for x in local_optional]):
-            non_root = [
-                x.address for x in local_optional if x.derivation != Derivation.CONSTANT
-            ]
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including filter concepts, there are non root/non constant concepts we should find first: {non_root}. Recursing with all of these as mandatory"
-            )
-
-            if not history.check_started(
-                constant_targets, accept_partial=accept_partial, conditions=conditions
-            ):
-                history.log_start(
-                    constant_targets,
-                    accept_partial=accept_partial,
-                    conditions=conditions,
-                )
-                return source_concepts(
-                    mandatory_list=constant_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    accept_partial=accept_partial,
-                    history=history,
-                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
-                    # which we do whenever we hit a root node
-                    # conditions=conditions,
-                )
-            else:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} skipping search, already in a recursion fot these concepts"
-                )
-                return None
-        return ConstantNode(
-            input_concepts=[],
-            output_concepts=constant_targets,
-            environment=environment,
-            parents=[],
-            depth=depth + 1,
-            preexisting_conditions=conditions.conditional if conditions else None,
-        )
-    elif concept.derivation == Derivation.BASIC:
-        # this is special case handling for group bys
-        if (
-            isinstance(concept.lineage, BuildFunction)
-            and concept.lineage.operator == FunctionType.GROUP
-        ):
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating group to grain node with {[x.address for x in local_optional]}"
-            )
-            return gen_group_to_node(
-                concept,
-                local_optional,
-                environment,
-                g,
-                depth + 1,
-                source_concepts,
-                history,
-                conditions=conditions,
-            )
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating basic node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_basic_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-
-    elif concept.derivation == Derivation.ROOT:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node with optional including condition inputs {[x.address for x in local_optional]}"
-        )
-        # we've injected in any conditional concepts that may exist
-        # so if we don't still have just roots, we need to go up
-        root_targets = [concept] + local_optional
-
-        if any(
-            [
-                x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
-                for x in local_optional
-            ]
-        ):
-            non_root = [
-                x.address
-                for x in local_optional
-                if x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
-            ]
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including any filters, there are non-root concepts we should expand first: {non_root}. Recursing with all of these as mandatory"
-            )
-
-            # if not history.check_started(
-            #     root_targets, accept_partial=accept_partial, conditions=conditions
-            # ) or 1==1:
-            if True:
-                history.log_start(
-                    root_targets, accept_partial=accept_partial, conditions=conditions
-                )
-                return source_concepts(
-                    mandatory_list=root_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    accept_partial=accept_partial,
-                    history=history,
-                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
-                    # which we do whenever we hit a root node
-                    # conditions=conditions,
-                )
-            else:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} skipping root search, already in a recursion for these concepts"
-                )
-        check = history.gen_select_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            fail_if_not_found=False,
-            accept_partial=accept_partial,
-            accept_partial_optional=False,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-        if not check:
-
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for expanded concepts"
-            )
-            for accept_partial in [False, True]:
-                expanded = gen_merge_node(
-                    all_concepts=root_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    source_concepts=source_concepts,
-                    history=history,
-                    search_conditions=conditions,
-                    accept_partial=accept_partial,
-                )
-
-                if expanded:
-                    extra = restrict_node_outputs_targets(expanded, root_targets, depth)
-                    pseudonyms = [
-                        x
-                        for x in extra
-                        if any(x.address in y.pseudonyms for y in root_targets)
-                    ]
-                    if pseudonyms:
-                        expanded.add_output_concepts(pseudonyms)
-                        logger.info(
-                            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Hiding pseudonyms{[c.address for c in pseudonyms]}"
-                        )
-                        expanded.hide_output_concepts(pseudonyms)
-
-                    logger.info(
-                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in root_targets]} via concept addition; removing extra {[c.address for c in extra]}"
-                    )
-                    return expanded
-
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} could not find additional concept(s) to inject"
-            )
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for synonyms"
-            )
-            if not history.check_started(
-                root_targets, accept_partial=accept_partial, conditions=conditions
-            ):
-                history.log_start(
-                    root_targets, accept_partial=accept_partial, conditions=conditions
-                )
-                resolved = gen_synonym_node(
-                    all_concepts=root_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    source_concepts=source_concepts,
-                    history=history,
-                    conditions=conditions,
-                    accept_partial=accept_partial,
-                )
-                if resolved:
-                    logger.info(
-                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} resolved concepts through synonyms"
-                    )
-                    return resolved
-            else:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} skipping synonym search, already in a recursion for these concepts"
-                )
-                return None
-    else:
-        raise ValueError(f"Unknown derivation {concept.derivation} on {concept}")
-    return None
-
-
-def validate_concept(
-    concept: BuildConcept,
-    node: StrategyNode,
-    found_addresses: set[str],
-    non_partial_addresses: set[str],
-    partial_addresses: set[str],
-    virtual_addresses: set[str],
-    found_map: dict[str, set[BuildConcept]],
-    accept_partial: bool,
-    seen: set[str],
-    environment: BuildEnvironment,
-):
-    found_map[str(node)].add(concept)
-    seen.add(concept.address)
-    if concept not in node.partial_concepts:
-        found_addresses.add(concept.address)
-        non_partial_addresses.add(concept.address)
-        # remove it from our partial tracking
-        if concept.address in partial_addresses:
-            partial_addresses.remove(concept.address)
-        if concept.address in virtual_addresses:
-            virtual_addresses.remove(concept.address)
-    if concept in node.partial_concepts:
-        if concept.address in non_partial_addresses:
-            return None
-        partial_addresses.add(concept.address)
-        if accept_partial:
-            found_addresses.add(concept.address)
-            found_map[str(node)].add(concept)
-    for v_address in concept.pseudonyms:
-        if v_address in seen:
-            return
-        v = environment.concepts[v_address]
-        if v.address in seen:
-            return
-        if v.address == concept.address:
-            return
-        validate_concept(
-            v,
-            node,
-            found_addresses,
-            non_partial_addresses,
-            partial_addresses,
-            virtual_addresses,
-            found_map,
-            accept_partial,
-            seen=seen,
-            environment=environment,
-        )
-
-
-def validate_stack(
-    environment: BuildEnvironment,
-    stack: List[StrategyNode],
-    concepts: List[BuildConcept],
-    mandatory_with_filter: List[BuildConcept],
-    conditions: BuildWhereClause | None = None,
-    accept_partial: bool = False,
-) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
-    found_map: dict[str, set[BuildConcept]] = defaultdict(set)
-    found_addresses: set[str] = set()
-    non_partial_addresses: set[str] = set()
-    partial_addresses: set[str] = set()
-    virtual_addresses: set[str] = set()
-    seen: set[str] = set()
-
-    for node in stack:
-        resolved = node.resolve()
-
-        for concept in resolved.output_concepts:
-            if concept.address in resolved.hidden_concepts:
-                continue
-
-            validate_concept(
-                concept,
-                node,
-                found_addresses,
-                non_partial_addresses,
-                partial_addresses,
-                virtual_addresses,
-                found_map,
-                accept_partial,
-                seen,
-                environment,
-            )
-        for concept in node.virtual_output_concepts:
-            if concept.address in non_partial_addresses:
-                continue
-            found_addresses.add(concept.address)
-            virtual_addresses.add(concept.address)
-    if not conditions:
-        conditions_met = True
-    else:
-        conditions_met = all(
-            [node.preexisting_conditions == conditions.conditional for node in stack]
-        ) or all([c.address in found_addresses for c in mandatory_with_filter])
-    # zip in those we know we found
-    if not all([c.address in found_addresses for c in concepts]) or not conditions_met:
-        if not all([c.address in found_addresses for c in concepts]):
-            return (
-                ValidationResult.INCOMPLETE,
-                found_addresses,
-                {c.address for c in concepts if c.address not in found_addresses},
-                partial_addresses,
-                virtual_addresses,
-            )
-        return (
-            ValidationResult.INCOMPLETE_CONDITION,
-            found_addresses,
-            {c.address for c in concepts if c.address not in mandatory_with_filter},
-            partial_addresses,
-            virtual_addresses,
-        )
-
-    graph_count, _ = get_disconnected_components(found_map)
-    if graph_count in (0, 1):
-        return (
-            ValidationResult.COMPLETE,
-            found_addresses,
-            set(),
-            partial_addresses,
-            virtual_addresses,
-        )
-    # if we have too many subgraphs, we need to keep searching
-    return (
-        ValidationResult.DISCONNECTED,
-        found_addresses,
-        set(),
-        partial_addresses,
-        virtual_addresses,
-    )
-
-
-def depth_to_prefix(depth: int) -> str:
-    return "\t" * depth
+        return unique(list(conditions.row_arguments) + local_candidates, "address")
+    return local_candidates
 
 
 def append_existence_check(
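The generate_node dispatcher removed above (one branch per Derivation value) moves, per the file list, to trilogy/core/processing/discovery_node_factory.py. A runnable toy of the same shape, compressed to a dispatch table where the original uses an if/elif chain:

from enum import Enum
from typing import Callable, Optional


class Derivation(Enum):  # stand-in enum
    WINDOW = 1
    FILTER = 2
    ROOT = 3


# one generator per derivation, mirroring the gen_*_node branches
GENERATORS: dict[Derivation, Callable[[str], str]] = {
    Derivation.WINDOW: lambda c: f"window({c})",
    Derivation.FILTER: lambda c: f"filter({c})",
}


def generate_node(concept: str, derivation: Derivation) -> Optional[str]:
    gen = GENERATORS.get(derivation)
    return gen(concept) if gen else None


assert generate_node("x", Derivation.WINDOW) == "window(x)"
assert generate_node("x", Derivation.ROOT) is None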
@@ -748,7 +119,6 @@ def search_concepts(
     accept_partial: bool = False,
     conditions: BuildWhereClause | None = None,
 ) -> StrategyNode | None:
-    logger.error(f"starting search for {mandatory_list}")
     hist = history.get_history(
         search=mandatory_list, accept_partial=accept_partial, conditions=conditions
     )
@@ -778,7 +148,31 @@ def search_concepts(
     return result
 
 
-def _search_concepts(
+@dataclass
+class LoopContext:
+    mandatory_list: List[BuildConcept]
+    environment: BuildEnvironment
+    depth: int
+    g: ReferenceGraph
+    history: History
+    attempted: set[str]
+    found: set[str]
+    skip: set[str]
+    all_mandatory: set[str]
+    original_mandatory: List[BuildConcept]
+    completion_mandatory: List[BuildConcept]
+    stack: List[StrategyNode]
+    complete: ValidationResult = ValidationResult.INCOMPLETE
+    accept_partial: bool = False
+    must_evaluate_condition_on_this_level_not_push_down: bool = False
+    conditions: BuildWhereClause | None = None
+
+    @property
+    def incomplete(self) -> bool:
+        return self.attempted != self.all_mandatory
+
+
+def initialize_loop_context(
     mandatory_list: List[BuildConcept],
     environment: BuildEnvironment,
     depth: int,
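LoopContext gathers what were previously a dozen loose locals in _search_concepts into one dataclass, with loop termination derived as a property. A runnable reduction of the idea; field names echo the diff, types are simplified:

from dataclasses import dataclass, field


@dataclass
class LoopContext:
    all_mandatory: set[str]
    attempted: set[str] = field(default_factory=set)

    @property
    def incomplete(self) -> bool:
        return self.attempted != self.all_mandatory


ctx = LoopContext(all_mandatory={"orders.id", "orders.total"})
assert ctx.incomplete
ctx.attempted.update(ctx.all_mandatory)
assert not ctx.incomplete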
@@ -786,7 +180,7 @@ def _search_concepts(
     history: History,
     accept_partial: bool = False,
     conditions: BuildWhereClause | None = None,
-) -> StrategyNode | None:
+):
     # these are the concepts we need in the output projection
     mandatory_list = unique(mandatory_list, "address")
     # cache our values before an filter injection
@@ -808,7 +202,7 @@ def _search_concepts(
     required_filters = [
         x
         for x in mandatory_list
-        if x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
+        if x.derivation not in ROOT_DERIVATIONS
         and not (
             x.derivation == Derivation.AGGREGATE
             and x.granularity == Granularity.SINGLE_ROW
@@ -828,257 +222,308 @@ def _search_concepts(
     else:
 
         completion_mandatory = mandatory_list
-    attempted: set[str] = set()
+    return LoopContext(
+        mandatory_list=mandatory_list,
+        environment=environment,
+        depth=depth,
+        g=g,
+        history=history,
+        attempted=set(),
+        found=set(),
+        skip=set(),
+        all_mandatory=all_mandatory,
+        original_mandatory=original_mandatory,
+        completion_mandatory=completion_mandatory,
+        stack=[],
+        complete=ValidationResult.INCOMPLETE,
+        accept_partial=accept_partial,
+        must_evaluate_condition_on_this_level_not_push_down=must_evaluate_condition_on_this_level_not_push_down,
+        conditions=conditions,
+    )
 
-    found: set[str] = set()
-    skip: set[str] = set()
-    virtual: set[str] = set()
-    stack: List[StrategyNode] = []
-    complete = ValidationResult.INCOMPLETE
 
-    while attempted != all_mandatory:
-        priority_concept = get_priority_concept(
-            mandatory_list,
-            attempted,
-            found_concepts=found,
-            depth=depth,
+def evaluate_loop_conditions(
+    context: LoopContext, priority_concept: BuildConcept
+) -> BuildWhereClause | None:
+    # filter evaluation
+    # always pass the filter up when we aren't looking at all filter inputs
+    # or there are any non-filter complex types
+    if context.conditions:
+        should_evaluate_filter_on_this_level_not_push_down = all(
+            [
+                x.address in context.mandatory_list
+                for x in context.conditions.row_arguments
+            ]
+        ) and not any(
+            [
+                x.derivation not in ROOT_DERIVATIONS
+                for x in context.mandatory_list
+                if x.address not in context.conditions.row_arguments
+            ]
+        )
+    else:
+        should_evaluate_filter_on_this_level_not_push_down = True
+    local_conditions = (
+        context.conditions
+        if context.conditions
+        and not context.must_evaluate_condition_on_this_level_not_push_down
+        and not should_evaluate_filter_on_this_level_not_push_down
+        else None
+    )
+    # but if it's not basic, and it's not condition;
+    # we do need to push it down (and have another layer of filter evaluation)
+    # to ensure filtering happens before something like a SUM
+    if (
+        context.conditions
+        and priority_concept.derivation not in ROOT_DERIVATIONS
+        and priority_concept.address not in context.conditions.row_arguments
+    ):
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Force including conditions in {priority_concept.address} to push filtering above complex condition that is not condition member or parent"
         )
-        # filter evaluation
-        # always pass the filter up when we aren't looking at all filter inputs
-        # or there are any non-filter complex types
-        if conditions:
-            should_evaluate_filter_on_this_level_not_push_down = all(
-                [x.address in mandatory_list for x in conditions.row_arguments]
-            ) and not any(
-                [
-                    x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
-                    for x in mandatory_list
-                    if x.address not in conditions.row_arguments
-                ]
+        local_conditions = context.conditions
+    return local_conditions
+
+
+def check_for_early_exit(
+    complete, partial, context: LoopContext, priority_concept: BuildConcept
+) -> bool:
+    if complete == ValidationResult.INCOMPLETE_CONDITION:
+        cond_dict = {str(node): node.preexisting_conditions for node in context.stack}
+        for node in context.stack:
+            logger.info(
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Node {node} has conditions {node.preexisting_conditions} and {node.conditions}"
             )
-        else:
-            should_evaluate_filter_on_this_level_not_push_down = True
-        local_conditions = (
-            conditions
-            if conditions
-            and not must_evaluate_condition_on_this_level_not_push_down
-            and not should_evaluate_filter_on_this_level_not_push_down
-            else None
+        raise SyntaxError(f"Have {cond_dict} and need {str(context.conditions)}")
+    # early exit if we have a complete stack with one node
+    # we can only early exit if we have a complete stack
+    # and we are not looking for more non-partial sources
+    if complete == ValidationResult.COMPLETE and (
+        not context.accept_partial or (context.accept_partial and not partial)
+    ):
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} breaking loop, complete"
        )
-        # but if it's not basic, and it's not condition;
-        # we do need to push it down (and have another layer of filter evaluation)
-        # to ensure filtering happens before something like a SUM
-        if (
-            conditions
-            and priority_concept.derivation
-            not in (Derivation.ROOT, Derivation.CONSTANT)
-            and priority_concept.address not in conditions.row_arguments
-        ):
+        return True
+    elif complete == ValidationResult.COMPLETE and context.accept_partial and partial:
+        if len(context.attempted) == len(context.mandatory_list):
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Force including conditions in {priority_concept.address} to push filtering above complex condition that is not condition member or parent"
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Breaking as we have attempted all nodes"
             )
-            local_conditions = conditions
-
+            return True
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} granularity {priority_concept.granularity} with conditions {local_conditions}"
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Found complete stack with partials {partial}, continuing search, attempted {context.attempted} all {len(context.mandatory_list)}"
+        )
+    else:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Not complete, continuing search"
+        )
+    # if we have attempted on root node, we've tried them all.
+    # inject in another search with filter concepts
+    if priority_concept.derivation == Derivation.ROOT:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Breaking as attempted root with no results"
         )
+        return True
+    return False
 
-        candidates = [
-            c for c in mandatory_list if c.address != priority_concept.address
+
+def generate_loop_completion(context: LoopContext, virtual) -> StrategyNode:
+    condition_required = True
+    non_virtual = [c for c in context.completion_mandatory if c.address not in virtual]
+    non_virtual_output = [
+        c for c in context.original_mandatory if c.address not in virtual
+    ]
+    non_virtual_different = len(context.completion_mandatory) != len(
+        context.original_mandatory
+    )
+    non_virtual_difference_values = set(
+        [x.address for x in context.completion_mandatory]
+    ).difference(set([x.address for x in context.original_mandatory]))
+    if not context.conditions:
+        condition_required = False
+        non_virtual = [c for c in context.mandatory_list if c.address not in virtual]
+
+    elif all(
+        [
+            x.preexisting_conditions == context.conditions.conditional
+            for x in context.stack
         ]
-        candidate_lists = generate_candidates_restrictive(
-            priority_concept, candidates, skip, depth=depth, conditions=conditions
+    ):
+        condition_required = False
+        non_virtual = [c for c in context.mandatory_list if c.address not in virtual]
+
+    if context.conditions and not condition_required:
+        parent_map = {
+            str(x): x.preexisting_conditions == context.conditions.conditional
+            for x in context.stack
+        }
+        logger.info(
+            f"Condition {context.conditions} not required, parents included filtering! {parent_map }"
         )
-        for clist in candidate_lists:
+    if len(context.stack) == 1:
+        output: StrategyNode = context.stack[0]
+        if non_virtual_different:
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {priority_concept.address}, accept_partial {accept_partial}, optional {[v.address for v in clist]}, exhausted {[c for c in skip]}"
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Found different non-virtual output concepts ({non_virtual_difference_values}), removing condition injected values"
             )
-            node = generate_node(
-                priority_concept,
-                clist,
-                environment,
-                g,
-                depth,
-                source_concepts=search_concepts,
-                accept_partial=accept_partial,
-                history=history,
-                conditions=local_conditions,
+            output.set_output_concepts(
+                [x for x in output.output_concepts if x.address in non_virtual_output],
+                rebuild=False,
             )
-            if node:
-                stack.append(node)
-                try:
-                    node.resolve()
-                except Exception as e:
-                    logger.error(
-                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve node {node} {e}"
-                    )
-                    raise e
-                # these concepts should not be attempted to be sourced again
-                # as fetching them requires operating on a subset of concepts
-                if priority_concept.derivation in [
-                    Derivation.AGGREGATE,
-                    Derivation.FILTER,
-                    Derivation.WINDOW,
-                    Derivation.UNNEST,
-                    Derivation.ROWSET,
-                    Derivation.BASIC,
-                    Derivation.MULTISELECT,
-                    Derivation.UNION,
-                ]:
-                    skip.add(priority_concept.address)
-                break
-        attempted.add(priority_concept.address)
-        complete, found, missing, partial, virtual = validate_stack(
-            environment,
-            stack,
-            mandatory_list,
-            completion_mandatory,
-            conditions=conditions,
-            accept_partial=accept_partial,
-        )
-        mandatory_completion = [c.address for c in completion_mandatory]
+
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} {priority_concept.derivation} condition {conditions} flag for accepting partial addresses is"
-            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} synonyms partial {partial} virtual {virtual}), attempted {attempted}, mandatory w/ filter {mandatory_completion}"
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
         )
-        if complete == ValidationResult.INCOMPLETE_CONDITION:
-            cond_dict = {str(node): node.preexisting_conditions for node in stack}
-            logger.error(f"Have {cond_dict} and need {str(conditions)}")
-            raise SyntaxError(f"Have {cond_dict} and need {str(conditions)}")
-        # early exit if we have a complete stack with one node
-        # we can only early exit if we have a complete stack
-        # and we are not looking for more non-partial sources
-        if complete == ValidationResult.COMPLETE and (
-            not accept_partial or (accept_partial and not partial)
-        ):
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} breaking loop, complete"
-            )
-            break
-        elif complete == ValidationResult.COMPLETE and accept_partial and partial:
-            if len(attempted) == len(mandatory_list):
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Breaking as we have attempted all nodes"
-                )
-                break
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found complete stack with partials {partial}, continuing search, attempted {attempted} all {len(mandatory_list)}"
+    else:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} wrapping multiple parent nodes {[type(x) for x in context.stack]} in merge node"
+        )
+        output = MergeNode(
+            input_concepts=non_virtual,
+            output_concepts=non_virtual,
+            environment=context.environment,
+            parents=context.stack,
+            depth=context.depth,
+        )
+
+    # ensure we can resolve our final merge
+    output.resolve()
+    if condition_required and context.conditions:
+        output.add_condition(context.conditions.conditional)
+        if context.conditions.existence_arguments:
+            append_existence_check(
+                output,
+                context.environment,
+                context.g,
+                where=context.conditions,
+                history=context.history,
             )
-        else:
+    elif context.conditions:
+        output.preexisting_conditions = context.conditions.conditional
+    logger.info(
+        f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Graph is connected, returning {type(output)} node partial {[c.address for c in output.partial_concepts]} with {context.conditions}"
+    )
+    if condition_required and context.conditions and non_virtual_different:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Conditions {context.conditions} were injected, checking if we need a group to restore grain"
+        )
+        result = GroupNode.check_if_required(
+            downstream_concepts=context.original_mandatory,
+            parents=[output.resolve()],
+            environment=context.environment,
+            depth=context.depth,
+        )
+        if result.required:
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Not complete, continuing search"
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Adding group node"
             )
-            # if we have attempted on root node, we've tried them all.
-            # inject in another search with filter concepts
-            if priority_concept.derivation == Derivation.ROOT:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Breaking as attempted root with no results"
+            return GroupNode(
+                output_concepts=context.original_mandatory,
+                input_concepts=context.original_mandatory,
+                environment=context.environment,
+                parents=[output],
+                partial_concepts=output.partial_concepts,
+                preexisting_conditions=context.conditions.conditional,
+                depth=context.depth,
                 )
-                break
+    return output
 
-    logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}, virtual {virtual}"
+
+def _search_concepts(
+    mandatory_list: List[BuildConcept],
+    environment: BuildEnvironment,
+    depth: int,
+    g: ReferenceGraph,
+    history: History,
+    accept_partial: bool = False,
+    conditions: BuildWhereClause | None = None,
+) -> StrategyNode | None:
+
+    context = initialize_loop_context(
+        mandatory_list=mandatory_list,
+        environment=environment,
+        depth=depth,
+        g=g,
+        history=history,
+        accept_partial=accept_partial,
+        conditions=conditions,
     )
-    if complete == ValidationResult.COMPLETE:
-        condition_required = True
-        non_virtual = [c for c in completion_mandatory if c.address not in virtual]
-        non_virtual_output = [c for c in original_mandatory if c.address not in virtual]
-        non_virtual_different = len(completion_mandatory) != len(original_mandatory)
-        non_virtual_difference_values = set(
-            [x.address for x in completion_mandatory]
-        ).difference(set([x.address for x in original_mandatory]))
-        if not conditions:
-            condition_required = False
-            non_virtual = [c for c in mandatory_list if c.address not in virtual]
 
-        elif all([x.preexisting_conditions == conditions.conditional for x in stack]):
-            condition_required = False
-            non_virtual = [c for c in mandatory_list if c.address not in virtual]
+    while context.incomplete:
+        priority_concept = get_priority_concept(
+            context.mandatory_list,
+            context.attempted,
+            found_concepts=context.found,
+            depth=depth,
+        )
 
-        if conditions and not condition_required:
-            parent_map = {
-                str(x): x.preexisting_conditions == conditions.conditional
-                for x in stack
-            }
-            logger.info(
-                f"Condition {conditions} not required, parents included filtering! {parent_map }"
-            )
-        if len(stack) == 1:
-            output: StrategyNode = stack[0]
-            if non_virtual_different:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found different non-virtual output concepts ({non_virtual_difference_values}), removing condition injected values"
-                )
-                output.set_output_concepts(
-                    [
-                        x
-                        for x in output.output_concepts
-                        if x.address in non_virtual_output
-                    ],
-                    rebuild=False,
-                )
+        local_conditions = evaluate_loop_conditions(context, priority_concept)
+        logger.info(
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} granularity {priority_concept.granularity} with conditions {local_conditions}"
+        )
 
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
-            )
-        else:
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} wrapping multiple parent nodes {[type(x) for x in stack]} in merge node"
-            )
-            output = MergeNode(
-                input_concepts=non_virtual,
-                output_concepts=non_virtual,
-                environment=environment,
-                parents=stack,
-                depth=depth,
-            )
+        candidates = [
+            c for c in context.mandatory_list if c.address != priority_concept.address
+        ]
+        candidate_list = generate_candidates_restrictive(
+            priority_concept,
+            candidates,
+            context.skip,
+            depth=depth,
+            conditions=context.conditions,
+        )
 
-        # ensure we can resolve our final merge
-        output.resolve()
-        if condition_required and conditions:
-            output.add_condition(conditions.conditional)
-            if conditions.existence_arguments:
-                append_existence_check(
-                    output, environment, g, where=conditions, history=history
-                )
-        elif conditions:
-            output.preexisting_conditions = conditions.conditional
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning {type(output)} node partial {[c.address for c in output.partial_concepts]}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {priority_concept.address}, accept_partial {accept_partial}, optional {[v.address for v in candidate_list]}, exhausted {[c for c in context.skip]}"
         )
-        if condition_required and conditions and non_virtual_different:
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Conditions {conditions} were injected, checking if we need a group to restore grain"
-            )
-            result = GroupNode.check_if_required(
-                downstream_concepts=original_mandatory,
-                parents=[output.resolve()],
-                environment=environment,
-                depth=depth,
-            )
-            if result.required:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Adding group node"
-                )
-                return GroupNode(
-                    output_concepts=original_mandatory,
-                    input_concepts=original_mandatory,
-                    environment=environment,
-                    parents=[output],
-                    partial_concepts=output.partial_concepts,
-                    preexisting_conditions=conditions.conditional,
-                    depth=depth,
-                )
-        return output
+        node = generate_node(
+            priority_concept,
+            candidate_list,
+            environment,
+            g,
+            depth,
+            source_concepts=search_concepts,
+            accept_partial=accept_partial,
+            history=history,
+            conditions=local_conditions,
+        )
+        if node:
+            context.stack.append(node)
+            node.resolve()
+            # these concepts should not be attempted to be sourced again
+            # as fetching them requires operating on a subset of concepts
+            if priority_concept.derivation in SKIPPED_DERIVATIONS:
+                context.skip.add(priority_concept.address)
+        context.attempted.add(priority_concept.address)
+        complete, found_c, missing_c, partial, virtual = validate_stack(
+            environment,
+            context.stack,
+            context.mandatory_list,
+            context.completion_mandatory,
+            conditions=context.conditions,
+            accept_partial=accept_partial,
+        )
+        # assig
+        context.found = found_c
+        early_exit = check_for_early_exit(complete, partial, context, priority_concept)
+        if early_exit:
+            break
+
+    logger.info(
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {context.found} from {[n for n in context.stack]} (missing {context.all_mandatory - context.found}), attempted {context.attempted}, virtual {virtual}"
+    )
+    if complete == ValidationResult.COMPLETE:
+        return generate_loop_completion(context, virtual)
 
     # if we can't find it after expanding to a merge, then
     # accept partials in join paths
-
     if not accept_partial:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Stack is not connected graph, flag for accepting partial addresses is {accept_partial}, changing flag"
         )
         partial_search = search_concepts(
+            # use the original mandatory list
             mandatory_list=mandatory_list,
            environment=environment,
            depth=depth,
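Net effect of the hunk above: the monolithic _search_concepts body becomes a thin driver that delegates to initialize_loop_context, evaluate_loop_conditions, check_for_early_exit, and generate_loop_completion. A toy of that loop shape; helper names mirror the diff but every body here is a stand-in, not trilogy's logic:

from dataclasses import dataclass, field


@dataclass
class Ctx:
    all_mandatory: set[str]
    attempted: set[str] = field(default_factory=set)
    found: set[str] = field(default_factory=set)

    @property
    def incomplete(self) -> bool:
        return self.attempted != self.all_mandatory


def get_priority_concept(ctx: Ctx) -> str:
    return sorted(ctx.all_mandatory - ctx.attempted)[0]


def generate_node(concept: str) -> str | None:
    return concept  # always "resolves" in this toy


def check_for_early_exit(ctx: Ctx) -> bool:
    return ctx.found == ctx.all_mandatory  # COMPLETE analogue


def _search(ctx: Ctx) -> set[str] | None:
    while ctx.incomplete:
        concept = get_priority_concept(ctx)
        node = generate_node(concept)
        if node is not None:
            ctx.found.add(node)
        ctx.attempted.add(concept)
        if check_for_early_exit(ctx):
            return ctx.found  # generate_loop_completion analogue
    return None


assert _search(Ctx({"a", "b"})) == {"a", "b"}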
@@ -1093,7 +538,7 @@ def _search_concepts(
         )
         return partial_search
     logger.error(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found},"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {context.all_mandatory - context.found},"
     )
 
     return None