pytrilogy 0.0.1.110__py3-none-any.whl → 0.0.1.111__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of pytrilogy might be problematic.
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/RECORD +33 -33
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -1
- trilogy/core/models.py +85 -67
- trilogy/core/optimization.py +23 -8
- trilogy/core/processing/concept_strategies_v3.py +44 -19
- trilogy/core/processing/node_generators/basic_node.py +2 -0
- trilogy/core/processing/node_generators/common.py +3 -1
- trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
- trilogy/core/processing/node_generators/filter_node.py +36 -6
- trilogy/core/processing/node_generators/node_merge_node.py +34 -23
- trilogy/core/processing/node_generators/rowset_node.py +30 -6
- trilogy/core/processing/node_generators/select_node.py +23 -9
- trilogy/core/processing/node_generators/unnest_node.py +24 -3
- trilogy/core/processing/node_generators/window_node.py +4 -2
- trilogy/core/processing/nodes/__init__.py +7 -6
- trilogy/core/processing/nodes/base_node.py +40 -6
- trilogy/core/processing/nodes/filter_node.py +15 -1
- trilogy/core/processing/nodes/group_node.py +20 -1
- trilogy/core/processing/nodes/merge_node.py +36 -7
- trilogy/core/processing/nodes/select_node_v2.py +34 -39
- trilogy/core/processing/nodes/unnest_node.py +12 -0
- trilogy/core/processing/nodes/window_node.py +11 -0
- trilogy/core/processing/utility.py +0 -14
- trilogy/core/query_processor.py +125 -29
- trilogy/dialect/base.py +45 -40
- trilogy/executor.py +31 -3
- trilogy/parsing/parse_engine.py +49 -17
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/top_level.txt +0 -0
trilogy/core/processing/concept_strategies_v3.py

@@ -1,7 +1,6 @@
 from collections import defaultdict
 from typing import List, Optional, Callable
 
-
 from trilogy.constants import logger
 from trilogy.core.enums import PurposeLineage, Granularity, FunctionType
 from trilogy.core.env_processor import generate_graph
@@ -278,9 +277,10 @@ def generate_node(
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
         )
-
+        node = gen_concept_merge_node(
             concept, local_optional, environment, g, depth + 1, source_concepts, history
         )
+        return node
     elif concept.derivation == PurposeLineage.CONSTANT:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
@@ -340,13 +340,15 @@ def validate_stack(
     stack: List[StrategyNode],
     concepts: List[Concept],
     accept_partial: bool = False,
-) -> tuple[ValidationResult, set[str], set[str], set[str]]:
+) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
     found_map = defaultdict(set)
     found_addresses: set[str] = set()
     non_partial_addresses: set[str] = set()
     partial_addresses: set[str] = set()
+    virtual_addresses: set[str] = set()
     for node in stack:
-
+        resolved = node.resolve()
+        for concept in resolved.output_concepts:
             found_map[str(node)].add(concept)
             if concept not in node.partial_concepts:
                 found_addresses.add(concept.address)
@@ -354,11 +356,20 @@ def validate_stack(
                 # remove it from our partial tracking
                 if concept.address in partial_addresses:
                     partial_addresses.remove(concept.address)
+                if concept.address in virtual_addresses:
+                    virtual_addresses.remove(concept.address)
             if concept in node.partial_concepts:
+                if concept.address in non_partial_addresses:
+                    continue
                 partial_addresses.add(concept.address)
                 if accept_partial:
                     found_addresses.add(concept.address)
                     found_map[str(node)].add(concept)
+        for concept in node.virtual_output_concepts:
+            if concept.address in non_partial_addresses:
+                continue
+            found_addresses.add(concept.address)
+            virtual_addresses.add(concept.address)
     # zip in those we know we found
     if not all([c.address in found_addresses for c in concepts]):
         return (
@@ -366,12 +377,25 @@
             found_addresses,
             {c.address for c in concepts if c.address not in found_addresses},
             partial_addresses,
+            virtual_addresses,
         )
     graph_count, graphs = get_disconnected_components(found_map)
     if graph_count in (0, 1):
-        return
+        return (
+            ValidationResult.COMPLETE,
+            found_addresses,
+            set(),
+            partial_addresses,
+            virtual_addresses,
+        )
     # if we have too many subgraphs, we need to keep searching
-    return
+    return (
+        ValidationResult.DISCONNECTED,
+        found_addresses,
+        set(),
+        partial_addresses,
+        virtual_addresses,
+    )
 
 
 def depth_to_prefix(depth: int) -> str:
@@ -404,7 +428,10 @@ def search_concepts(
         accept_partial=accept_partial,
         history=history,
     )
-
+    # a node may be mutated after be cached; always store a copy
+    history.search_to_history(
+        mandatory_list, accept_partial, result.copy() if result else None
+    )
     return result
 
 
@@ -472,13 +499,13 @@ def _search_concepts(
                 skip.add(priority_concept.address)
                 break
         attempted.add(priority_concept.address)
-        complete, found, missing, partial = validate_stack(
+        complete, found, missing, partial, virtual = validate_stack(
            stack, mandatory_list, accept_partial
        )
 
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} flag for accepting partial addresses is "
-            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial}), attempted {attempted}"
+            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial} virtual {virtual}), attempted {attempted}"
        )
        # early exit if we have a complete stack with one node
        # we can only early exit if we have a complete stack
@@ -489,7 +516,7 @@ def _search_concepts(
            break
 
    logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}, virtual {virtual}"
    )
    if complete == ValidationResult.COMPLETE:
        all_partial = [
@@ -503,24 +530,22 @@ def _search_concepts(
                ]
            )
        ]
+        non_virtual = [c for c in mandatory_list if c.address not in virtual]
        if len(stack) == 1:
+            output = stack[0]
            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
            )
-            return
+            return output
 
        output = MergeNode(
-            input_concepts=
-            output_concepts=
+            input_concepts=non_virtual,
+            output_concepts=non_virtual,
            environment=environment,
            g=g,
            parents=stack,
            depth=depth,
            partial_concepts=all_partial,
-            # always hide merge concepts
-            hidden_concepts=[
-                x for x in mandatory_list if x.derivation == PurposeLineage.MERGE
-            ],
        )
 
        # ensure we can resolve our final merge
@@ -573,7 +598,7 @@ def _search_concepts(
        )
        return partial_search
    logger.error(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found}"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found},"
    )
    return None
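A minimal illustrative sketch (not part of the diff) of the new bookkeeping in validate_stack: virtual outputs count as "found" but are tracked separately so the final MergeNode can exclude them from its inputs. Node here is a hypothetical stand-in for StrategyNode, with addresses reduced to plain strings.

# hypothetical stand-in for StrategyNode
class Node:
    def __init__(self, outputs, virtual=()):
        self.output_concepts = set(outputs)
        self.virtual_output_concepts = set(virtual)

def validate(stack, required):
    found, virtual = set(), set()
    for node in stack:
        found |= node.output_concepts
        for address in node.virtual_output_concepts:
            # virtual concepts satisfy the search but are flagged separately
            found.add(address)
            virtual.add(address)
    return required <= found, found, required - found, virtual

stack = [Node({"order.id"}, virtual={"customer.id"})]
complete, found, missing, virtual = validate(stack, {"order.id", "customer.id"})
assert complete and virtual == {"customer.id"}
# downstream, only non-virtual concepts feed the merge (mirrors non_virtual above)
non_virtual = [c for c in {"order.id", "customer.id"} if c not in virtual]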
trilogy/core/processing/node_generators/basic_node.py

@@ -8,6 +8,7 @@ from trilogy.core.processing.nodes import StrategyNode, History, MergeNode
 from trilogy.core.processing.node_generators.common import (
     resolve_function_parent_concepts,
 )
+from trilogy.utility import unique
 from trilogy.constants import logger
 
 LOGGER_PREFIX = "[GEN_BASIC_NODE]"
@@ -37,6 +38,7 @@ def gen_basic_node(
     attempts.append((parent_concepts + local_optional, local_optional + [concept]))
 
     for attempt, output in reversed(attempts):
+        attempt = unique(attempt, "address")
         parent_node = source_concepts(
             mandatory_list=attempt,
             environment=environment,
trilogy/core/processing/node_generators/common.py

@@ -56,7 +56,9 @@ def resolve_filter_parent_concepts(
     base_existence = []
     base_rows = [direct_parent]
     base_rows += concept.lineage.where.row_arguments
-
+    # TODO: pass tuple groups through
+    for ctuple in concept.lineage.where.existence_arguments:
+        base_existence += list(ctuple)
     if direct_parent.grain:
         base_rows += direct_parent.grain.components_copy
     if (
trilogy/core/processing/node_generators/concept_merge_node.py

@@ -56,6 +56,7 @@ def gen_concept_merge_node(
 
     # get additional concepts that should be merged across the environments
     additional_merge: List[Concept] = [*lineage.concepts]
+    target_namespaces = set(x.namespace for x in [concept] + local_optional)
     for x in local_optional:
         if x.address in environment.merged_concepts:
             ms = environment.merged_concepts[x.address].lineage
@@ -64,6 +65,8 @@ def gen_concept_merge_node(
 
     for select in lineage.concepts:
         # if it's a merge concept, filter it out of the optional
+        if select.namespace not in target_namespaces:
+            continue
         sub_optional = [
             x
             for x in local_optional
@@ -76,6 +79,9 @@ def gen_concept_merge_node(
         ]
         sub_optional += sub_additional_merge
         final: List[Concept] = unique([select] + sub_optional, "address")
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} generating concept merge parent node with {[x.address for x in final]}"
+        )
         snode: StrategyNode = source_concepts(
             mandatory_list=final,
             environment=environment,
@@ -111,17 +117,18 @@ def gen_concept_merge_node(
 
     additional_relevant = [x for x in outputs if x.address in enrichment]
     final_outputs = outputs + additional_relevant + [concept]
+    virtual_outputs = [x for x in final_outputs if x.derivation == PurposeLineage.MERGE]
     node = MergeNode(
         input_concepts=[x for y in base_parents for x in y.output_concepts],
-        output_concepts=[
-
-            x for x in final_outputs if x.derivation == PurposeLineage.MERGE
+        output_concepts=[
+            x for x in final_outputs if x.derivation != PurposeLineage.MERGE
         ],
         environment=environment,
         g=g,
         depth=depth,
         parents=base_parents,
         node_joins=node_joins,
+        virtual_output_concepts=virtual_outputs,
     )
 
     qds = node.rebuild_cache()
@@ -149,9 +156,17 @@ def gen_concept_merge_node(
             f"{padding(depth)}{LOGGER_PREFIX} all enriched concepts returned from base merge concept node; exiting early"
         )
         return node
+    missing = [
+        x
+        for x in local_optional
+        if x.address not in [y.address for y in node.output_concepts]
+    ]
+    logger.info(
+        f"{padding(depth)}{LOGGER_PREFIX} generating merge concept enrichment node for missing {[x.address for x in missing]}"
+    )
     enrich_node: MergeNode = source_concepts(  # this fetches the parent + join keys
         # to then connect to the rest of the query
-        mandatory_list=additional_relevant +
+        mandatory_list=additional_relevant + missing,
         environment=environment,
         g=g,
         depth=depth + 1,
@@ -159,7 +174,7 @@ def gen_concept_merge_node(
     )
     if not enrich_node:
         logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} Cannot generate merge concept enrichment node for {concept} with optional {local_optional}, returning just merge concept"
+            f"{padding(depth)}{LOGGER_PREFIX} Cannot generate merge concept enrichment node for {concept.address} with optional {[x.address for x in local_optional]}, returning just merge concept"
         )
         return node
 
@@ -170,12 +185,12 @@ def gen_concept_merge_node(
     return MergeNode(
         input_concepts=enrich_node.output_concepts + node.output_concepts,
         # also filter out the
-        output_concepts=
-        hidden_concepts=[
+        output_concepts=[
             x
             for x in node.output_concepts + local_optional
-            if x.derivation
+            if x.derivation != PurposeLineage.MERGE
         ],
+        hidden_concepts=[],
         environment=environment,
         g=g,
         depth=depth,
@@ -195,4 +210,5 @@ def gen_concept_merge_node(
             )
         ],
         partial_concepts=node.partial_concepts,
+        virtual_output_concepts=virtual_outputs,
     )
trilogy/core/processing/node_generators/filter_node.py

@@ -2,11 +2,14 @@ from typing import List
 
 
 from trilogy.core.enums import JoinType
-from trilogy.core.models import
-
-
+from trilogy.core.models import Concept, Environment, FilterItem
+from trilogy.core.processing.nodes import (
+    FilterNode,
+    MergeNode,
+    NodeJoin,
+    History,
+    StrategyNode,
 )
-from trilogy.core.processing.nodes import FilterNode, MergeNode, NodeJoin, History
 from trilogy.core.processing.node_generators.common import (
     resolve_filter_parent_concepts,
 )
@@ -25,16 +28,19 @@ def gen_filter_node(
     depth: int,
     source_concepts,
     history: History | None = None,
-) ->
+) -> StrategyNode | None:
     immediate_parent, parent_row_concepts, parent_existence_concepts = (
         resolve_filter_parent_concepts(concept)
     )
+    if not isinstance(concept.lineage, FilterItem):
+        raise SyntaxError('Filter node must have a lineage of type "FilterItem"')
+    where = concept.lineage.where
 
     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} fetching filter node row parents {[x.address for x in parent_row_concepts]}"
     )
     core_parents = []
-    parent = source_concepts(
+    parent: StrategyNode = source_concepts(
         mandatory_list=parent_row_concepts,
         environment=environment,
         g=g,
@@ -43,7 +49,28 @@ def gen_filter_node(
     )
 
     if not parent:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} filter node row parents {[x.address for x in parent_row_concepts]} could not be found"
+        )
         return None
+
+    if not local_optional and not parent_existence_concepts:
+        optimized_pushdown = True
+    else:
+        optimized_pushdown = False
+
+    if optimized_pushdown:
+        if parent.conditions:
+            parent.conditions = parent.conditions + where.conditional
+        else:
+            parent.conditions = where.conditional
+        parent.output_concepts = [concept]
+        parent.rebuild_cache()
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} returning optimized filter node with pushdown to parent with condition {where.conditional}"
+        )
+        return parent
+
     core_parents.append(parent)
     if parent_existence_concepts:
         logger.info(
@@ -57,6 +84,9 @@ def gen_filter_node(
             history=history,
         )
         if not parent_existence:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} filter existence node parents could not be found"
+            )
             return None
         core_parents.append(parent_existence)
 
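A minimal illustrative sketch (not part of the diff) of the pushdown decision added to gen_filter_node: when no optional concepts and no existence subqueries ride along, the filter condition is merged into the parent node instead of wrapping it in a FilterNode. Strings stand in for the library's Conditional objects, and Parent is a hypothetical stand-in.

# hypothetical stand-in for the parent StrategyNode
class Parent:
    conditions: str | None = None

def gen_filter(parent: Parent, condition: str, local_optional, existence_concepts):
    optimized_pushdown = not local_optional and not existence_concepts
    if optimized_pushdown:
        # combine with any existing parent condition rather than nesting a node
        parent.conditions = (
            f"({parent.conditions}) AND ({condition})" if parent.conditions else condition
        )
        return parent  # the parent now does the filtering itself
    return ("FilterNode", parent, condition)  # fall back to an explicit filter node

p = Parent()
assert gen_filter(p, "x > 1", [], []) is p and p.conditions == "x > 1"
assert gen_filter(Parent(), "y = 2", ["opt"], [])[0] == "FilterNode"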
trilogy/core/processing/node_generators/node_merge_node.py

@@ -10,6 +10,7 @@ from trilogy.utility import unique
 from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
 from trilogy.core.processing.utility import padding
 from trilogy.core.processing.graph_utils import extract_mandatory_subgraphs
+from trilogy.core.enums import PurposeLineage
 
 LOGGER_PREFIX = "[GEN_MERGE_NODE]"
 
@@ -65,13 +66,13 @@ def identify_ds_join_paths(
         ]
         if partial and not accept_partial:
             return None
-
+
         return PathInfo(
             paths=paths,
             datasource=datasource,
             reduced_concepts=reduce_path_concepts(paths, g),
             concept_subgraphs=extract_mandatory_subgraphs(paths, g),
-        )
+        )
     return None
 
 
@@ -88,14 +89,7 @@ def gen_merge_node(
     join_candidates: List[PathInfo] = []
     # anchor on datasources
     final_all_concepts = []
-    # implicit_upstream = {}
     for x in all_concepts:
-        # if x.derivation in (PurposeLineage.AGGREGATE, PurposeLineage.BASIC):
-        #     final_all_concepts +=resolve_function_parent_concepts(x)
-        # elif x.derivation == PurposeLineage.FILTER:
-        #     final_all_concepts +=resolve_filter_parent_concepts(x)
-        # else:
-        #     final_all_concepts.append(x)
         final_all_concepts.append(x)
     for datasource in environment.datasources.values():
         path = identify_ds_join_paths(final_all_concepts, g, datasource, accept_partial)
@@ -104,18 +98,25 @@ def gen_merge_node(
     join_candidates.sort(key=lambda x: sum([len(v) for v in x.paths.values()]))
     if not join_candidates:
         return None
-
-    logger.info(
-        f"{padding(depth)}{LOGGER_PREFIX} Join candidate: {join_candidate.paths}"
-    )
-    join_additions: List[set[str]] = []
+    join_additions: list[set[str]] = []
     for candidate in join_candidates:
         join_additions.append(candidate.reduced_concepts)
-
-
-
+
+    common: set[str] = set()
+    final_candidates: list[set[str]] = []
+    # find all values that show up in every join_additions
+    for ja in join_additions:
+        if not common:
+            common = ja
+        else:
+            common = common.intersection(ja)
+        if all(ja.issubset(y) for y in join_additions):
+            final_candidates.append(ja)
+
+    if not final_candidates:
+        filtered_paths = [x.difference(common) for x in join_additions]
        raise AmbiguousRelationshipResolutionException(
-            f"Ambiguous concept join resolution - possible paths =
+            f"Ambiguous concept join resolution fetching {[x.address for x in all_concepts]} - unique values in possible paths = {filtered_paths}. Include an additional concept to disambiguate",
            join_additions,
        )
    if not join_candidates:
@@ -123,9 +124,10 @@ def gen_merge_node(
            f"{padding(depth)}{LOGGER_PREFIX} No additional join candidates could be found"
        )
        return None
-    shortest: PathInfo = sorted(
-
-
+    shortest: PathInfo = sorted(
+        [x for x in join_candidates if x.reduced_concepts in final_candidates],
+        key=lambda x: len(x.reduced_concepts),
+    )[0]
    logger.info(f"{padding(depth)}{LOGGER_PREFIX} final path is {shortest.paths}")
    # logger.info(f'{padding(depth)}{LOGGER_PREFIX} final reduced concepts are {shortest.concs}')
    parents = []
@@ -145,11 +147,20 @@ def gen_merge_node(
                f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
            )
            return None
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)}"
+        )
        parents.append(parent)
 
    return MergeNode(
-        input_concepts=[
-
+        input_concepts=[
+            environment.concepts[x]
+            for x in shortest.reduced_concepts
+            if environment.concepts[x].derivation != PurposeLineage.MERGE
+        ],
+        output_concepts=[
+            x for x in all_concepts if x.derivation != PurposeLineage.MERGE
+        ],
        environment=environment,
        g=g,
        parents=parents,
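A minimal illustrative sketch (not part of the diff) of the disambiguation added to gen_merge_node, run on hypothetical concept-address sets: a candidate join path survives only if it is a subset of every alternative, i.e. it adds nothing the other paths do not also need, and only the non-shared concepts are surfaced in the ambiguity error.

join_additions = [
    {"order.id", "customer.id"},
    {"order.id", "customer.id", "store.id"},
]
# concepts shared by every candidate path
common = set.intersection(*join_additions)
# a path is unambiguous only if it is a subset of all alternatives
final_candidates = [
    ja for ja in join_additions if all(ja.issubset(y) for y in join_additions)
]
if not final_candidates:
    # report only what distinguishes the competing paths
    print([x - common for x in join_additions])
else:
    shortest = min(final_candidates, key=len)
    print(sorted(shortest))  # ['customer.id', 'order.id']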
trilogy/core/processing/node_generators/rowset_node.py

@@ -35,8 +35,26 @@ def gen_rowset_node(
     lineage: RowsetItem = concept.lineage
     rowset: RowsetDerivationStatement = lineage.rowset
     select: SelectStatement | MultiSelectStatement = lineage.rowset.select
+    parents: List[StrategyNode] = []
     if where := select.where_clause:
-        targets = select.output_components + where.conditional.
+        targets = select.output_components + where.conditional.row_arguments
+        for sub_select in where.conditional.existence_arguments:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} generating parent existence node with {[x.address for x in sub_select]}"
+            )
+            parent_check = source_concepts(
+                mandatory_list=sub_select,
+                environment=environment,
+                g=g,
+                depth=depth + 1,
+                history=history,
+            )
+            if not parent_check:
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} Cannot generate parent existence node for rowset node for {concept}"
+                )
+                return None
+            parents.append(parent_check)
     else:
         targets = select.output_components
     node: StrategyNode = source_concepts(
@@ -46,6 +64,14 @@ def gen_rowset_node(
         depth=depth + 1,
         history=history,
     )
+
+    # add our existence concepts in
+    if parents:
+        node.parents += parents
+        for parent in parents:
+            for x in parent.output_concepts:
+                if x.address not in node.output_lcl:
+                    node.existence_concepts.append(x)
     if not node:
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} Cannot generate rowset node for {concept}"
@@ -53,11 +79,7 @@ def gen_rowset_node(
         return None
     node.conditions = select.where_clause.conditional if select.where_clause else None
     enrichment = set([x.address for x in local_optional])
-    rowset_relevant = [
-        x
-        for x in rowset.derived_concepts
-        # if x.address == concept.address or x.address in enrichment
-    ]
+    rowset_relevant = [x for x in rowset.derived_concepts]
     select_hidden = set([x.address for x in select.hidden_components])
     rowset_hidden = [
         x
@@ -86,9 +108,11 @@ def gen_rowset_node(
     # but don't include anything aggregate at this point
     node.rebuild_cache()
     assert node.resolution_cache
+
     node.resolution_cache.grain = concept_list_to_grain(
         node.output_concepts, parent_sources=node.resolution_cache.datasources
     )
+
     possible_joins = concept_to_relevant_joins(additional_relevant)
     if not local_optional:
         logger.info(
trilogy/core/processing/node_generators/select_node.py

@@ -53,7 +53,7 @@ def dm_to_strategy_node(
     # we have to group
     else:
         logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} not all grain components are in output {str(dm.matched)}, group to actual grain"
+            f"{padding(depth)}{LOGGER_PREFIX} not all grain components {target_grain} are in output {str(dm.matched)}, group to actual grain"
         )
         force_group = True
     elif all([x in dm.matched for x in datasource.grain.components]):
@@ -76,7 +76,7 @@ def dm_to_strategy_node(
         partial_concepts=dm.partial.concepts,
         accept_partial=accept_partial,
         datasource=datasource,
-        grain=
+        grain=datasource.grain,
     )
     # we need to nest the group node one further
     if force_group is True:
@@ -317,13 +317,19 @@ def gen_select_node_from_table(
     )
     if target_grain and target_grain.issubset(datasource.grain):
 
-        if
+        if (
+            all([x in all_lcl for x in target_grain.components])
+            and target_grain == datasource.grain
+        ):
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} target grain components match all lcl, group to false"
+            )
             force_group = False
         # if we are not returning the grain
         # we have to group
         else:
             logger.info(
-                f"{padding(depth)}{LOGGER_PREFIX} not all grain components are in output {str(all_lcl)}, group to actual grain"
+                f"{padding(depth)}{LOGGER_PREFIX} not all grain components {target_grain} are in output {str(all_lcl)}, group to actual grain"
             )
             force_group = True
     elif all([x in all_lcl for x in datasource.grain.components]):
@@ -363,7 +369,7 @@ def gen_select_node_from_table(
     else:
         candidate = bcandidate
     logger.info(
-        f"{padding(depth)}{LOGGER_PREFIX} found select node with {datasource.identifier}, returning {candidate.output_lcl}"
+        f"{padding(depth)}{LOGGER_PREFIX} found select node with {datasource.identifier}, force group is {force_group}, returning {candidate.output_lcl}"
    )
    candidates[datasource.identifier] = candidate
    scores[datasource.identifier] = -len(partial_concepts)
@@ -467,6 +473,8 @@ def gen_select_node(
    target_grain = Grain()
    for ac in all_concepts:
        target_grain += ac.grain
+    if target_grain.abstract:
+        target_grain = Grain(components=all_concepts)
    if materialized_lcl != all_lcl:
        logger.info(
            f"{padding(depth)}{LOGGER_PREFIX} Skipping select node generation for {concept.address} "
@@ -513,13 +521,15 @@ def gen_select_node(
            [c.address in [x.address for x in p.partial_concepts] for p in parents]
        )
    ]
-    force_group =
+    force_group = None
+    inferred_grain = sum([x.grain for x in parents if x.grain], Grain())
    for candidate in parents:
        if candidate.grain and not candidate.grain.issubset(target_grain):
            force_group = True
    if len(parents) == 1:
        candidate = parents[0]
    else:
+
        candidate = MergeNode(
            output_concepts=[concept] + found,
            input_concepts=[concept] + found,
@@ -528,13 +538,13 @@ def gen_select_node(
            parents=parents,
            depth=depth,
            partial_concepts=all_partial,
-            grain=
+            grain=inferred_grain,
        )
        candidate.depth += 1
-    source_grain = candidate.grain
+    # source_grain = candidate.grain
    if force_group:
        logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} datasource grain {
+            f"{padding(depth)}{LOGGER_PREFIX} datasource grain {inferred_grain} does not match target grain {target_grain} for select, adding group node"
        )
        return GroupNode(
            output_concepts=candidate.output_concepts,
@@ -545,6 +555,10 @@ def gen_select_node(
            depth=depth,
            partial_concepts=candidate.partial_concepts,
        )
+    else:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} datasource grain {inferred_grain} matches target grain {target_grain} for select, returning without group"
+        )
    return candidate
 
    if not accept_partial_optional: