pytrilogy 0.0.1.118__py3-none-any.whl → 0.0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/METADATA +1 -1
- pytrilogy-0.0.2.2.dist-info/RECORD +82 -0
- {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +6 -0
- trilogy/core/enums.py +7 -2
- trilogy/core/env_processor.py +43 -19
- trilogy/core/functions.py +1 -0
- trilogy/core/models.py +674 -146
- trilogy/core/optimization.py +31 -28
- trilogy/core/optimizations/inline_constant.py +4 -1
- trilogy/core/optimizations/inline_datasource.py +25 -4
- trilogy/core/optimizations/predicate_pushdown.py +94 -54
- trilogy/core/processing/concept_strategies_v3.py +69 -39
- trilogy/core/processing/graph_utils.py +3 -3
- trilogy/core/processing/node_generators/__init__.py +0 -2
- trilogy/core/processing/node_generators/basic_node.py +30 -17
- trilogy/core/processing/node_generators/filter_node.py +3 -1
- trilogy/core/processing/node_generators/node_merge_node.py +345 -96
- trilogy/core/processing/node_generators/rowset_node.py +18 -16
- trilogy/core/processing/node_generators/select_node.py +45 -85
- trilogy/core/processing/nodes/__init__.py +2 -0
- trilogy/core/processing/nodes/base_node.py +22 -5
- trilogy/core/processing/nodes/filter_node.py +3 -0
- trilogy/core/processing/nodes/group_node.py +20 -2
- trilogy/core/processing/nodes/merge_node.py +32 -18
- trilogy/core/processing/nodes/select_node_v2.py +17 -3
- trilogy/core/processing/utility.py +100 -8
- trilogy/core/query_processor.py +77 -24
- trilogy/dialect/base.py +11 -46
- trilogy/dialect/bigquery.py +1 -1
- trilogy/dialect/common.py +11 -0
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/presto.py +1 -0
- trilogy/hooks/graph_hook.py +50 -5
- trilogy/hooks/query_debugger.py +1 -0
- trilogy/parsing/common.py +8 -5
- trilogy/parsing/parse_engine.py +52 -27
- trilogy/parsing/render.py +20 -9
- trilogy/parsing/trilogy.lark +13 -8
- pytrilogy-0.0.1.118.dist-info/RECORD +0 -83
- trilogy/core/processing/node_generators/concept_merge_node.py +0 -214
- {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/top_level.txt +0 -0

trilogy/core/processing/node_generators/select_node.py

@@ -1,5 +1,4 @@
-from
-from typing import List, Optional
+from typing import List, Optional, Callable
 
 from trilogy.core.enums import PurposeLineage
 from trilogy.core.models import (
@@ -43,9 +42,11 @@ def dm_to_strategy_node(
     environment: Environment,
     g: nx.DiGraph,
     depth: int,
+    source_concepts: Callable,
     accept_partial: bool = False,
 ) -> StrategyNode:
     datasource = dm.datasource
+
     if target_grain and target_grain.issubset(datasource.grain):
         if all([x in dm.matched for x in target_grain.components]):
             force_group = False
@@ -66,6 +67,17 @@ def dm_to_strategy_node(
             f"{padding(depth)}{LOGGER_PREFIX} target grain is not subset of datasource grain {datasource.grain}, required to group"
         )
         force_group = True
+    # if isinstance(datasource, MergeDatasource):
+    #     # if we're within a namespace, don't find merge nodes
+    #     bcandidate: StrategyNode = gen_environment_merge_node(
+    #         all_concepts=dm.matched.concepts,
+    #         environment=environment,
+    #         g=g,
+    #         depth=depth,
+    #         datasource=datasource,
+    #         source_concepts=source_concepts,
+    #     )
+    # else:
     bcandidate: StrategyNode = SelectNode(
         input_concepts=[c.concept for c in datasource.columns],
         output_concepts=dm.matched.concepts,
@@ -77,6 +89,7 @@ def dm_to_strategy_node(
         accept_partial=accept_partial,
         datasource=datasource,
         grain=datasource.grain,
+        conditions=datasource.where.conditional if datasource.where else None,
     )
     # we need to nest the group node one further
     if force_group is True:
@@ -101,6 +114,7 @@ def gen_select_nodes_from_tables_v2(
     environment: Environment,
     depth: int,
     target_grain: Grain,
+    source_concepts: Callable,
     accept_partial: bool = False,
 ) -> tuple[bool, list[Concept], list[StrategyNode]]:
     # if we have only constants
@@ -168,6 +182,7 @@ def gen_select_nodes_from_tables_v2(
             if ds_valid and address_valid:
                 matched_paths.append(path)
                 matched.append(all_lcl.concepts[idx])
+
         except nx.NodeNotFound:
             continue
         except nx.exception.NetworkXNoPath:
@@ -206,6 +221,7 @@ def gen_select_nodes_from_tables_v2(
             - 0.1 * len(matches[x].partial.addresses),
         )
         final: DatasourceMatch = matches[final_key]
+
         candidate = dm_to_strategy_node(
             final,
             target_grain=Grain(
@@ -217,6 +233,7 @@ def gen_select_nodes_from_tables_v2(
             g=g,
             depth=depth,
             accept_partial=accept_partial,
+            source_concepts=source_concepts,
         )
         to_return.append(candidate)
         del matches[final_key]
@@ -233,6 +250,7 @@ def gen_select_node_from_table(
     environment: Environment,
     depth: int,
     target_grain: Grain,
+    source_concepts,
     accept_partial: bool = False,
 ) -> Optional[StrategyNode]:
     # if we have only constants
@@ -277,9 +295,7 @@ def gen_select_node_from_table(
             try:
                 g.nodes[ncandidate]
             except KeyError:
-                raise
-                    "Could not find node for {}".format(ncandidate)
-                )
+                raise nx.exception.NetworkXNoPath
             raise e
         except nx.exception.NetworkXNoPath:
             all_found = False
@@ -354,6 +370,7 @@ def gen_select_node_from_table(
         accept_partial=accept_partial,
         datasource=datasource,
         grain=Grain(components=all_concepts),
+        conditions=datasource.where.conditional if datasource.where else None,
     )
     # we need to nest the group node one further
     if force_group is True:
@@ -379,81 +396,13 @@ def gen_select_node_from_table(
     return candidates[final]
 
 
-def gen_select_nodes_from_tables(
-    local_optional: List[Concept],
-    depth: int,
-    concept: Concept,
-    environment: Environment,
-    g: nx.DiGraph,
-    accept_partial: bool,
-    all_concepts: List[Concept],
-) -> tuple[bool, list[Concept], list[StrategyNode]]:
-    parents: List[StrategyNode] = []
-    found: List[Concept] = []
-    logger.info(
-        f"{padding(depth)}{LOGGER_PREFIX} looking for multiple sources that can satisfy"
-    )
-    all_found = False
-    unreachable: list[str] = []
-    # first pass
-    for opt_con in local_optional:
-        ds = gen_select_node_from_table(
-            concept,
-            [concept, opt_con],
-            g=g,
-            environment=environment,
-            depth=depth + 1,
-            accept_partial=accept_partial,
-            target_grain=Grain(components=all_concepts),
-        )
-        if not ds:
-            unreachable.append(opt_con.address)
-    all_found = False
-    for x in reversed(range(1, len(local_optional) + 1)):
-        if all_found:
-            break
-        for combo in combinations(local_optional, x):
-            if all_found:
-                break
-            # filter to just the original ones we need to get
-            local_combo = [
-                x for x in combo if x not in found and x.address not in unreachable
-            ]
-            # skip if nothing new in this combo
-            if not local_combo:
-                continue
-            # include core concept as join
-            all_concepts = [concept, *local_combo]
-
-            ds = gen_select_node_from_table(
-                concept,
-                all_concepts,
-                g=g,
-                environment=environment,
-                depth=depth + 1,
-                accept_partial=accept_partial,
-                target_grain=Grain(components=all_concepts),
-            )
-            if ds:
-                logger.info(
-                    f"{padding(depth)}{LOGGER_PREFIX} found a source with {[x.address for x in all_concepts]}"
-                )
-                parents.append(ds)
-                found += [x for x in ds.output_concepts if x != concept]
-            if {x.address for x in found} == {c.address for c in local_optional}:
-                logger.info(
-                    f"{padding(depth)}{LOGGER_PREFIX} found all optional {[c.address for c in local_optional]}"
-                )
-                all_found = True
-    return all_found, found, parents
-
-
 def gen_select_node(
     concept: Concept,
     local_optional: List[Concept],
     environment: Environment,
     g,
     depth: int,
+    source_concepts,
     accept_partial: bool = False,
     fail_if_not_found: bool = True,
     accept_partial_optional: bool = True,
@@ -477,8 +426,8 @@ def gen_select_node(
     target_grain = Grain(components=all_concepts)
     if materialized_lcl != all_lcl:
         logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} Skipping select node generation for {concept.address}
-            f" as it + optional (looking for all {all_lcl}) includes non-materialized concepts
+            f"{padding(depth)}{LOGGER_PREFIX} Skipping select node generation for {concept.address}"
+            f" as it + optional (looking for all {all_lcl}) includes non-materialized concepts"
         )
         if fail_if_not_found:
             raise NoDatasourceException(f"No datasource exists for {concept}")
@@ -495,6 +444,7 @@ def gen_select_node(
         depth=depth,
         accept_partial=accept_partial,
         target_grain=target_grain,
+        source_concepts=source_concepts,
     )
     if ds:
         logger.info(
@@ -503,17 +453,16 @@ def gen_select_node(
         return ds
     # if we cannot find a match
     all_found, found, parents = gen_select_nodes_from_tables_v2(
-        concept,
+        concept,
+        all_concepts,
+        g,
+        environment,
+        depth=depth,
+        target_grain=target_grain,
+        accept_partial=accept_partial,
+        source_concepts=source_concepts,
     )
     if parents and (all_found or accept_partial_optional):
-        if all_found:
-            logger.info(
-                f"{padding(depth)}{LOGGER_PREFIX} found all optional {[c.address for c in local_optional]} via joins"
-            )
-        else:
-            logger.info(
-                f"{padding(depth)}{LOGGER_PREFIX} found some optional, returning"
-            )
         all_partial = [
             c
             for c in all_concepts
@@ -521,6 +470,16 @@ def gen_select_node(
                 [c.address in [x.address for x in p.partial_concepts] for p in parents]
             )
         ]
+
+        if all_found:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} found all optional {[c.address for c in local_optional]} via joins"
+            )
+        else:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} found some optional {[x.address for x in found]}, and partial return allowed: returning"
+            )
+
         force_group = None
         inferred_grain = sum([x.grain for x in parents if x.grain], Grain())
         for candidate in parents:
@@ -571,6 +530,7 @@ def gen_select_node(
         depth=depth,
         accept_partial=accept_partial,
         target_grain=Grain(components=[concept]),
+        source_concepts=source_concepts,
    )

    if not ds and fail_if_not_found:

trilogy/core/processing/nodes/__init__.py

@@ -93,6 +93,7 @@ class History(BaseModel):
         environment: Environment,
         g,
         depth: int,
+        source_concepts,
         fail_if_not_found: bool = False,
         accept_partial: bool = False,
         accept_partial_optional: bool = False,
@@ -117,6 +118,7 @@ class History(BaseModel):
             fail_if_not_found=fail_if_not_found,
             accept_partial=accept_partial,
             accept_partial_optional=accept_partial_optional,
+            source_concepts=source_concepts,
         )
         self.select_history[fingerprint] = gen
         return gen

trilogy/core/processing/nodes/base_node.py

@@ -17,7 +17,6 @@ from trilogy.core.models import (
 from trilogy.core.enums import Purpose, JoinType, PurposeLineage, Granularity
 from trilogy.utility import unique
 from dataclasses import dataclass
-from trilogy.constants import logger
 
 
 def concept_list_to_grain(
@@ -59,7 +58,6 @@ def resolve_concept_map(
     inherited = set([t.address for t in inherited_inputs])
     for input in inputs:
         for concept in input.output_concepts:
-            logger.info(concept.address)
             if concept.address not in input.non_partial_concept_addresses:
                 continue
             if concept.address not in inherited:
@@ -152,11 +150,26 @@ class StrategyNode:
             if not parent:
                 raise SyntaxError("Unresolvable parent")
 
-    def
-
+    def add_output_concepts(self, concepts: List[Concept]):
+        for concept in concepts:
+            self.output_concepts.append(concept)
         self.output_lcl = LooseConceptList(concepts=self.output_concepts)
         self.rebuild_cache()
 
+    def add_output_concept(self, concept: Concept):
+        self.add_output_concepts([concept])
+
+    def hide_output_concepts(self, concepts: List[Concept]):
+        for x in concepts:
+            self.hidden_concepts.append(x)
+        self.rebuild_cache()
+
+    def remove_output_concepts(self, concepts: List[Concept]):
+        for x in concepts:
+            self.hidden_concepts.append(x)
+        self.output_concepts = [x for x in self.output_concepts if x not in concepts]
+        self.rebuild_cache()
+
     @property
     def logging_prefix(self) -> str:
         return "\t" * self.depth
@@ -167,7 +180,7 @@ class StrategyNode:
 
     @property
     def all_used_concepts(self) -> list[Concept]:
-        return [*self.input_concepts]
+        return [*self.input_concepts, *self.existence_concepts]
 
     def __repr__(self):
         concepts = self.all_concepts
@@ -196,6 +209,7 @@ class StrategyNode:
             condition=self.conditions,
             partial_concepts=self.partial_concepts,
             force_group=self.force_group,
+            hidden_concepts=self.hidden_concepts,
         )
 
     def rebuild_cache(self) -> QueryDatasource:
@@ -239,8 +253,11 @@ class NodeJoin:
     concepts: List[Concept]
     join_type: JoinType
     filter_to_mutual: bool = False
+    concept_pairs: list[tuple[Concept, Concept]] | None = None
 
     def __post_init__(self):
+        if self.concept_pairs:
+            return
         final_concepts = []
         for concept in self.concepts:
             include = True

trilogy/core/processing/nodes/filter_node.py

@@ -7,6 +7,7 @@ from trilogy.core.models import (
     Conditional,
     Comparison,
     Parenthetical,
+    Grain,
 )
 from trilogy.core.processing.nodes.base_node import StrategyNode
 
@@ -34,6 +35,7 @@ class FilterNode(StrategyNode):
         conditions: Conditional | Comparison | Parenthetical | None = None,
         partial_concepts: List[Concept] | None = None,
         force_group: bool | None = False,
+        grain: Grain | None = None,
     ):
         super().__init__(
             output_concepts=output_concepts,
@@ -46,6 +48,7 @@ class FilterNode(StrategyNode):
             conditions=conditions,
             partial_concepts=partial_concepts,
             force_group=force_group,
+            grain=grain,
         )
 
     def copy(self) -> "FilterNode":

trilogy/core/processing/nodes/group_node.py

@@ -9,12 +9,16 @@ from trilogy.core.models import (
     Concept,
     Environment,
     LooseConceptList,
+    Conditional,
+    Comparison,
+    Parenthetical,
 )
 from trilogy.core.processing.nodes.base_node import (
     StrategyNode,
     resolve_concept_map,
     concept_list_to_grain,
 )
+from trilogy.utility import unique
 
 
 LOGGER_PREFIX = "[CONCEPT DETAIL - GROUP NODE]"
@@ -34,6 +38,7 @@ class GroupNode(StrategyNode):
         depth: int = 0,
         partial_concepts: Optional[List[Concept]] = None,
         force_group: bool | None = None,
+        conditions: Conditional | Comparison | Parenthetical | None = None,
     ):
         super().__init__(
             input_concepts=input_concepts,
@@ -45,6 +50,7 @@ class GroupNode(StrategyNode):
             depth=depth,
             partial_concepts=partial_concepts,
             force_group=force_group,
+            conditions=conditions,
         )
 
     def _resolve(self) -> QueryDatasource:
@@ -79,7 +85,10 @@ class GroupNode(StrategyNode):
                 logger.info(
                     f"{self.logging_prefix}{LOGGER_PREFIX} No group by required, returning parent node"
                 )
-
+                will_return: QueryDatasource = parent_sources[0]
+                if self.conditions:
+                    will_return.condition = self.conditions + will_return.condition
+                return will_return
             # otherwise if no group by, just treat it as a select
             source_type = SourceType.SELECT
         else:
@@ -109,7 +118,15 @@ class GroupNode(StrategyNode):
             source_type=source_type,
             source_map=resolve_concept_map(
                 parent_sources,
-                targets=self.output_concepts,
+                # targets = self.output_concepts,
+                targets=(
+                    unique(
+                        self.output_concepts + self.conditions.concept_arguments,
+                        "address",
+                    )
+                    if self.conditions
+                    else self.output_concepts
+                ),
                 inherited_inputs=self.input_concepts,
             ),
             joins=[],
@@ -129,4 +146,5 @@ class GroupNode(StrategyNode):
             depth=self.depth,
             partial_concepts=list(self.partial_concepts),
             force_group=self.force_group,
+            conditions=self.conditions,
         )

trilogy/core/processing/nodes/merge_node.py

@@ -14,6 +14,7 @@ from trilogy.core.models import (
     Conditional,
     Comparison,
     Parenthetical,
+    Environment,
 )
 from trilogy.utility import unique
 from trilogy.core.processing.nodes.base_node import (
@@ -108,6 +109,7 @@ class MergeNode(StrategyNode):
         conditions: Conditional | Comparison | Parenthetical | None = None,
         hidden_concepts: List[Concept] | None = None,
         virtual_output_concepts: List[Concept] | None = None,
+        existence_concepts: List[Concept] | None = None,
     ):
         super().__init__(
             input_concepts=input_concepts,
@@ -123,18 +125,19 @@ class MergeNode(StrategyNode):
             conditions=conditions,
             hidden_concepts=hidden_concepts,
             virtual_output_concepts=virtual_output_concepts,
+            existence_concepts=existence_concepts,
         )
         self.join_concepts = join_concepts
         self.force_join_type = force_join_type
-        self.node_joins = node_joins
+        self.node_joins: List[NodeJoin] | None = node_joins
 
-        final_joins = []
-        if self.node_joins:
+        final_joins: List[NodeJoin] = []
+        if self.node_joins is not None:
             for join in self.node_joins:
                 if join.left_node.resolve().name == join.right_node.resolve().name:
                     continue
                 final_joins.append(join)
-
+            self.node_joins = final_joins
 
     def translate_node_joins(self, node_joins: List[NodeJoin]) -> List[BaseJoin]:
         joins = []
@@ -149,6 +152,7 @@ class MergeNode(StrategyNode):
                     right_datasource=right,
                     join_type=join.join_type,
                     concepts=join.concepts,
+                    concept_pairs=join.concept_pairs,
                 )
             )
         return joins
@@ -175,7 +179,12 @@ class MergeNode(StrategyNode):
         return joins
 
     def generate_joins(
-        self,
+        self,
+        final_datasets,
+        final_joins: List[NodeJoin] | None,
+        pregrain: Grain,
+        grain: Grain,
+        environment: Environment,
     ) -> List[BaseJoin]:
         # only finally, join between them for unique values
         dataset_list: List[QueryDatasource] = sorted(
@@ -186,15 +195,14 @@ class MergeNode(StrategyNode):
             f"{self.logging_prefix}{LOGGER_PREFIX} Merge node has {len(dataset_list)} parents, starting merge"
         )
         for item in dataset_list:
-            logger.info(f"{self.logging_prefix}{LOGGER_PREFIX} for {item.full_name}")
             logger.info(
-                f"{self.logging_prefix}{LOGGER_PREFIX} partial concepts {[x.address for x in item.partial_concepts]}"
+                f"{self.logging_prefix}{LOGGER_PREFIX} for {item.full_name} partial concepts {[x.address for x in item.partial_concepts]}"
             )
             logger.info(
                 f"{self.logging_prefix}{LOGGER_PREFIX} potential merge keys {[x.address+str(x.purpose) for x in item.output_concepts]} partial {[x.address for x in item.partial_concepts]}"
             )
 
-        if
+        if final_joins is None:
             if not pregrain.components:
                 logger.info(
                     f"{self.logging_prefix}{LOGGER_PREFIX} no grain components, doing full join"
@@ -204,7 +212,7 @@ class MergeNode(StrategyNode):
                 logger.info(
                     f"{self.logging_prefix}{LOGGER_PREFIX} inferring node joins to target grain {str(grain)}"
                 )
-                joins = get_node_joins(dataset_list, grain.components)
+                joins = get_node_joins(dataset_list, grain.components, environment)
         elif final_joins:
             logger.info(
                 f"{self.logging_prefix}{LOGGER_PREFIX} translating provided node joins {len(final_joins)}"
@@ -224,7 +232,7 @@ class MergeNode(StrategyNode):
             p.resolve() for p in self.parents
         ]
         merged: dict[str, QueryDatasource | Datasource] = {}
-        final_joins = self.node_joins
+        final_joins: List[NodeJoin] | None = self.node_joins
         for source in parent_sources:
             if source.full_name in merged:
                 logger.info(
@@ -280,12 +288,16 @@ class MergeNode(StrategyNode):
         for source in final_datasets:
             pregrain += source.grain
 
-        grain =
-
-
-
-
-
+        grain = (
+            self.grain
+            if self.grain
+            else Grain(
+                components=[
+                    c
+                    for c in pregrain.components
+                    if c.address in [x.address for x in self.output_concepts]
+                ]
+            )
         )
 
         logger.info(
@@ -293,7 +305,9 @@ class MergeNode(StrategyNode):
         )
 
         if len(final_datasets) > 1:
-            joins = self.generate_joins(
+            joins = self.generate_joins(
+                final_datasets, final_joins, pregrain, grain, self.environment
+            )
         else:
             joins = []
 
@@ -318,7 +332,7 @@ class MergeNode(StrategyNode):
 
         qd_joins: List[BaseJoin | UnnestJoin] = [*joins]
         source_map = resolve_concept_map(
-
+            list(merged.values()),
             targets=self.output_concepts,
             inherited_inputs=self.input_concepts + self.existence_concepts,
             full_joins=full_join_concepts,

trilogy/core/processing/nodes/select_node_v2.py

@@ -13,6 +13,9 @@ from trilogy.core.models import (
     Environment,
     UnnestJoin,
     Datasource,
+    Conditional,
+    Comparison,
+    Parenthetical,
 )
 from trilogy.utility import unique
 from trilogy.core.processing.nodes.base_node import StrategyNode
@@ -43,6 +46,8 @@ class SelectNode(StrategyNode):
         accept_partial: bool = False,
         grain: Optional[Grain] = None,
         force_group: bool | None = False,
+        conditions: Conditional | Comparison | Parenthetical | None = None,
+        hidden_concepts: List[Concept] | None = None,
     ):
         super().__init__(
             input_concepts=input_concepts,
@@ -55,6 +60,8 @@ class SelectNode(StrategyNode):
             partial_concepts=partial_concepts,
             force_group=force_group,
             grain=grain,
+            conditions=conditions,
+            hidden_concepts=hidden_concepts,
         )
         self.accept_partial = accept_partial
         self.datasource = datasource
@@ -82,11 +89,12 @@ class SelectNode(StrategyNode):
                 for x in c.alias.concept_arguments:
                     source_map[x.address] = {datasource}
         for x in all_concepts_final:
-            # add in any derived concepts to support a merge node
             if x.address not in source_map and x.derivation in (
                 PurposeLineage.MULTISELECT,
-                PurposeLineage.MERGE,
                 PurposeLineage.FILTER,
+                PurposeLineage.BASIC,
+                PurposeLineage.ROWSET,
+                PurposeLineage.BASIC,
             ):
                 source_map[x.address] = set()
 
@@ -111,6 +119,7 @@ class SelectNode(StrategyNode):
             condition=self.conditions,
             # select nodes should never group
             force_group=self.force_group,
+            hidden_concepts=self.hidden_concepts,
         )
 
     def resolve_from_constant_datasources(self) -> QueryDatasource:
@@ -127,6 +136,7 @@ class SelectNode(StrategyNode):
             joins=[],
             partial_concepts=[],
             source_type=SourceType.CONSTANT,
+            hidden_concepts=self.hidden_concepts,
         )
 
     def _resolve(self) -> QueryDatasource:
@@ -158,7 +168,7 @@ class SelectNode(StrategyNode):
             return resolution
         required = [c.address for c in self.all_concepts]
         raise NoDatasourceException(
-            f"Could not find any way to
+            f"Could not find any way to resolve datasources for required concepts {required} with derivation {[x.derivation for x in self.all_concepts]}"
         )
 
     def copy(self) -> "SelectNode":
@@ -175,6 +185,8 @@ class SelectNode(StrategyNode):
             accept_partial=self.accept_partial,
             grain=self.grain,
             force_group=self.force_group,
+            conditions=self.conditions,
+            hidden_concepts=self.hidden_concepts,
         )
 
 
@@ -190,4 +202,6 @@ class ConstantNode(SelectNode):
             datasource=self.datasource,
             depth=self.depth,
             partial_concepts=list(self.partial_concepts),
+            conditions=self.conditions,
+            hidden_concepts=self.hidden_concepts,
         )