pytrilogy 0.0.2.50__py3-none-any.whl → 0.0.2.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.52.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.52.dist-info}/RECORD +27 -25
- trilogy/__init__.py +1 -1
- trilogy/core/internal.py +5 -1
- trilogy/core/models.py +124 -263
- trilogy/core/processing/concept_strategies_v3.py +14 -4
- trilogy/core/processing/node_generators/basic_node.py +7 -3
- trilogy/core/processing/node_generators/common.py +8 -3
- trilogy/core/processing/node_generators/filter_node.py +5 -5
- trilogy/core/processing/node_generators/group_node.py +24 -8
- trilogy/core/processing/node_generators/multiselect_node.py +4 -3
- trilogy/core/processing/node_generators/node_merge_node.py +14 -2
- trilogy/core/processing/node_generators/rowset_node.py +3 -4
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +203 -0
- trilogy/core/processing/node_generators/select_merge_node.py +17 -9
- trilogy/core/processing/nodes/base_node.py +2 -33
- trilogy/core/processing/nodes/group_node.py +19 -10
- trilogy/core/processing/nodes/merge_node.py +2 -2
- trilogy/hooks/graph_hook.py +3 -1
- trilogy/parsing/common.py +54 -12
- trilogy/parsing/parse_engine.py +39 -20
- trilogy/parsing/render.py +17 -1
- {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.52.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.52.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.52.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.52.dist-info}/top_level.txt +0 -0
|
@@ -449,6 +449,7 @@ def generate_node(
|
|
|
449
449
|
conditions=conditions,
|
|
450
450
|
)
|
|
451
451
|
if not check:
|
|
452
|
+
|
|
452
453
|
logger.info(
|
|
453
454
|
f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for expanded concepts"
|
|
454
455
|
)
|
|
@@ -470,7 +471,6 @@ def generate_node(
|
|
|
470
471
|
x
|
|
471
472
|
for x in ex_resolve.output_concepts
|
|
472
473
|
if x.address not in [y.address for y in root_targets]
|
|
473
|
-
and x not in ex_resolve.grain.components
|
|
474
474
|
]
|
|
475
475
|
|
|
476
476
|
pseudonyms = [
|
|
@@ -478,10 +478,19 @@ def generate_node(
|
|
|
478
478
|
for x in extra
|
|
479
479
|
if any(x.address in y.pseudonyms for y in root_targets)
|
|
480
480
|
]
|
|
481
|
-
|
|
482
|
-
|
|
481
|
+
logger.info(
|
|
482
|
+
f"{depth_to_prefix(depth)}{LOGGER_PREFIX} reducing final outputs, was {[c.address for c in ex_resolve.output_concepts]} with extra {[c.address for c in extra]}"
|
|
483
|
+
)
|
|
484
|
+
base = [
|
|
485
|
+
x for x in ex_resolve.output_concepts if x.address not in extra
|
|
486
|
+
]
|
|
487
|
+
for x in root_targets:
|
|
488
|
+
if x.address not in base:
|
|
489
|
+
base.append(x)
|
|
490
|
+
expanded.set_output_concepts(base)
|
|
483
491
|
# but hide them
|
|
484
492
|
if pseudonyms:
|
|
493
|
+
expanded.add_output_concepts(pseudonyms)
|
|
485
494
|
logger.info(
|
|
486
495
|
f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Hiding pseudonyms{[c.address for c in pseudonyms]}"
|
|
487
496
|
)
|
|
@@ -908,6 +917,7 @@ def _search_concepts(
|
|
|
908
917
|
parents=stack,
|
|
909
918
|
depth=depth,
|
|
910
919
|
)
|
|
920
|
+
|
|
911
921
|
# ensure we can resolve our final merge
|
|
912
922
|
output.resolve()
|
|
913
923
|
if condition_required and conditions:
|
|
@@ -917,7 +927,7 @@ def _search_concepts(
|
|
|
917
927
|
output, environment, g, where=conditions, history=history
|
|
918
928
|
)
|
|
919
929
|
logger.info(
|
|
920
|
-
f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning
|
|
930
|
+
f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning {type(output)} node partial {[c.address for c in output.partial_concepts]}"
|
|
921
931
|
)
|
|
922
932
|
return output
|
|
923
933
|
|
|
@@ -44,7 +44,7 @@ def gen_basic_node(
|
|
|
44
44
|
conditions: WhereClause | None = None,
|
|
45
45
|
):
|
|
46
46
|
depth_prefix = "\t" * depth
|
|
47
|
-
parent_concepts = resolve_function_parent_concepts(concept)
|
|
47
|
+
parent_concepts = resolve_function_parent_concepts(concept, environment=environment)
|
|
48
48
|
|
|
49
49
|
logger.info(
|
|
50
50
|
f"{depth_prefix}{LOGGER_PREFIX} basic node for {concept} has parents {[x.address for x in parent_concepts]}"
|
|
@@ -61,12 +61,16 @@ def gen_basic_node(
|
|
|
61
61
|
f"{depth_prefix}{LOGGER_PREFIX} basic node for {concept} has equivalent optional {[x.address for x in equivalent_optional]}"
|
|
62
62
|
)
|
|
63
63
|
for eo in equivalent_optional:
|
|
64
|
-
parent_concepts += resolve_function_parent_concepts(eo)
|
|
64
|
+
parent_concepts += resolve_function_parent_concepts(eo, environment=environment)
|
|
65
65
|
non_equivalent_optional = [
|
|
66
66
|
x for x in local_optional if x not in equivalent_optional
|
|
67
67
|
]
|
|
68
|
+
all_parents = parent_concepts + non_equivalent_optional
|
|
69
|
+
logger.info(
|
|
70
|
+
f"{depth_prefix}{LOGGER_PREFIX} Fetching parents {[x.address for x in all_parents]}"
|
|
71
|
+
)
|
|
68
72
|
parent_node: StrategyNode = source_concepts(
|
|
69
|
-
mandatory_list=
|
|
73
|
+
mandatory_list=all_parents,
|
|
70
74
|
environment=environment,
|
|
71
75
|
g=g,
|
|
72
76
|
depth=depth + 1,
|
|
@@ -20,12 +20,16 @@ from trilogy.core.processing.nodes.merge_node import MergeNode
|
|
|
20
20
|
from trilogy.utility import unique
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
def resolve_function_parent_concepts(
|
|
23
|
+
def resolve_function_parent_concepts(
|
|
24
|
+
concept: Concept, environment: Environment
|
|
25
|
+
) -> List[Concept]:
|
|
24
26
|
if not isinstance(concept.lineage, (Function, AggregateWrapper)):
|
|
25
27
|
raise ValueError(f"Concept {concept} lineage is not function or aggregate")
|
|
26
28
|
if concept.derivation == PurposeLineage.AGGREGATE:
|
|
27
29
|
if not concept.grain.abstract:
|
|
28
|
-
base = concept.lineage.concept_arguments +
|
|
30
|
+
base = concept.lineage.concept_arguments + [
|
|
31
|
+
environment.concepts[c] for c in concept.grain.components
|
|
32
|
+
]
|
|
29
33
|
# if the base concept being aggregated is a property with a key
|
|
30
34
|
# keep the key as a parent
|
|
31
35
|
else:
|
|
@@ -56,6 +60,7 @@ def resolve_condition_parent_concepts(
|
|
|
56
60
|
|
|
57
61
|
def resolve_filter_parent_concepts(
|
|
58
62
|
concept: Concept,
|
|
63
|
+
environment: Environment,
|
|
59
64
|
) -> Tuple[Concept, List[Concept], List[Tuple[Concept, ...]]]:
|
|
60
65
|
if not isinstance(concept.lineage, FilterItem):
|
|
61
66
|
raise ValueError(
|
|
@@ -70,7 +75,7 @@ def resolve_filter_parent_concepts(
|
|
|
70
75
|
base_rows += condition_rows
|
|
71
76
|
base_existence += condition_existence
|
|
72
77
|
if direct_parent.grain:
|
|
73
|
-
base_rows += direct_parent.grain.
|
|
78
|
+
base_rows += [environment.concepts[c] for c in direct_parent.grain.components]
|
|
74
79
|
if (
|
|
75
80
|
isinstance(direct_parent, Concept)
|
|
76
81
|
and direct_parent.purpose == Purpose.PROPERTY
|
|
@@ -28,7 +28,7 @@ def gen_filter_node(
|
|
|
28
28
|
conditions: WhereClause | None = None,
|
|
29
29
|
) -> StrategyNode | None:
|
|
30
30
|
immediate_parent, parent_row_concepts, parent_existence_concepts = (
|
|
31
|
-
resolve_filter_parent_concepts(concept)
|
|
31
|
+
resolve_filter_parent_concepts(concept, environment)
|
|
32
32
|
)
|
|
33
33
|
if not isinstance(concept.lineage, FilterItem):
|
|
34
34
|
raise SyntaxError('Filter node must have a lineage of type "FilterItem"')
|
|
@@ -136,8 +136,8 @@ def gen_filter_node(
|
|
|
136
136
|
parent.add_existence_concepts(flattened_existence, False).set_output_concepts(
|
|
137
137
|
expected_output, False
|
|
138
138
|
)
|
|
139
|
-
parent.grain = Grain(
|
|
140
|
-
|
|
139
|
+
parent.grain = Grain.from_concepts(
|
|
140
|
+
(
|
|
141
141
|
list(immediate_parent.keys)
|
|
142
142
|
if immediate_parent.keys
|
|
143
143
|
else [immediate_parent]
|
|
@@ -161,8 +161,8 @@ def gen_filter_node(
|
|
|
161
161
|
output_concepts=[concept, immediate_parent] + parent_row_concepts,
|
|
162
162
|
environment=environment,
|
|
163
163
|
parents=core_parents,
|
|
164
|
-
grain=Grain(
|
|
165
|
-
|
|
164
|
+
grain=Grain.from_concepts(
|
|
165
|
+
[immediate_parent] + parent_row_concepts,
|
|
166
166
|
),
|
|
167
167
|
preexisting_conditions=conditions.conditional if conditions else None,
|
|
168
168
|
)
|
|
@@ -34,7 +34,7 @@ def gen_group_node(
|
|
|
34
34
|
# aggregates MUST always group to the proper grain
|
|
35
35
|
# except when the
|
|
36
36
|
parent_concepts: List[Concept] = unique(
|
|
37
|
-
resolve_function_parent_concepts(concept), "address"
|
|
37
|
+
resolve_function_parent_concepts(concept, environment=environment), "address"
|
|
38
38
|
)
|
|
39
39
|
logger.info(
|
|
40
40
|
f"{padding(depth)}{LOGGER_PREFIX} parent concepts are {[x.address for x in parent_concepts]} from group grain {concept.grain}"
|
|
@@ -43,18 +43,28 @@ def gen_group_node(
|
|
|
43
43
|
# if the aggregation has a grain, we need to ensure these are the ONLY optional in the output of the select
|
|
44
44
|
output_concepts = [concept]
|
|
45
45
|
|
|
46
|
-
if
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
46
|
+
if (
|
|
47
|
+
concept.grain
|
|
48
|
+
and len(concept.grain.components) > 0
|
|
49
|
+
and not concept.grain.abstract
|
|
50
|
+
):
|
|
51
|
+
grain_components = [environment.concepts[c] for c in concept.grain.components]
|
|
50
52
|
parent_concepts += grain_components
|
|
51
53
|
output_concepts += grain_components
|
|
52
54
|
for possible_agg in local_optional:
|
|
55
|
+
|
|
53
56
|
if not isinstance(possible_agg.lineage, (AggregateWrapper, Function)):
|
|
54
57
|
continue
|
|
58
|
+
logger.info(possible_agg)
|
|
59
|
+
if possible_agg.grain and possible_agg.grain != concept.grain:
|
|
60
|
+
logger.info(
|
|
61
|
+
f"{padding(depth)}{LOGGER_PREFIX} mismatched equivalent group by with grain {possible_agg.grain} for {concept.address}"
|
|
62
|
+
)
|
|
63
|
+
|
|
55
64
|
if possible_agg.grain and possible_agg.grain == concept.grain:
|
|
56
65
|
agg_parents: List[Concept] = resolve_function_parent_concepts(
|
|
57
|
-
possible_agg
|
|
66
|
+
possible_agg,
|
|
67
|
+
environment=environment,
|
|
58
68
|
)
|
|
59
69
|
if set([x.address for x in agg_parents]).issubset(
|
|
60
70
|
set([x.address for x in parent_concepts])
|
|
@@ -63,13 +73,19 @@ def gen_group_node(
|
|
|
63
73
|
logger.info(
|
|
64
74
|
f"{padding(depth)}{LOGGER_PREFIX} found equivalent group by optional concept {possible_agg.address} for {concept.address}"
|
|
65
75
|
)
|
|
66
|
-
elif Grain(
|
|
76
|
+
elif Grain.from_concepts(agg_parents) == Grain.from_concepts(
|
|
77
|
+
parent_concepts
|
|
78
|
+
):
|
|
67
79
|
extra = [x for x in agg_parents if x.address not in parent_concepts]
|
|
68
80
|
parent_concepts += extra
|
|
69
81
|
output_concepts.append(possible_agg)
|
|
70
82
|
logger.info(
|
|
71
83
|
f"{padding(depth)}{LOGGER_PREFIX} found equivalent group by optional concept {possible_agg.address} for {concept.address}"
|
|
72
84
|
)
|
|
85
|
+
else:
|
|
86
|
+
logger.info(
|
|
87
|
+
f"{padding(depth)}{LOGGER_PREFIX} mismatched grain {Grain.from_concepts(agg_parents)} vs {Grain.from_concepts(parent_concepts)}"
|
|
88
|
+
)
|
|
73
89
|
if parent_concepts:
|
|
74
90
|
logger.info(
|
|
75
91
|
f"{padding(depth)}{LOGGER_PREFIX} fetching group node parents {LooseConceptList(concepts=parent_concepts)}"
|
|
@@ -94,7 +110,7 @@ def gen_group_node(
|
|
|
94
110
|
|
|
95
111
|
# the keys we group by
|
|
96
112
|
# are what we can use for enrichment
|
|
97
|
-
group_key_parents = concept.grain.
|
|
113
|
+
group_key_parents = [environment.concepts[c] for c in concept.grain.components]
|
|
98
114
|
|
|
99
115
|
group_node = GroupNode(
|
|
100
116
|
output_concepts=output_concepts,
|
|
@@ -8,12 +8,13 @@ from trilogy.core.models import (
|
|
|
8
8
|
Concept,
|
|
9
9
|
Conditional,
|
|
10
10
|
Environment,
|
|
11
|
+
Grain,
|
|
11
12
|
MultiSelectStatement,
|
|
12
13
|
WhereClause,
|
|
13
14
|
)
|
|
14
15
|
from trilogy.core.processing.node_generators.common import resolve_join_order
|
|
15
16
|
from trilogy.core.processing.nodes import History, MergeNode, NodeJoin
|
|
16
|
-
from trilogy.core.processing.nodes.base_node import StrategyNode
|
|
17
|
+
from trilogy.core.processing.nodes.base_node import StrategyNode
|
|
17
18
|
from trilogy.core.processing.utility import concept_to_relevant_joins, padding
|
|
18
19
|
|
|
19
20
|
LOGGER_PREFIX = "[GEN_MULTISELECT_NODE]"
|
|
@@ -137,8 +138,8 @@ def gen_multiselect_node(
|
|
|
137
138
|
|
|
138
139
|
# assume grain to be output of select
|
|
139
140
|
# but don't include anything aggregate at this point
|
|
140
|
-
node.resolution_cache.grain =
|
|
141
|
-
node.output_concepts,
|
|
141
|
+
node.resolution_cache.grain = Grain.from_concepts(
|
|
142
|
+
node.output_concepts,
|
|
142
143
|
)
|
|
143
144
|
possible_joins = concept_to_relevant_joins(additional_relevant)
|
|
144
145
|
if not local_optional:
|
|
@@ -327,11 +327,17 @@ def subgraphs_to_merge_node(
|
|
|
327
327
|
for y in x.output_concepts:
|
|
328
328
|
input_c.append(y)
|
|
329
329
|
if len(parents) == 1 and enable_early_exit:
|
|
330
|
+
logger.info(
|
|
331
|
+
f"{padding(depth)}{LOGGER_PREFIX} only one parent node, exiting early w/ {[c.address for c in parents[0].output_concepts]}"
|
|
332
|
+
)
|
|
330
333
|
return parents[0]
|
|
331
|
-
|
|
334
|
+
base_output = [x for x in all_concepts]
|
|
335
|
+
# for x in base_output:
|
|
336
|
+
# if x not in input_c:
|
|
337
|
+
# input_c.append(x)
|
|
332
338
|
return MergeNode(
|
|
333
339
|
input_concepts=unique(input_c, "address"),
|
|
334
|
-
output_concepts=
|
|
340
|
+
output_concepts=base_output,
|
|
335
341
|
environment=environment,
|
|
336
342
|
parents=parents,
|
|
337
343
|
depth=depth,
|
|
@@ -368,6 +374,12 @@ def gen_merge_node(
|
|
|
368
374
|
logger.info(
|
|
369
375
|
f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
|
|
370
376
|
)
|
|
377
|
+
for flat in log_graph:
|
|
378
|
+
if set(flat) == set([x.address for x in all_concepts]):
|
|
379
|
+
logger.info(
|
|
380
|
+
f"{padding(depth)}{LOGGER_PREFIX} expanded concept resolution was identical to search resolution; breaking to avoid recursion error."
|
|
381
|
+
)
|
|
382
|
+
return None
|
|
371
383
|
return subgraphs_to_merge_node(
|
|
372
384
|
weak_resolve,
|
|
373
385
|
depth=depth,
|
|
@@ -5,6 +5,7 @@ from trilogy.core.enums import PurposeLineage
|
|
|
5
5
|
from trilogy.core.models import (
|
|
6
6
|
Concept,
|
|
7
7
|
Environment,
|
|
8
|
+
Grain,
|
|
8
9
|
MultiSelectStatement,
|
|
9
10
|
RowsetDerivationStatement,
|
|
10
11
|
RowsetItem,
|
|
@@ -12,7 +13,6 @@ from trilogy.core.models import (
|
|
|
12
13
|
WhereClause,
|
|
13
14
|
)
|
|
14
15
|
from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
|
|
15
|
-
from trilogy.core.processing.nodes.base_node import concept_list_to_grain
|
|
16
16
|
from trilogy.core.processing.utility import concept_to_relevant_joins, padding
|
|
17
17
|
|
|
18
18
|
LOGGER_PREFIX = "[GEN_ROWSET_NODE]"
|
|
@@ -74,7 +74,7 @@ def gen_rowset_node(
|
|
|
74
74
|
assert node.resolution_cache
|
|
75
75
|
# assume grain to be output of select
|
|
76
76
|
# but don't include anything hidden(the non-rowset concepts)
|
|
77
|
-
node.grain =
|
|
77
|
+
node.grain = Grain.from_concepts(
|
|
78
78
|
[
|
|
79
79
|
x
|
|
80
80
|
for x in node.output_concepts
|
|
@@ -83,7 +83,6 @@ def gen_rowset_node(
|
|
|
83
83
|
y for y in node.hidden_concepts if y.derivation != PurposeLineage.ROWSET
|
|
84
84
|
]
|
|
85
85
|
],
|
|
86
|
-
parent_sources=node.resolution_cache.datasources,
|
|
87
86
|
)
|
|
88
87
|
|
|
89
88
|
node.rebuild_cache()
|
|
@@ -92,7 +91,7 @@ def gen_rowset_node(
|
|
|
92
91
|
x.address in node.output_concepts for x in local_optional
|
|
93
92
|
):
|
|
94
93
|
logger.info(
|
|
95
|
-
f"{padding(depth)}{LOGGER_PREFIX} no enrichment required for rowset node as all optional found or no optional; exiting early."
|
|
94
|
+
f"{padding(depth)}{LOGGER_PREFIX} no enrichment required for rowset node as all optional {[x.address for x in local_optional]} found or no optional; exiting early."
|
|
96
95
|
)
|
|
97
96
|
return node
|
|
98
97
|
possible_joins = concept_to_relevant_joins(
|
|
File without changes
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from datetime import date, datetime, timedelta
|
|
3
|
+
from typing import List, Tuple, TypeVar
|
|
4
|
+
|
|
5
|
+
from trilogy.core.enums import ComparisonOperator
|
|
6
|
+
from trilogy.core.models import (
|
|
7
|
+
Comparison,
|
|
8
|
+
Concept,
|
|
9
|
+
Conditional,
|
|
10
|
+
Datasource,
|
|
11
|
+
DataType,
|
|
12
|
+
Function,
|
|
13
|
+
FunctionType,
|
|
14
|
+
Parenthetical,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Define a generic type that ensures start and end are the same type
|
|
18
|
+
T = TypeVar("T", int, date, datetime)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def reduce_expression(
    var: Concept, group_tuple: list[tuple[ComparisonOperator, T]]
) -> bool:
    """
    Decide whether a set of comparisons on one concept covers its whole domain.

    Each (operator, value) pair is converted to an inclusive range, and the
    ranges are handed to is_fully_covered together with the bounds for the
    concept's datatype.

    Parameters:
    - var: The concept being constrained; only its `datatype` is read.
    - group_tuple: (operator, value) pairs; supported operators are
      ">", ">=", "<", "<=" and "=".

    Returns:
    - bool: the result of is_fully_covered; False when there are no
      comparisons at all.

    Raises:
    - ValueError: for a datatype other than INTEGER, DATE or DATETIME, for an
      unsupported operator, or for a value that is not an int, date or datetime.
    """
    # Track ranges
    lower_check: T
    upper_check: T

    if var.datatype == DataType.INTEGER:
        lower_check = float("-inf")  # type: ignore
        upper_check = float("inf")  # type: ignore
    elif var.datatype == DataType.DATE:
        lower_check = date.min  # type: ignore
        upper_check = date.max  # type: ignore
    elif var.datatype == DataType.DATETIME:
        lower_check = datetime.min  # type: ignore
        upper_check = datetime.max  # type: ignore
    else:
        raise ValueError(f"Invalid datatype: {var.datatype}")

    # With no comparisons nothing is covered; returning here also avoids
    # reading `increment` before it has ever been assigned.
    if not group_tuple:
        return False

    ranges: list[Tuple[T, T]] = []
    increment: int | timedelta
    for op, value in group_tuple:
        # datetime is a subclass of date, so it must be tested first;
        # otherwise datetimes would silently get the one-day step.
        if isinstance(value, datetime):
            increment = timedelta(seconds=1)
        elif isinstance(value, date):
            increment = timedelta(days=1)
        elif isinstance(value, int):
            increment = 1
        else:
            # Fail loudly instead of reusing the step of a previous value.
            raise ValueError(f"Invalid comparison value: {value!r}")

        if op == ">":
            # Strict bounds are made inclusive by stepping one increment.
            ranges.append((value + increment, upper_check))
        elif op == ">=":
            ranges.append((value, upper_check))
        elif op == "<":
            ranges.append((lower_check, value - increment))
        elif op == "<=":
            ranges.append((lower_check, value))
        elif op == "=":
            ranges.append((value, value))
        else:
            raise ValueError(f"Invalid operator: {op}")
    return is_fully_covered(lower_check, upper_check, ranges, increment)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def simplify_conditions(
    conditions: list[Comparison | Conditional | Parenthetical],
) -> bool:
    """
    Report whether the given conditions, taken together, pass reduce_expression
    for every concept they constrain.

    Only plain comparisons between a Concept and an int/date/datetime literal
    (or a CONSTANT Function whose first argument is such a literal) are
    handled; anything else makes the function return False immediately.
    """
    literal_types = (int, date, datetime)
    operand_types = (int, date, datetime, Function)

    # Collect (operator, literal) pairs per constrained concept
    per_concept: dict[
        Concept, list[tuple[ComparisonOperator, datetime | int | date]]
    ] = defaultdict(list)

    for cond in conditions:
        if not isinstance(cond, Comparison):
            return False
        left, right = cond.left, cond.right
        # at least one side has to be a literal (or a function wrapping one)
        if not (isinstance(left, operand_types) or isinstance(right, operand_types)):
            return False
        # and at least one side has to be a concept
        if not (isinstance(left, Concept) or isinstance(right, Concept)):
            return False

        if isinstance(left, Concept):
            target, bound = left, right
        else:
            target, bound = right, left

        if isinstance(bound, Function):
            # unwrap constant functions down to their literal argument
            if bound.operator == FunctionType.CONSTANT and isinstance(
                bound.arguments[0], literal_types
            ):
                bound = bound.arguments[0]
            else:
                return False
        if not isinstance(bound, literal_types):
            return False

        per_concept[target].append((cond.operator, bound))

    # Evaluate every group before combining, so each one is always checked
    outcomes = [
        reduce_expression(target, pairs)  # type: ignore
        for target, pairs in per_concept.items()
    ]

    # Final simplification
    return all(isinstance(outcome, bool) and outcome for outcome in outcomes)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def is_fully_covered(
    start: T,
    end: T,
    ranges: List[Tuple[T, T]],
    increment: int | timedelta,
) -> bool:
    """
    Check if the list of inclusive range pairs fully covers [start, end].

    The next range is considered contiguous with the coverage so far when the
    distance between them is at most `increment`, e.g. (0, 5) and (6, 10) are
    contiguous for increment=1.

    Parameters:
    - start: The starting value of the set to cover.
    - end: The ending value of the set to cover.
    - ranges (list of tuples): List of range pairs
      [(start1, end1), (start2, end2), ...]; the list is not modified.
    - increment: The largest distance between adjacent ranges that is not
      treated as a gap (same unit as the difference of two range values).

    Returns:
    - bool: True if the ranges fully cover [start, end], False otherwise.
    """
    current_end = start
    # Walk ranges ordered by start value (end value breaks ties). sorted()
    # works on a copy so the caller's list keeps its original order.
    for r_start, r_end in sorted(ranges):
        # A gap wider than one increment means part of the set is uncovered
        if (r_start - current_end) > increment:  # type: ignore
            return False
        # Extend the current coverage
        current_end = max(current_end, r_end)

    # Covered only if the accumulated coverage reaches the end of the set
    return current_end >= end
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def get_union_sources(datasources: list[Datasource], concepts: list[Concept]):
    """
    Find groups of datasources whose conditions pass simplify_conditions.

    A datasource is considered when it outputs every requested concept, lists
    at least one of them as partial, and has a `non_partial_for` clause with
    exactly one concept argument. Such datasources are grouped by the address
    of that argument, and a group is returned only when simplify_conditions
    accepts the `non_partial_for` conditionals of its members.
    """
    by_merge_key: dict[str, list[Datasource]] = defaultdict(list)
    for source in datasources:
        outputs_all = all(c.address in source.output_concepts for c in concepts)
        if not outputs_all:
            continue
        partial_for_any = any(c.address in source.partial_concepts for c in concepts)
        if not partial_for_any or not source.non_partial_for:
            continue
        # only clauses keyed on a single concept can be grouped
        key_concepts = source.non_partial_for.concept_arguments
        if len(key_concepts) != 1:
            continue
        by_merge_key[key_concepts[0].address].append(source)

    union_groups: list[list[Datasource]] = []
    for members in by_merge_key.values():
        member_conditions = [
            m.non_partial_for.conditional for m in members if m.non_partial_for
        ]
        if simplify_conditions(member_conditions):
            union_groups.append(members)
    return union_groups
|
|
@@ -23,9 +23,6 @@ from trilogy.core.processing.nodes import (
|
|
|
23
23
|
SelectNode,
|
|
24
24
|
StrategyNode,
|
|
25
25
|
)
|
|
26
|
-
from trilogy.core.processing.nodes.base_node import (
|
|
27
|
-
concept_list_to_grain,
|
|
28
|
-
)
|
|
29
26
|
from trilogy.core.processing.utility import padding
|
|
30
27
|
|
|
31
28
|
LOGGER_PREFIX = "[GEN_ROOT_MERGE_NODE]"
|
|
@@ -79,6 +76,7 @@ def create_pruned_concept_graph(
|
|
|
79
76
|
datasources: list[Datasource],
|
|
80
77
|
accept_partial: bool = False,
|
|
81
78
|
conditions: WhereClause | None = None,
|
|
79
|
+
depth: int = 0,
|
|
82
80
|
) -> nx.DiGraph:
|
|
83
81
|
orig_g = g
|
|
84
82
|
g = g.copy()
|
|
@@ -104,6 +102,8 @@ def create_pruned_concept_graph(
|
|
|
104
102
|
# filter out synonyms
|
|
105
103
|
if (x := concepts.get(n, None)) and x.address in target_addresses
|
|
106
104
|
}
|
|
105
|
+
# from trilogy.hooks.graph_hook import GraphHook
|
|
106
|
+
# GraphHook().query_graph_built(g)
|
|
107
107
|
relevant_concepts: list[str] = list(relevant_concepts_pre.keys())
|
|
108
108
|
relevent_datasets: list[str] = []
|
|
109
109
|
if not accept_partial:
|
|
@@ -159,8 +159,14 @@ def create_pruned_concept_graph(
|
|
|
159
159
|
|
|
160
160
|
subgraphs = list(nx.connected_components(g.to_undirected()))
|
|
161
161
|
if not subgraphs:
|
|
162
|
+
logger.info(
|
|
163
|
+
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - no subgraphs after node prune"
|
|
164
|
+
)
|
|
162
165
|
return None
|
|
163
166
|
if subgraphs and len(subgraphs) != 1:
|
|
167
|
+
logger.info(
|
|
168
|
+
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - subgraphs are split - have {len(subgraphs)} from {subgraphs}"
|
|
169
|
+
)
|
|
164
170
|
return None
|
|
165
171
|
# add back any relevant edges that might have been partially filtered
|
|
166
172
|
relevant = set(relevant_concepts + relevent_datasets)
|
|
@@ -169,6 +175,9 @@ def create_pruned_concept_graph(
|
|
|
169
175
|
g.add_edge(edge[0], edge[1])
|
|
170
176
|
# if we have no ds nodes at all, for non constant, we can't find it
|
|
171
177
|
if not any([n.startswith("ds~") for n in g.nodes]):
|
|
178
|
+
logger.info(
|
|
179
|
+
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - No datasource nodes found"
|
|
180
|
+
)
|
|
172
181
|
return None
|
|
173
182
|
return g
|
|
174
183
|
|
|
@@ -231,7 +240,7 @@ def create_datasource_node(
|
|
|
231
240
|
depth: int,
|
|
232
241
|
conditions: WhereClause | None = None,
|
|
233
242
|
) -> tuple[StrategyNode, bool]:
|
|
234
|
-
target_grain = Grain(
|
|
243
|
+
target_grain = Grain.from_concepts(all_concepts)
|
|
235
244
|
force_group = False
|
|
236
245
|
if not datasource.grain.issubset(target_grain):
|
|
237
246
|
force_group = True
|
|
@@ -261,7 +270,7 @@ def create_datasource_node(
|
|
|
261
270
|
nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
|
|
262
271
|
accept_partial=accept_partial,
|
|
263
272
|
datasource=datasource,
|
|
264
|
-
grain=Grain(
|
|
273
|
+
grain=Grain.from_concepts(all_concepts),
|
|
265
274
|
conditions=datasource.where.conditional if datasource.where else None,
|
|
266
275
|
preexisting_conditions=(
|
|
267
276
|
conditions.conditional if partial_is_full and conditions else None
|
|
@@ -383,6 +392,7 @@ def gen_select_merge_node(
|
|
|
383
392
|
accept_partial=attempt,
|
|
384
393
|
conditions=conditions,
|
|
385
394
|
datasources=list(environment.datasources.values()),
|
|
395
|
+
depth=depth,
|
|
386
396
|
)
|
|
387
397
|
if pruned_concept_graph:
|
|
388
398
|
logger.info(
|
|
@@ -391,9 +401,7 @@ def gen_select_merge_node(
|
|
|
391
401
|
break
|
|
392
402
|
|
|
393
403
|
if not pruned_concept_graph:
|
|
394
|
-
logger.info(
|
|
395
|
-
f"{padding(depth)}{LOGGER_PREFIX} no covering graph found {attempt}"
|
|
396
|
-
)
|
|
404
|
+
logger.info(f"{padding(depth)}{LOGGER_PREFIX} no covering graph found.")
|
|
397
405
|
return None
|
|
398
406
|
|
|
399
407
|
sub_nodes = resolve_subgraphs(pruned_concept_graph, conditions)
|
|
@@ -446,7 +454,7 @@ def gen_select_merge_node(
|
|
|
446
454
|
parents=parents,
|
|
447
455
|
preexisting_conditions=preexisting_conditions,
|
|
448
456
|
)
|
|
449
|
-
target_grain =
|
|
457
|
+
target_grain = Grain.from_concepts(all_concepts)
|
|
450
458
|
if not base.resolve().grain.issubset(target_grain):
|
|
451
459
|
return GroupNode(
|
|
452
460
|
output_concepts=all_concepts,
|
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from typing import List, Optional
|
|
3
|
+
from typing import List, Optional
|
|
4
4
|
|
|
5
5
|
from trilogy.core.enums import (
|
|
6
6
|
BooleanOperator,
|
|
7
|
-
Granularity,
|
|
8
7
|
JoinType,
|
|
9
|
-
Purpose,
|
|
10
8
|
PurposeLineage,
|
|
11
9
|
)
|
|
12
10
|
from trilogy.core.models import (
|
|
@@ -26,31 +24,6 @@ from trilogy.core.models import (
|
|
|
26
24
|
from trilogy.utility import unique
|
|
27
25
|
|
|
28
26
|
|
|
29
|
-
def concept_list_to_grain(
|
|
30
|
-
inputs: List[Concept], parent_sources: Sequence[QueryDatasource | Datasource]
|
|
31
|
-
) -> Grain:
|
|
32
|
-
candidates = [
|
|
33
|
-
c
|
|
34
|
-
for c in inputs
|
|
35
|
-
if c.purpose == Purpose.KEY and c.granularity != Granularity.SINGLE_ROW
|
|
36
|
-
]
|
|
37
|
-
for x in inputs:
|
|
38
|
-
if x.granularity == Granularity.SINGLE_ROW:
|
|
39
|
-
continue
|
|
40
|
-
if x.purpose == Purpose.PROPERTY and not any(
|
|
41
|
-
[key in candidates for key in (x.keys or [])]
|
|
42
|
-
):
|
|
43
|
-
candidates.append(x)
|
|
44
|
-
elif x.purpose == Purpose.CONSTANT:
|
|
45
|
-
candidates.append(x)
|
|
46
|
-
elif x.purpose == Purpose.METRIC:
|
|
47
|
-
# metrics that were previously calculated must be included in grain
|
|
48
|
-
if any([x in parent.output_concepts for parent in parent_sources]):
|
|
49
|
-
candidates.append(x)
|
|
50
|
-
|
|
51
|
-
return Grain(components=candidates)
|
|
52
|
-
|
|
53
|
-
|
|
54
27
|
def resolve_concept_map(
|
|
55
28
|
inputs: List[QueryDatasource | Datasource],
|
|
56
29
|
targets: List[Concept],
|
|
@@ -351,11 +324,7 @@ class StrategyNode:
|
|
|
351
324
|
p.resolve() for p in self.parents
|
|
352
325
|
]
|
|
353
326
|
|
|
354
|
-
grain = (
|
|
355
|
-
self.grain
|
|
356
|
-
if self.grain
|
|
357
|
-
else concept_list_to_grain(self.output_concepts, [])
|
|
358
|
-
)
|
|
327
|
+
grain = self.grain if self.grain else Grain.from_concepts(self.output_concepts)
|
|
359
328
|
source_map = resolve_concept_map(
|
|
360
329
|
parent_sources,
|
|
361
330
|
targets=self.output_concepts,
|