pytrilogy-0.3.142-cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cp312-win_amd64.pyd +0 -0
- pytrilogy-0.3.142.dist-info/METADATA +555 -0
- pytrilogy-0.3.142.dist-info/RECORD +200 -0
- pytrilogy-0.3.142.dist-info/WHEEL +4 -0
- pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +16 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2669 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +501 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +751 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1392 -0
- trilogy/dialect/bigquery.py +308 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +231 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +769 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +303 -0
- trilogy/scripts/common.py +355 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +303 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +512 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +471 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +713 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/run.py +63 -0
- trilogy/scripts/serve.py +140 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +119 -0
- trilogy/scripts/trilogy.py +68 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
trilogy/core/processing/nodes/filter_node.py
@@ -0,0 +1,75 @@
from typing import List

from trilogy.core.enums import (
    SourceType,
)
from trilogy.core.models.build import (
    BuildComparison,
    BuildConcept,
    BuildConditional,
    BuildGrain,
    BuildParenthetical,
)
from trilogy.core.processing.nodes.base_node import StrategyNode


class FilterNode(StrategyNode):
    """Filter nodes represent a restriction operation
    on a concept that creates a new derived concept.

    They should only output a concept and its filtered
    version, but will have parents that provide all required
    filtering keys as inputs.
    """

    source_type = SourceType.FILTER

    def __init__(
        self,
        input_concepts: List[BuildConcept],
        output_concepts: List[BuildConcept],
        environment,
        whole_grain: bool = False,
        parents: List["StrategyNode"] | None = None,
        depth: int = 0,
        conditions: (
            BuildConditional | BuildComparison | BuildParenthetical | None
        ) = None,
        preexisting_conditions: (
            BuildConditional | BuildComparison | BuildParenthetical | None
        ) = None,
        partial_concepts: List[BuildConcept] | None = None,
        force_group: bool | None = False,
        grain: BuildGrain | None = None,
        existence_concepts: List[BuildConcept] | None = None,
    ):
        super().__init__(
            output_concepts=output_concepts,
            environment=environment,
            whole_grain=whole_grain,
            parents=parents,
            depth=depth,
            input_concepts=input_concepts,
            conditions=conditions,
            preexisting_conditions=preexisting_conditions,
            partial_concepts=partial_concepts,
            force_group=force_group,
            grain=grain,
            existence_concepts=existence_concepts,
        )

    def copy(self) -> "FilterNode":
        return FilterNode(
            input_concepts=list(self.input_concepts),
            output_concepts=list(self.output_concepts),
            environment=self.environment,
            whole_grain=self.whole_grain,
            parents=self.parents,
            depth=self.depth,
            conditions=self.conditions,
            preexisting_conditions=self.preexisting_conditions,
            partial_concepts=list(self.partial_concepts),
            force_group=self.force_group,
            grain=self.grain,
            existence_concepts=list(self.existence_concepts),
        )
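
The copy() idiom above recurs across all of these node classes. A minimal standalone sketch of the pattern (a hypothetical Node class, not the trilogy API): list-valued fields are re-wrapped with list() so each copy can be mutated independently, while environment and parents are deliberately shared by reference.

from typing import List, Optional


class Node:
    def __init__(
        self,
        outputs: List[str],
        environment: dict,
        parents: Optional[List["Node"]] = None,
    ):
        self.outputs = outputs
        self.environment = environment
        self.parents = parents or []

    def copy(self) -> "Node":
        # fresh list objects; environment and parents shared by reference
        return Node(list(self.outputs), self.environment, self.parents)


original = Node(["order.id", "order.id.filtered"], environment={"concepts": {}})
clone = original.copy()
clone.outputs.append("order.total")
assert original.outputs == ["order.id", "order.id.filtered"]  # copy did not leak back
assert clone.environment is original.environment  # shared, as in the classes above
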
trilogy/core/processing/nodes/group_node.py
@@ -0,0 +1,194 @@
from typing import List, Optional

from trilogy.constants import logger
from trilogy.core.enums import SourceType
from trilogy.core.models.build import (
    BuildComparison,
    BuildConcept,
    BuildConditional,
    BuildDatasource,
    BuildOrderBy,
    BuildParenthetical,
)
from trilogy.core.models.build_environment import BuildEnvironment
from trilogy.core.models.execute import QueryDatasource
from trilogy.core.processing.nodes.base_node import (
    StrategyNode,
    resolve_concept_map,
)
from trilogy.core.processing.utility import (
    GroupRequiredResponse,
    find_nullable_concepts,
    is_scalar_condition,
)
from trilogy.utility import unique

LOGGER_PREFIX = "[CONCEPT DETAIL - GROUP NODE]"


class GroupNode(StrategyNode):
    source_type = SourceType.GROUP

    def __init__(
        self,
        output_concepts: List[BuildConcept],
        input_concepts: List[BuildConcept],
        environment: BuildEnvironment,
        whole_grain: bool = False,
        parents: List["StrategyNode"] | None = None,
        depth: int = 0,
        partial_concepts: Optional[List[BuildConcept]] = None,
        nullable_concepts: Optional[List[BuildConcept]] = None,
        force_group: bool | None = None,
        conditions: (
            BuildConditional | BuildComparison | BuildParenthetical | None
        ) = None,
        preexisting_conditions: (
            BuildConditional | BuildComparison | BuildParenthetical | None
        ) = None,
        existence_concepts: List[BuildConcept] | None = None,
        hidden_concepts: set[str] | None = None,
        ordering: BuildOrderBy | None = None,
        required_outputs: List[BuildConcept] | None = None,
    ):
        super().__init__(
            input_concepts=input_concepts,
            output_concepts=output_concepts,
            environment=environment,
            whole_grain=whole_grain,
            parents=parents,
            depth=depth,
            partial_concepts=partial_concepts,
            nullable_concepts=nullable_concepts,
            force_group=force_group,
            conditions=conditions,
            existence_concepts=existence_concepts,
            preexisting_conditions=preexisting_conditions,
            hidden_concepts=hidden_concepts,
            ordering=ordering,
        )
        # the set of concepts required to preserve grain;
        # set by group-by node generation with aggregates
        self.required_outputs = required_outputs

    @classmethod
    def check_if_required(
        cls,
        downstream_concepts: List[BuildConcept],
        parents: list[QueryDatasource | BuildDatasource],
        environment: BuildEnvironment,
        depth: int = 0,
    ) -> GroupRequiredResponse:
        from trilogy.core.processing.discovery_utility import check_if_group_required

        return check_if_group_required(downstream_concepts, parents, environment, depth)

    def _resolve(self) -> QueryDatasource:
        parent_sources: List[QueryDatasource | BuildDatasource] = [
            p.resolve() for p in self.parents
        ]

        grains = self.check_if_required(
            self.output_concepts, parent_sources, self.environment, self.depth
        )
        target_grain = grains.target
        comp_grain = grains.upstream
        # dynamically select if we need to group,
        # because sometimes we are already at the required grain
        if not grains.required and self.force_group is not True:
            # if no group by is needed, just treat it as a select
            source_type = SourceType.SELECT
        else:
            logger.info(
                f"{self.logging_prefix}{LOGGER_PREFIX} Group node has different grain than parents; group is required."
                f" Upstream grains {[str(source.grain) for source in parent_sources]}"
                f" with final grain {comp_grain} vs"
                f" target grain {target_grain}"
                f" delta: {comp_grain - target_grain}"
            )
            source_type = SourceType.GROUP
        source_map = resolve_concept_map(
            parent_sources,
            targets=(
                unique(
                    self.output_concepts + self.conditions.concept_arguments,
                    "address",
                )
                if self.conditions
                else self.output_concepts
            ),
            inherited_inputs=self.input_concepts + self.existence_concepts,
        )
        nullable_addresses = find_nullable_concepts(
            source_map=source_map, joins=[], datasources=parent_sources
        )
        nullable_concepts = [
            x for x in self.output_concepts if x.address in nullable_addresses
        ]
        base = QueryDatasource(
            input_concepts=self.input_concepts,
            output_concepts=self.output_concepts,
            datasources=parent_sources,
            source_type=source_type,
            source_map=source_map,
            joins=[],
            grain=target_grain,
            partial_concepts=self.partial_concepts,
            nullable_concepts=nullable_concepts,
            hidden_concepts=self.hidden_concepts,
            condition=self.conditions,
            ordering=self.ordering,
        )
        # if there is a condition on a group node and it's not scalar,
        # inject an additional CTE
        if self.conditions and not is_scalar_condition(self.conditions):
            base.condition = None
            base.output_concepts = unique(
                list(base.output_concepts) + list(self.conditions.row_arguments),
                "address",
            )
            # un-hide any concepts that are now part of the outputs
            base.hidden_concepts = set(
                [x for x in base.hidden_concepts if x not in base.output_concepts]
            )
            source_map = resolve_concept_map(
                [base],
                targets=self.output_concepts,
                inherited_inputs=base.output_concepts,
            )
            return QueryDatasource(
                input_concepts=base.output_concepts,
                output_concepts=self.output_concepts,
                datasources=[base],
                source_type=SourceType.SELECT,
                source_map=source_map,
                joins=[],
                grain=target_grain,
                nullable_concepts=base.nullable_concepts,
                partial_concepts=self.partial_concepts,
                condition=self.conditions,
                hidden_concepts=self.hidden_concepts,
                ordering=self.ordering,
            )
        return base

    def copy(self) -> "GroupNode":
        return GroupNode(
            input_concepts=list(self.input_concepts),
            output_concepts=list(self.output_concepts),
            environment=self.environment,
            whole_grain=self.whole_grain,
            parents=self.parents,
            depth=self.depth,
            partial_concepts=list(self.partial_concepts),
            nullable_concepts=list(self.nullable_concepts),
            force_group=self.force_group,
            conditions=self.conditions,
            preexisting_conditions=self.preexisting_conditions,
            existence_concepts=list(self.existence_concepts),
            hidden_concepts=set(self.hidden_concepts),
            ordering=self.ordering,
            required_outputs=(
                list(self.required_outputs) if self.required_outputs else None
            ),
        )
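
The grain check in GroupNode._resolve above delegates to check_if_group_required, but the core decision reduces to a containment test. An illustrative simplification, with plain sets standing in for trilogy's grain objects (the real check also handles aggregates, force_group, and partial sources):

def group_required(upstream_grain: set[str], target_grain: set[str]) -> bool:
    # a group is needed when upstream rows are at a finer grain than the
    # target, i.e. the upstream grain has components beyond the target grain
    return not upstream_grain.issubset(target_grain)


# parents keyed by (customer_id, order_id), target output keyed by customer_id:
# a GROUP BY is required to reach the coarser grain
assert group_required({"customer_id", "order_id"}, {"customer_id"}) is True
# parents already at the target grain: a plain SELECT is enough
assert group_required({"customer_id"}, {"customer_id"}) is False
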
trilogy/core/processing/nodes/merge_node.py
@@ -0,0 +1,420 @@
from typing import List, Optional, Tuple

from trilogy.constants import logger
from trilogy.core.enums import (
    JoinType,
    SourceType,
)
from trilogy.core.models.build import (
    BuildComparison,
    BuildConcept,
    BuildConditional,
    BuildDatasource,
    BuildGrain,
    BuildOrderBy,
    BuildParenthetical,
)
from trilogy.core.models.build_environment import BuildEnvironment
from trilogy.core.models.execute import BaseJoin, QueryDatasource, UnnestJoin
from trilogy.core.processing.nodes.base_node import (
    NodeJoin,
    StrategyNode,
    resolve_concept_map,
)
from trilogy.core.processing.utility import find_nullable_concepts, get_node_joins
from trilogy.utility import unique

LOGGER_PREFIX = "[CONCEPT DETAIL - MERGE NODE]"


def deduplicate_nodes(
    merged: dict[str, QueryDatasource | BuildDatasource],
    logging_prefix: str,
    environment: BuildEnvironment,
) -> tuple[bool, dict[str, QueryDatasource | BuildDatasource], set[str]]:
    duplicates = False
    removed: set[str] = set()
    set_map: dict[str, set[str]] = {}
    for k, v in merged.items():
        unique_outputs = [
            # the concept may be in a different environment for a rowset
            (environment.concepts.get(x.address) or x).address
            for x in v.output_concepts
            if x not in v.partial_concepts
        ]
        set_map[k] = set(unique_outputs)
    for k1, v1 in set_map.items():
        found = False
        for k2, v2 in set_map.items():
            if k1 == k2:
                continue
            if (
                v1.issubset(v2)
                and merged[k1].grain.issubset(merged[k2].grain)
                and not merged[k2].partial_concepts
                and not merged[k1].partial_concepts
                and not merged[k2].condition
                and not merged[k1].condition
            ):
                og = merged[k1]
                subset_to = merged[k2]
                logger.info(
                    f"{logging_prefix}{LOGGER_PREFIX} extraneous parent node that is subset of another parent node {og.grain.issubset(subset_to.grain)} {og.grain.components} {subset_to.grain.components}"
                )
                merged = {k: v for k, v in merged.items() if k != k1}
                removed.add(k1)
                duplicates = True
                found = True
                break
        if found:
            break

    return duplicates, merged, removed


def deduplicate_nodes_and_joins(
    joins: List[NodeJoin] | None,
    merged: dict[str, QueryDatasource | BuildDatasource],
    logging_prefix: str,
    environment: BuildEnvironment,
) -> Tuple[List[NodeJoin] | None, dict[str, QueryDatasource | BuildDatasource]]:
    # it's possible that we have more sources than we need
    duplicates = True
    while duplicates:
        duplicates = False
        duplicates, merged, removed = deduplicate_nodes(
            merged, logging_prefix, environment=environment
        )
        # filter out any removed joins
        if joins is not None:
            joins = [
                j
                for j in joins
                if j.left_node.resolve().identifier not in removed
                and j.right_node.resolve().identifier not in removed
            ]
    return joins, merged


class MergeNode(StrategyNode):
    source_type = SourceType.MERGE

    def __init__(
        self,
        input_concepts: List[BuildConcept],
        output_concepts: List[BuildConcept],
        environment,
        whole_grain: bool = False,
        parents: List["StrategyNode"] | None = None,
        node_joins: List[NodeJoin] | None = None,
        join_concepts: Optional[List] = None,
        force_join_type: Optional[JoinType] = None,
        partial_concepts: Optional[List[BuildConcept]] = None,
        nullable_concepts: Optional[List[BuildConcept]] = None,
        force_group: bool | None = None,
        depth: int = 0,
        grain: BuildGrain | None = None,
        conditions: (
            BuildConditional | BuildComparison | BuildParenthetical | None
        ) = None,
        preexisting_conditions: (
            BuildConditional | BuildComparison | BuildParenthetical | None
        ) = None,
        hidden_concepts: set[str] | None = None,
        virtual_output_concepts: List[BuildConcept] | None = None,
        existence_concepts: List[BuildConcept] | None = None,
        ordering: BuildOrderBy | None = None,
    ):
        super().__init__(
            input_concepts=input_concepts,
            output_concepts=output_concepts,
            environment=environment,
            whole_grain=whole_grain,
            parents=parents,
            depth=depth,
            partial_concepts=partial_concepts,
            nullable_concepts=nullable_concepts,
            force_group=force_group,
            grain=grain,
            conditions=conditions,
            preexisting_conditions=preexisting_conditions,
            hidden_concepts=hidden_concepts,
            virtual_output_concepts=virtual_output_concepts,
            existence_concepts=existence_concepts,
            ordering=ordering,
        )
        self.join_concepts = join_concepts
        self.force_join_type = force_join_type
        self.node_joins: List[NodeJoin] | None = node_joins

        final_joins: List[NodeJoin] = []
        if self.node_joins is not None:
            for join in self.node_joins:
                if join.left_node.resolve().name == join.right_node.resolve().name:
                    continue
                final_joins.append(join)
            self.node_joins = final_joins

    def translate_node_joins(self, node_joins: List[NodeJoin]) -> List[BaseJoin]:
        joins = []
        for join in node_joins:
            left = join.left_node.resolve()
            right = join.right_node.resolve()
            if left.identifier == right.identifier:
                raise SyntaxError(f"Cannot join node {left.identifier} to itself")
            joins.append(
                BaseJoin(
                    left_datasource=left,
                    right_datasource=right,
                    join_type=join.join_type,
                    concepts=join.concepts,
                    concept_pairs=join.concept_pairs,
                    modifiers=join.modifiers,
                )
            )
        return joins

    def create_full_joins(self, dataset_list: List[QueryDatasource | BuildDatasource]):
        joins = []
        seen = set()
        for left_value in dataset_list:
            for right_value in dataset_list:
                if left_value.identifier == right_value.identifier:
                    continue
                if left_value.identifier in seen and right_value.identifier in seen:
                    continue
                joins.append(
                    BaseJoin(
                        left_datasource=left_value,
                        right_datasource=right_value,
                        join_type=JoinType.FULL,
                        concepts=[],
                    )
                )
                seen.add(left_value.identifier)
                seen.add(right_value.identifier)
        return joins

    def generate_joins(
        self,
        final_datasets,
        final_joins: List[NodeJoin] | None,
        pregrain: BuildGrain,
        grain: BuildGrain,
        environment: BuildEnvironment,
    ) -> List[BaseJoin | UnnestJoin]:
        # finally, join between them for unique values
        dataset_list: List[QueryDatasource | BuildDatasource] = sorted(
            final_datasets, key=lambda x: -len(x.grain.components)
        )

        logger.info(
            f"{self.logging_prefix}{LOGGER_PREFIX} Merge node has {len(dataset_list)} parents, starting merge"
        )
        if final_joins is None:
            if not pregrain.components:
                logger.info(
                    f"{self.logging_prefix}{LOGGER_PREFIX} no grain components, doing full join"
                )
                joins = self.create_full_joins(dataset_list)
            else:
                logger.info(
                    f"{self.logging_prefix}{LOGGER_PREFIX} inferring node joins to target grain {str(grain)}"
                )
                joins = get_node_joins(dataset_list, environment=environment)
        elif final_joins:
            logger.info(
                f"{self.logging_prefix}{LOGGER_PREFIX} translating provided node joins {len(final_joins)}"
            )
            joins = self.translate_node_joins(final_joins)
        else:
            logger.info(
                f"{self.logging_prefix}{LOGGER_PREFIX} Final joins is not null {final_joins} but is empty, skipping join generation"
            )
            return []
        return joins

    def _resolve(self) -> QueryDatasource:
        parent_sources: List[QueryDatasource | BuildDatasource] = [
            p.resolve() for p in self.parents
        ]
        merged: dict[str, QueryDatasource | BuildDatasource] = {}
        final_joins: List[NodeJoin] | None = self.node_joins
        for source in parent_sources:
            if source.identifier in merged:
                logger.info(
                    f"{self.logging_prefix}{LOGGER_PREFIX} merging parent node with {source.identifier} into existing"
                )
                merged[source.identifier] = merged[source.identifier] + source
            else:
                merged[source.identifier] = source

        # it's possible that we have more sources than we need
        final_joins, merged = deduplicate_nodes_and_joins(
            final_joins, merged, self.logging_prefix, self.environment
        )
        # early exit if we can just return the parent
        final_datasets: List[QueryDatasource | BuildDatasource] = list(merged.values())

        existence_final = [
            x
            for x in final_datasets
            if all([y in self.existence_concepts for y in x.output_concepts])
        ]
        if len(merged.keys()) == 1:
            final: QueryDatasource | BuildDatasource = list(merged.values())[0]
            if (
                set([c.address for c in final.output_concepts])
                == set([c.address for c in self.output_concepts])
                and not self.conditions
                and isinstance(final, QueryDatasource)
            ):
                logger.info(
                    f"{self.logging_prefix}{LOGGER_PREFIX} Merge node has only one parent with the same"
                    " outputs as this merge node, dropping merge node "
                )
                # push up any conditions we need
                final.ordering = self.ordering
                return final

        # if we have multiple candidates, see if one is good enough
        for dataset in final_datasets:
            output_set = set(
                [
                    c.address
                    for c in dataset.output_concepts
                    if c.address not in [x.address for x in dataset.partial_concepts]
                ]
            )
            if (
                all([c.address in output_set for c in self.all_concepts])
                and not self.conditions
                and isinstance(dataset, QueryDatasource)
            ):
                logger.info(
                    f"{self.logging_prefix}{LOGGER_PREFIX} Merge node not required as parent node {dataset.source_type}"
                    f" has all required output properties with partial {[c.address for c in dataset.partial_concepts]}"
                    f" and self has no conditions ({self.conditions})"
                )
                dataset.ordering = self.ordering
                return dataset

        pregrain = BuildGrain()

        for source in final_datasets:
            if all(
                [x.address in self.existence_concepts for x in source.output_concepts]
            ):
                logger.info(
                    f"{self.logging_prefix}{LOGGER_PREFIX} skipping existence only source with {source.output_concepts} from grain accumulation"
                )
                continue
            logger.info(
                f"{self.logging_prefix}{LOGGER_PREFIX} adding source grain {source.grain} from source {source.identifier} to pregrain"
            )
            pregrain += source.grain
            logger.info(
                f"{self.logging_prefix}{LOGGER_PREFIX} pregrain is now {pregrain}"
            )

        pregrain = BuildGrain.from_concepts(
            pregrain.components, environment=self.environment
        )

        grain = self.grain if self.grain else pregrain
        logger.info(
            f"{self.logging_prefix}{LOGGER_PREFIX} has pre grain {pregrain} and final merge node grain {grain}"
        )
        join_candidates = [x for x in final_datasets if x not in existence_final]
        if len(join_candidates) > 1:
            joins: List[BaseJoin | UnnestJoin] = self.generate_joins(
                join_candidates, final_joins, pregrain, grain, self.environment
            )
        else:
            joins = []

        logger.info(
            f"{self.logging_prefix}{LOGGER_PREFIX} Final join count for CTE parent count {len(join_candidates)} is {len(joins)}"
        )
        full_join_concepts = []
        for join in joins:
            if isinstance(join, BaseJoin) and join.join_type == JoinType.FULL:
                full_join_concepts += join.input_concepts

        if self.force_group is True:
            force_group = True
        elif self.whole_grain:
            force_group = False
        elif self.force_group is False:
            force_group = False
        elif not any(
            [d.grain.issubset(grain) for d in final_datasets]
        ) and not pregrain.issubset(grain):
            logger.info(
                f"{self.logging_prefix}{LOGGER_PREFIX} no parents include full grain {grain} and pregrain {pregrain} does not match, assume must group to grain. Have {[str(d.grain) for d in final_datasets]}"
            )
            force_group = True
        else:
            force_group = None

        qd_joins: List[BaseJoin | UnnestJoin] = [*joins]

        source_map = resolve_concept_map(
            final_datasets,
            targets=self.output_concepts,
            inherited_inputs=self.input_concepts + self.existence_concepts,
            full_joins=full_join_concepts,
        )
        nullable_concepts = find_nullable_concepts(
            source_map=source_map, joins=joins, datasources=final_datasets
        )
        if force_group:
            grain = BuildGrain.from_concepts(
                self.output_concepts, environment=self.environment
            )
            logger.info(
                f"{self.logging_prefix}{LOGGER_PREFIX} forcing group by to achieve grain {grain}"
            )
        qds = QueryDatasource(
            input_concepts=unique(self.input_concepts, "address"),
            output_concepts=unique(self.output_concepts, "address"),
            datasources=final_datasets,
            source_type=self.source_type,
            source_map=source_map,
            joins=qd_joins,
            grain=grain,
            nullable_concepts=[
                x for x in self.output_concepts if x.address in nullable_concepts
            ],
            partial_concepts=self.partial_concepts,
            force_group=force_group,
            condition=self.conditions,
            hidden_concepts=self.hidden_concepts,
            ordering=self.ordering,
        )
        return qds

    def copy(self) -> "MergeNode":
        return MergeNode(
            input_concepts=list(self.input_concepts),
            output_concepts=list(self.output_concepts),
            environment=self.environment,
            whole_grain=self.whole_grain,
            parents=self.parents,
            depth=self.depth,
            partial_concepts=list(self.partial_concepts),
            force_group=self.force_group,
            grain=self.grain,
            conditions=self.conditions,
            preexisting_conditions=self.preexisting_conditions,
            nullable_concepts=list(self.nullable_concepts),
            hidden_concepts=set(self.hidden_concepts),
            virtual_output_concepts=list(self.virtual_output_concepts),
            node_joins=list(self.node_joins) if self.node_joins else None,
            join_concepts=list(self.join_concepts) if self.join_concepts else None,
            force_join_type=self.force_join_type,
            existence_concepts=list(self.existence_concepts),
            ordering=self.ordering,
        )
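
The deduplicate_nodes loop near the top of merge_node.py is a subset-elimination pass over candidate parent sources. A standalone sketch of just that idea, using plain sets rather than the trilogy API (the real version also requires matching grains and the absence of partial concepts and conditions before dropping a node):

def dedupe(outputs_by_node: dict[str, set[str]]) -> dict[str, set[str]]:
    kept = dict(outputs_by_node)
    changed = True
    while changed:  # repeat until no node is removable, as the outer while loop above does
        changed = False
        for k1, v1 in list(kept.items()):
            # a node whose outputs are covered by another node is redundant
            if any(k1 != k2 and v1.issubset(v2) for k2, v2 in kept.items()):
                del kept[k1]
                changed = True
                break
    return kept


nodes = {
    "orders": {"order_id", "customer_id"},
    "customers_small": {"customer_id"},  # subset of "orders" -> removed
}
assert dedupe(nodes) == {"orders": {"order_id", "customer_id"}}
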