pytrilogy 0.3.149__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
- pytrilogy-0.3.149.dist-info/METADATA +555 -0
- pytrilogy-0.3.149.dist-info/RECORD +207 -0
- pytrilogy-0.3.149.dist-info/WHEEL +4 -0
- pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +27 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +119 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +454 -0
- trilogy/core/env_processor.py +239 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1240 -0
- trilogy/core/graph_models.py +142 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2670 -0
- trilogy/core/models/build.py +2603 -0
- trilogy/core/models/build_environment.py +165 -0
- trilogy/core/models/core.py +506 -0
- trilogy/core/models/datasource.py +436 -0
- trilogy/core/models/environment.py +756 -0
- trilogy/core/models/execute.py +1213 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +270 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +207 -0
- trilogy/core/processing/node_generators/node_merge_node.py +695 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +846 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +522 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +604 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1432 -0
- trilogy/dialect/bigquery.py +314 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +159 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +397 -0
- trilogy/dialect/enums.py +151 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/__init__.py +17 -0
- trilogy/execution/config.py +119 -0
- trilogy/execution/state/__init__.py +0 -0
- trilogy/execution/state/exceptions.py +26 -0
- trilogy/execution/state/file_state_store.py +0 -0
- trilogy/execution/state/sqllite_state_store.py +0 -0
- trilogy/execution/state/state_store.py +406 -0
- trilogy/executor.py +692 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +135 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2876 -0
- trilogy/parsing/render.py +775 -0
- trilogy/parsing/trilogy.lark +546 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +45 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +306 -0
- trilogy/scripts/common.py +432 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +311 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +555 -0
- trilogy/scripts/environment.py +59 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +487 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +762 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/refresh.py +161 -0
- trilogy/scripts/run.py +79 -0
- trilogy/scripts/serve.py +202 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +143 -0
- trilogy/scripts/trilogy.py +75 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
2
|
+
|
|
3
|
+
from trilogy.core.exceptions import UnresolvableQueryException
|
|
4
|
+
from trilogy.core.models.author import Concept
|
|
5
|
+
from trilogy.core.models.build import BuildConcept, BuildWhereClause
|
|
6
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
7
|
+
from trilogy.core.models.environment import Environment
|
|
8
|
+
|
|
9
|
+
from .base_node import NodeJoin, StrategyNode, WhereSafetyNode
|
|
10
|
+
from .filter_node import FilterNode
|
|
11
|
+
from .group_node import GroupNode
|
|
12
|
+
from .merge_node import MergeNode
|
|
13
|
+
from .recursive_node import RecursiveNode
|
|
14
|
+
from .select_node_v2 import ConstantNode, SelectNode
|
|
15
|
+
from .union_node import UnionNode
|
|
16
|
+
from .unnest_node import UnnestNode
|
|
17
|
+
from .window_node import WindowNode
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class History(BaseModel):
    """Record of node searches that have been started and finished.

    Every search is identified by a string fingerprint built by
    ``_concepts_to_lookup`` from the sorted concept addresses, the
    ``accept_partial`` flag and, when truthy, the conditions.
    """

    base_environment: Environment
    local_base_concepts: dict[str, Concept] = Field(default_factory=dict)
    # fingerprint -> node stored by search_to_history (may be None)
    history: dict[str, StrategyNode | None] = Field(default_factory=dict)
    # fingerprint -> result of gen_select_node (may be None)
    select_history: dict[str, StrategyNode | None] = Field(default_factory=dict)
    # fingerprint -> number of log_start calls not yet cleared by log_end
    started: dict[str, int] = Field(default_factory=dict)
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def _concepts_to_lookup(
        self,
        search: list[BuildConcept],
        accept_partial: bool,
        conditions: BuildWhereClause | None = None,
    ) -> str:
        """Build the fingerprint string used as a key in every lookup table."""
        addresses = sorted(concept.address for concept in search)
        fingerprint = "-".join(addresses) + str(accept_partial)
        if conditions:
            fingerprint += str(conditions)
        return fingerprint

    def search_to_history(
        self,
        search: list[BuildConcept],
        accept_partial: bool,
        output: StrategyNode | None,
        conditions: BuildWhereClause | None = None,
    ):
        """Store ``output`` for this search and clear its started counter."""
        key = self._concepts_to_lookup(search, accept_partial, conditions=conditions)
        self.history[key] = output
        self.log_end(search, accept_partial=accept_partial, conditions=conditions)

    def get_history(
        self,
        search: list[BuildConcept],
        conditions: BuildWhereClause | None = None,
        accept_partial: bool = False,
        parent_key: str = "",
    ) -> StrategyNode | None | bool:
        """Look up a previously stored result for this search.

        Returns ``False`` when nothing was stored under the key, the stored
        value itself when it is falsy (e.g. ``None``), and otherwise a copy
        of the stored node.

        Raises:
            ValueError: if a non-empty ``parent_key`` equals the computed key.
        """
        key = self._concepts_to_lookup(search, accept_partial, conditions)
        if parent_key and parent_key == key:
            raise ValueError(
                f"Parent key {parent_key} is the same as the current key {key}"
            )
        if key not in self.history:
            return False
        cached = self.history[key]
        # truthy nodes are handed out as copies; falsy entries are returned as-is
        return cached.copy() if cached else cached

    def log_start(
        self,
        search: list[BuildConcept],
        accept_partial: bool = False,
        conditions: BuildWhereClause | None = None,
    ):
        """Count one more start for this search.

        Raises:
            UnresolvableQueryException: once the count for the key exceeds 5.
        """
        key = self._concepts_to_lookup(
            search, accept_partial=accept_partial, conditions=conditions
        )
        attempts = self.started.get(key, 0) + 1
        self.started[key] = attempts
        if attempts > 5:
            raise UnresolvableQueryException(
                f"Was unable to resolve datasources to serve this query from model; unresolvable set was {search}. You may be querying unrelated concepts."
            )

    def log_end(
        self,
        search: list[BuildConcept],
        accept_partial: bool = False,
        conditions: BuildWhereClause | None = None,
    ):
        """Drop the started counter for this search, if one exists."""
        key = self._concepts_to_lookup(
            search, accept_partial=accept_partial, conditions=conditions
        )
        self.started.pop(key, None)

    def check_started(
        self,
        search: list[BuildConcept],
        accept_partial: bool = False,
        conditions: BuildWhereClause | None = None,
    ):
        """Return True when this search currently has a started counter."""
        key = self._concepts_to_lookup(search, accept_partial, conditions=conditions)
        return key in self.started

    def gen_select_node(
        self,
        concepts: list[BuildConcept],
        environment: BuildEnvironment,
        g,
        depth: int,
        fail_if_not_found: bool = False,
        accept_partial: bool = False,
        conditions: BuildWhereClause | None = None,
    ) -> StrategyNode | None:
        """Return the select node for ``concepts``, generating it at most once.

        The generator's result is stored in ``select_history`` under the
        search fingerprint; the generator is called with ``depth + 1``.
        Truthy results are always returned as copies.
        """
        # imported inside the method, as in the original implementation
        from trilogy.core.processing.node_generators.select_node import gen_select_node

        fingerprint = self._concepts_to_lookup(
            concepts, accept_partial, conditions=conditions
        )
        if fingerprint not in self.select_history:
            self.select_history[fingerprint] = gen_select_node(
                concepts,
                environment,
                g,
                depth + 1,
                fail_if_not_found=fail_if_not_found,
                accept_partial=accept_partial,
                conditions=conditions,
            )
        cached = self.select_history[fingerprint]
        # all nodes must be copied before returning
        return cached.copy() if cached else cached
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Names exported by this package.
__all__ = [
    "FilterNode",
    "GroupNode",
    "MergeNode",
    "SelectNode",
    "WindowNode",
    "StrategyNode",
    "NodeJoin",
    "UnnestNode",
    "ConstantNode",
    "UnionNode",
    "History",
    "WhereSafetyNode",
    "RecursiveNode",
]
|
|
@@ -0,0 +1,522 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from trilogy.core.enums import (
|
|
6
|
+
BooleanOperator,
|
|
7
|
+
Derivation,
|
|
8
|
+
JoinType,
|
|
9
|
+
Modifier,
|
|
10
|
+
SourceType,
|
|
11
|
+
)
|
|
12
|
+
from trilogy.core.models.build import (
|
|
13
|
+
BuildComparison,
|
|
14
|
+
BuildConcept,
|
|
15
|
+
BuildConditional,
|
|
16
|
+
BuildDatasource,
|
|
17
|
+
BuildGrain,
|
|
18
|
+
BuildOrderBy,
|
|
19
|
+
BuildParenthetical,
|
|
20
|
+
LooseBuildConceptList,
|
|
21
|
+
)
|
|
22
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
23
|
+
from trilogy.core.models.execute import ConceptPair, QueryDatasource, UnnestJoin
|
|
24
|
+
from trilogy.utility import unique
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def resolve_concept_map(
    inputs: List[QueryDatasource | BuildDatasource],
    targets: List[BuildConcept],
    inherited_inputs: List[BuildConcept],
    full_joins: List[BuildConcept] | None = None,
) -> dict[str, set[BuildDatasource | QueryDatasource | UnnestJoin]]:
    """Map concept addresses to the input sources that provide them.

    Three passes are made:

    1. Each output concept listed in its source's ``full_concepts`` is
       mapped to that source. Only the first such source is kept, unless
       the address belongs to ``full_joins``, in which case every such
       source is added.
    2. Output concepts that are inherited (by address or via a pseudonym)
       and still have no source are mapped to the first source offering them.
    3. Every target that is not inherited is mapped to an empty set.

    Concepts hidden on a ``QueryDatasource`` are skipped in passes 1 and 2.
    """

    def _hidden_in(source, address) -> bool:
        # only QueryDatasource inputs are checked for hidden concepts
        return isinstance(source, QueryDatasource) and address in source.hidden_concepts

    targets = targets or []
    concept_map: dict[str, set[BuildDatasource | QueryDatasource | UnnestJoin]] = (
        defaultdict(set)
    )
    full_addresses = {c.address for c in full_joins} if full_joins else set()
    inherited = {t.address for t in inherited_inputs}

    # first loop, complete (non-partial) concepts only
    for source in inputs:
        for concept in source.output_concepts:
            address = concept.address
            if address not in source.full_concepts:
                continue
            if _hidden_in(source, address):
                continue
            # (a pseudonym-based mapping of equivalent targets was sketched
            # here in commented-out form; it is not active)
            if address in full_addresses or address not in concept_map:
                concept_map[address].add(source)

    # second loop, include partials
    for source in inputs:
        for concept in source.output_concepts:
            address = concept.address
            if address not in inherited:
                pseudonyms = concept.pseudonyms
                if not (pseudonyms and any(s in inherited for s in pseudonyms)):
                    continue
            if _hidden_in(source, address):
                continue
            if not concept_map.get(address):
                concept_map[address].add(source)

    # this adds our new derived metrics, which are not created in this CTE
    for target in targets:
        if target.address not in inherited:
            # an empty source means it is defined in this CTE
            concept_map[target.address] = set()
    return concept_map
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_all_parent_partial(
    all_concepts: List[BuildConcept], parents: List["StrategyNode"]
) -> List[BuildConcept]:
    """Return the concepts that are partial in every parent that outputs them.

    A concept is kept when at least one parent lists its address among
    ``partial_concepts`` and every parent whose ``output_lcl`` contains the
    address also has it in ``partial_lcl``. The result is de-duplicated by
    address via ``unique``.
    """
    selected = []
    for concept in all_concepts:
        address = concept.address
        listed_as_partial = any(
            any(partial.address == address for partial in parent.partial_concepts)
            for parent in parents
        )
        if not listed_as_partial:
            continue
        partial_everywhere = all(
            address in parent.partial_lcl
            for parent in parents
            if address in parent.output_lcl
        )
        if partial_everywhere:
            selected.append(concept)
    return unique(selected, "address")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_all_parent_nullable(
    all_concepts: List[BuildConcept], parents: List["StrategyNode"]
) -> List[BuildConcept]:
    """Return the concepts that at least one parent lists as nullable.

    The result is de-duplicated by address via ``unique``.

    Raises:
        ValueError: if any entry in ``parents`` is falsy; the whole list is
            passed as the exception argument.
    """
    if not all(parents):
        raise ValueError(parents)
    nullable = []
    for concept in all_concepts:
        address = concept.address
        if any(
            any(candidate.address == address for candidate in parent.nullable_concepts)
            for parent in parents
        ):
            nullable.append(concept)
    return unique(nullable, "address")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class StrategyNode:
|
|
131
|
+
source_type = SourceType.ABSTRACT
|
|
132
|
+
|
|
133
|
+
def __init__(
|
|
134
|
+
self,
|
|
135
|
+
input_concepts: List[BuildConcept],
|
|
136
|
+
output_concepts: List[BuildConcept],
|
|
137
|
+
environment: BuildEnvironment,
|
|
138
|
+
whole_grain: bool = False,
|
|
139
|
+
parents: List["StrategyNode"] | None = None,
|
|
140
|
+
partial_concepts: List[BuildConcept] | None = None,
|
|
141
|
+
nullable_concepts: List[BuildConcept] | None = None,
|
|
142
|
+
depth: int = 0,
|
|
143
|
+
conditions: (
|
|
144
|
+
BuildConditional | BuildComparison | BuildParenthetical | None
|
|
145
|
+
) = None,
|
|
146
|
+
preexisting_conditions: (
|
|
147
|
+
BuildConditional | BuildComparison | BuildParenthetical | None
|
|
148
|
+
) = None,
|
|
149
|
+
force_group: bool | None = None,
|
|
150
|
+
grain: Optional[BuildGrain] = None,
|
|
151
|
+
hidden_concepts: set[str] | None = None,
|
|
152
|
+
existence_concepts: List[BuildConcept] | None = None,
|
|
153
|
+
virtual_output_concepts: List[BuildConcept] | None = None,
|
|
154
|
+
ordering: BuildOrderBy | None = None,
|
|
155
|
+
):
|
|
156
|
+
self.input_concepts: List[BuildConcept] = (
|
|
157
|
+
unique(input_concepts, "address") if input_concepts else []
|
|
158
|
+
)
|
|
159
|
+
self.input_lcl = LooseBuildConceptList(concepts=self.input_concepts)
|
|
160
|
+
self.output_concepts: List[BuildConcept] = unique(output_concepts, "address")
|
|
161
|
+
self.output_lcl = LooseBuildConceptList(concepts=self.output_concepts)
|
|
162
|
+
|
|
163
|
+
self.environment = environment
|
|
164
|
+
self.whole_grain = whole_grain
|
|
165
|
+
self.parents = parents or []
|
|
166
|
+
self.resolution_cache: Optional[QueryDatasource] = None
|
|
167
|
+
|
|
168
|
+
self.nullable_concepts = nullable_concepts or get_all_parent_nullable(
|
|
169
|
+
self.output_concepts, self.parents
|
|
170
|
+
)
|
|
171
|
+
self.ordering = ordering
|
|
172
|
+
self.depth = depth
|
|
173
|
+
self.conditions = conditions
|
|
174
|
+
self.grain = grain
|
|
175
|
+
self.force_group = force_group
|
|
176
|
+
self.tainted = False
|
|
177
|
+
self.hidden_concepts = hidden_concepts or set()
|
|
178
|
+
self.existence_concepts = existence_concepts or []
|
|
179
|
+
self.virtual_output_concepts = virtual_output_concepts or []
|
|
180
|
+
self.preexisting_conditions = preexisting_conditions
|
|
181
|
+
if self.conditions and not self.preexisting_conditions:
|
|
182
|
+
self.preexisting_conditions = self.conditions
|
|
183
|
+
elif (
|
|
184
|
+
self.conditions
|
|
185
|
+
and self.preexisting_conditions
|
|
186
|
+
and self.conditions != self.preexisting_conditions
|
|
187
|
+
):
|
|
188
|
+
self.preexisting_conditions = BuildConditional(
|
|
189
|
+
left=self.conditions,
|
|
190
|
+
right=self.preexisting_conditions,
|
|
191
|
+
operator=BooleanOperator.AND,
|
|
192
|
+
)
|
|
193
|
+
self.partial_concepts: list[BuildConcept] = self.derive_partials(
|
|
194
|
+
partial_concepts
|
|
195
|
+
)
|
|
196
|
+
self.validate_inputs()
|
|
197
|
+
self.log = True
|
|
198
|
+
|
|
199
|
+
def validate_inputs(self):
|
|
200
|
+
if not self.parents:
|
|
201
|
+
return
|
|
202
|
+
non_hidden = set()
|
|
203
|
+
hidden = set()
|
|
204
|
+
usable_outputs = set()
|
|
205
|
+
for x in self.parents:
|
|
206
|
+
for z in x.usable_outputs:
|
|
207
|
+
usable_outputs.add(z.address)
|
|
208
|
+
non_hidden.add(z.address)
|
|
209
|
+
for psd in z.pseudonyms:
|
|
210
|
+
non_hidden.add(psd)
|
|
211
|
+
for z in x.hidden_concepts:
|
|
212
|
+
hidden.add(z)
|
|
213
|
+
if not all([x.address in non_hidden for x in self.input_concepts]):
|
|
214
|
+
missing = [x for x in self.input_concepts if x.address not in non_hidden]
|
|
215
|
+
raise ValueError(
|
|
216
|
+
f"Invalid input concepts; {missing} are missing non-hidden parent nodes; have {non_hidden} and hidden {hidden} from root {usable_outputs}"
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
def add_parents(self, parents: list["StrategyNode"]):
|
|
220
|
+
self.parents += parents
|
|
221
|
+
self.partial_concepts = self.derive_partials(None)
|
|
222
|
+
return self
|
|
223
|
+
|
|
224
|
+
def set_preexisting_conditions(
|
|
225
|
+
self, conditions: BuildConditional | BuildComparison | BuildParenthetical
|
|
226
|
+
):
|
|
227
|
+
self.preexisting_conditions = conditions
|
|
228
|
+
return self
|
|
229
|
+
|
|
230
|
+
def add_condition(
|
|
231
|
+
self, condition: BuildConditional | BuildComparison | BuildParenthetical
|
|
232
|
+
):
|
|
233
|
+
if self.conditions and condition == self.conditions:
|
|
234
|
+
return self
|
|
235
|
+
if self.conditions:
|
|
236
|
+
self.conditions = BuildConditional(
|
|
237
|
+
left=self.conditions, right=condition, operator=BooleanOperator.AND
|
|
238
|
+
)
|
|
239
|
+
else:
|
|
240
|
+
self.conditions = condition
|
|
241
|
+
self.set_preexisting_conditions(condition)
|
|
242
|
+
self.rebuild_cache()
|
|
243
|
+
return self
|
|
244
|
+
|
|
245
|
+
def derive_partials(
|
|
246
|
+
self, partial_concepts: List[BuildConcept] | None = None
|
|
247
|
+
) -> List[BuildConcept]:
|
|
248
|
+
# validate parents exist
|
|
249
|
+
# assign partial values where needed
|
|
250
|
+
for parent in self.parents:
|
|
251
|
+
if not parent:
|
|
252
|
+
raise SyntaxError("Unresolvable parent")
|
|
253
|
+
|
|
254
|
+
# TODO: make this accurate
|
|
255
|
+
if self.parents and partial_concepts is None:
|
|
256
|
+
partials = get_all_parent_partial(self.output_concepts, self.parents)
|
|
257
|
+
elif partial_concepts is None:
|
|
258
|
+
partials = []
|
|
259
|
+
else:
|
|
260
|
+
partials = partial_concepts
|
|
261
|
+
self.partial_lcl = LooseBuildConceptList(concepts=partials)
|
|
262
|
+
return partials
|
|
263
|
+
|
|
264
|
+
def add_output_concepts(
|
|
265
|
+
self, concepts: List[BuildConcept], rebuild: bool = True, unhide: bool = True
|
|
266
|
+
):
|
|
267
|
+
for concept in concepts:
|
|
268
|
+
if concept.address not in self.output_lcl.addresses:
|
|
269
|
+
self.output_concepts.append(concept)
|
|
270
|
+
if unhide and concept.address in self.hidden_concepts:
|
|
271
|
+
self.hidden_concepts.remove(concept.address)
|
|
272
|
+
self.output_lcl = LooseBuildConceptList(concepts=self.output_concepts)
|
|
273
|
+
if rebuild:
|
|
274
|
+
self.rebuild_cache()
|
|
275
|
+
return self
|
|
276
|
+
|
|
277
|
+
def add_partial_concepts(self, concepts: List[BuildConcept], rebuild: bool = True):
|
|
278
|
+
for concept in concepts:
|
|
279
|
+
if concept.address not in self.partial_lcl.addresses:
|
|
280
|
+
self.partial_concepts.append(concept)
|
|
281
|
+
self.partial_lcl = LooseBuildConceptList(concepts=self.partial_concepts)
|
|
282
|
+
if rebuild:
|
|
283
|
+
self.rebuild_cache()
|
|
284
|
+
return self
|
|
285
|
+
|
|
286
|
+
def add_existence_concepts(
|
|
287
|
+
self, concepts: List[BuildConcept], rebuild: bool = True
|
|
288
|
+
):
|
|
289
|
+
for concept in concepts:
|
|
290
|
+
if concept.address not in self.output_concepts:
|
|
291
|
+
self.existence_concepts.append(concept)
|
|
292
|
+
if rebuild:
|
|
293
|
+
self.rebuild_cache()
|
|
294
|
+
return self
|
|
295
|
+
|
|
296
|
+
def set_visible_concepts(self, concepts: List[BuildConcept]):
|
|
297
|
+
for x in self.output_concepts:
|
|
298
|
+
if x.address not in [c.address for c in concepts]:
|
|
299
|
+
self.hidden_concepts.add(x.address)
|
|
300
|
+
return self
|
|
301
|
+
|
|
302
|
+
def set_output_concepts(
|
|
303
|
+
self,
|
|
304
|
+
concepts: List[BuildConcept],
|
|
305
|
+
rebuild: bool = True,
|
|
306
|
+
change_visibility: bool = True,
|
|
307
|
+
):
|
|
308
|
+
# exit if no changes
|
|
309
|
+
if self.output_concepts == concepts:
|
|
310
|
+
return self
|
|
311
|
+
self.output_concepts = concepts
|
|
312
|
+
if self.hidden_concepts and change_visibility:
|
|
313
|
+
self.hidden_concepts = set(
|
|
314
|
+
x for x in self.hidden_concepts if x not in concepts
|
|
315
|
+
)
|
|
316
|
+
|
|
317
|
+
self.output_lcl = LooseBuildConceptList(concepts=self.output_concepts)
|
|
318
|
+
|
|
319
|
+
if rebuild:
|
|
320
|
+
self.rebuild_cache()
|
|
321
|
+
return self
|
|
322
|
+
|
|
323
|
+
def add_output_concept(self, concept: BuildConcept, rebuild: bool = True):
|
|
324
|
+
return self.add_output_concepts([concept], rebuild)
|
|
325
|
+
|
|
326
|
+
def hide_output_concepts(
|
|
327
|
+
self, concepts: List[BuildConcept] | list[str] | set[str], rebuild: bool = True
|
|
328
|
+
):
|
|
329
|
+
for x in concepts:
|
|
330
|
+
if isinstance(x, BuildConcept):
|
|
331
|
+
self.hidden_concepts.add(x.address)
|
|
332
|
+
else:
|
|
333
|
+
self.hidden_concepts.add(x)
|
|
334
|
+
if rebuild:
|
|
335
|
+
self.rebuild_cache()
|
|
336
|
+
return self
|
|
337
|
+
|
|
338
|
+
def unhide_output_concepts(
|
|
339
|
+
self, concepts: List[BuildConcept], rebuild: bool = True
|
|
340
|
+
):
|
|
341
|
+
self.hidden_concepts = set(x for x in self.hidden_concepts if x not in concepts)
|
|
342
|
+
if rebuild:
|
|
343
|
+
self.rebuild_cache()
|
|
344
|
+
return self
|
|
345
|
+
|
|
346
|
+
@property
|
|
347
|
+
def usable_outputs(self) -> list[BuildConcept]:
|
|
348
|
+
return [
|
|
349
|
+
x for x in self.output_concepts if x.address not in self.hidden_concepts
|
|
350
|
+
]
|
|
351
|
+
|
|
352
|
+
@property
|
|
353
|
+
def logging_prefix(self) -> str:
|
|
354
|
+
return "\t" * self.depth
|
|
355
|
+
|
|
356
|
+
@property
|
|
357
|
+
def all_concepts(self) -> list[BuildConcept]:
|
|
358
|
+
return [*self.output_concepts]
|
|
359
|
+
|
|
360
|
+
@property
|
|
361
|
+
def all_used_concepts(self) -> list[BuildConcept]:
|
|
362
|
+
return [*self.input_concepts, *self.existence_concepts]
|
|
363
|
+
|
|
364
|
+
def __repr__(self):
|
|
365
|
+
concepts = self.all_concepts
|
|
366
|
+
addresses = [c.address for c in concepts]
|
|
367
|
+
contents = ",".join(sorted(addresses[:3]))
|
|
368
|
+
if len(addresses) > 3:
|
|
369
|
+
extra = len(addresses) - 3
|
|
370
|
+
contents += f"...{extra} more"
|
|
371
|
+
return f"{self.__class__.__name__}<{contents}>"
|
|
372
|
+
|
|
373
|
+
def _resolve(self) -> QueryDatasource:
|
|
374
|
+
parent_sources: List[QueryDatasource | BuildDatasource] = [
|
|
375
|
+
p.resolve() for p in self.parents
|
|
376
|
+
]
|
|
377
|
+
|
|
378
|
+
grain = (
|
|
379
|
+
self.grain if self.grain else BuildGrain.from_concepts(self.output_concepts)
|
|
380
|
+
)
|
|
381
|
+
source_map = resolve_concept_map(
|
|
382
|
+
parent_sources,
|
|
383
|
+
targets=self.output_concepts,
|
|
384
|
+
inherited_inputs=self.input_concepts + self.existence_concepts,
|
|
385
|
+
)
|
|
386
|
+
|
|
387
|
+
return QueryDatasource(
|
|
388
|
+
input_concepts=self.input_concepts,
|
|
389
|
+
output_concepts=self.output_concepts,
|
|
390
|
+
datasources=parent_sources,
|
|
391
|
+
source_type=self.source_type,
|
|
392
|
+
source_map=source_map,
|
|
393
|
+
joins=[],
|
|
394
|
+
grain=grain,
|
|
395
|
+
condition=self.conditions,
|
|
396
|
+
partial_concepts=self.partial_concepts,
|
|
397
|
+
nullable_concepts=self.nullable_concepts,
|
|
398
|
+
force_group=self.force_group,
|
|
399
|
+
hidden_concepts=self.hidden_concepts,
|
|
400
|
+
ordering=self.ordering,
|
|
401
|
+
)
|
|
402
|
+
|
|
403
|
+
def rebuild_cache(self) -> QueryDatasource:
|
|
404
|
+
self.tainted = True
|
|
405
|
+
self.output_lcl = LooseBuildConceptList(concepts=self.output_concepts)
|
|
406
|
+
if not self.resolution_cache:
|
|
407
|
+
return self.resolve()
|
|
408
|
+
self.resolution_cache = None
|
|
409
|
+
return self.resolve()
|
|
410
|
+
|
|
411
|
+
def resolve(self) -> QueryDatasource:
|
|
412
|
+
if self.resolution_cache:
|
|
413
|
+
return self.resolution_cache
|
|
414
|
+
qds = self._resolve()
|
|
415
|
+
self.resolution_cache = qds
|
|
416
|
+
return qds
|
|
417
|
+
|
|
418
|
+
def copy(self) -> "StrategyNode":
    """Create a new node of the same class with the same configuration.

    List and set attributes are shallow-copied into new containers; all
    other attributes (environment, conditions, grain, ordering, ...) are
    passed through by reference.
    """
    clone_kwargs = dict(
        input_concepts=list(self.input_concepts),
        output_concepts=list(self.output_concepts),
        environment=self.environment,
        whole_grain=self.whole_grain,
        parents=list(self.parents),
        partial_concepts=list(self.partial_concepts),
        nullable_concepts=list(self.nullable_concepts),
        depth=self.depth,
        conditions=self.conditions,
        preexisting_conditions=self.preexisting_conditions,
        force_group=self.force_group,
        grain=self.grain,
        hidden_concepts=set(self.hidden_concepts),
        existence_concepts=list(self.existence_concepts),
        virtual_output_concepts=list(self.virtual_output_concepts),
        ordering=self.ordering,
    )
    return self.__class__(**clone_kwargs)
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
@dataclass
class NodeJoin:
    """A join between two strategy nodes on a list of concepts.

    Unless ``concept_pairs`` is supplied, ``__post_init__`` validates
    ``concepts`` against the ``all_concepts`` of both nodes and replaces
    ``concepts`` with the validated list.
    """

    left_node: StrategyNode
    right_node: StrategyNode
    concepts: List[BuildConcept]
    join_type: JoinType
    # When True, join concepts missing from either node are dropped
    # instead of raising.
    filter_to_mutual: bool = False
    # When truthy, concept validation in __post_init__ is skipped entirely.
    concept_pairs: list[ConceptPair] | None = None
    modifiers: List[Modifier] = field(default_factory=list)

    def __post_init__(self):
        """Validate the join and narrow ``concepts`` to usable keys.

        Raises:
            SyntaxError: if both nodes compare equal, if a join concept is
                missing from a node while ``filter_to_mutual`` is False, or
                if filtering leaves no join keys and neither node consists
                solely of constant-derived concepts.
        """
        if self.left_node == self.right_node:
            raise SyntaxError("Invalid join, left and right nodes are the same")
        if self.concept_pairs:
            return

        nodes = (self.left_node, self.right_node)
        # Collect each node's addresses once: the ordered list is reused in
        # error messages, the set gives constant-time membership checks.
        node_addresses = [[c.address for c in node.all_concepts] for node in nodes]
        node_lookups = [set(addresses) for addresses in node_addresses]

        final_concepts = []
        for concept in self.concepts:
            include = True
            for node, addresses, lookup in zip(nodes, node_addresses, node_lookups):
                if concept.address in lookup:
                    continue
                if not self.filter_to_mutual:
                    raise SyntaxError(
                        f"Invalid join, missing {concept} on {str(node)}, have"
                        f" {addresses}"
                    )
                include = False
            if include:
                final_concepts.append(concept)

        if not final_concepts and self.concepts:
            # if one datasource only has constants
            # we can join on 1=1
            for node in nodes:
                if all(c.derivation == Derivation.CONSTANT for c in node.all_concepts):
                    self.concepts = []
                    return

            left_keys, right_keys = node_addresses
            match_concepts = [c.address for c in self.concepts]
            raise SyntaxError(
                "No mutual join keys found between"
                f" {self.left_node} and"
                f" {self.right_node}, left_keys {left_keys},"
                f" right_keys {right_keys},"
                f" provided join concepts {match_concepts}"
            )
        self.concepts = final_concepts

    @property
    def unique_id(self) -> str:
        """Identifier built from both nodes (ordered by ``str``) and the join type."""
        nodes = sorted([self.left_node, self.right_node], key=str)
        return str(nodes) + self.join_type.value

    def __str__(self):
        """Readable summary: join type, both nodes, and the join concepts."""
        return (
            f"{self.join_type.value} JOIN {self.left_node} and"
            f" {self.right_node} on"
            f" {','.join([str(k) for k in self.concepts])}"
        )
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
class WhereSafetyNode(StrategyNode):
    """Specialized node to be used to pad certain
    select outputs that can't be immediately used in a where
    clause; eg window functions. Will remove itself if not required."""

    def resolve(self) -> QueryDatasource:
        """Resolve this node, collapsing into the sole parent when possible.

        When the node has no conditions and exactly one parent, a copy of
        that parent is given this node's outputs, preexisting conditions
        (if any) and ordering, and its resolution is returned. Otherwise
        the inherited ``resolve`` is used.
        """
        if self.conditions or len(self.parents) != 1:
            return super().resolve()

        stand_in = self.parents[0].copy()
        # avoid performance hit by not rebuilding until end
        stand_in.set_output_concepts(self.output_concepts, rebuild=False)

        # carry over conditions already applied upstream, when present
        if self.preexisting_conditions:
            stand_in.set_preexisting_conditions(self.preexisting_conditions)
        # TODO: add a helper for this
        stand_in.ordering = self.ordering

        # actually build the node
        stand_in.rebuild_cache()
        return stand_in.resolve()
|