pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
- pytrilogy-0.3.148.dist-info/METADATA +555 -0
- pytrilogy-0.3.148.dist-info/RECORD +206 -0
- pytrilogy-0.3.148.dist-info/WHEEL +5 -0
- pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +27 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +119 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +454 -0
- trilogy/core/env_processor.py +239 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1240 -0
- trilogy/core/graph_models.py +142 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2662 -0
- trilogy/core/models/build.py +2603 -0
- trilogy/core/models/build_environment.py +165 -0
- trilogy/core/models/core.py +506 -0
- trilogy/core/models/datasource.py +434 -0
- trilogy/core/models/environment.py +756 -0
- trilogy/core/models/execute.py +1213 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +270 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +207 -0
- trilogy/core/processing/node_generators/node_merge_node.py +695 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +786 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +522 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +604 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1431 -0
- trilogy/dialect/bigquery.py +314 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +159 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +376 -0
- trilogy/dialect/enums.py +149 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/__init__.py +17 -0
- trilogy/execution/config.py +119 -0
- trilogy/execution/state/__init__.py +0 -0
- trilogy/execution/state/file_state_store.py +0 -0
- trilogy/execution/state/sqllite_state_store.py +0 -0
- trilogy/execution/state/state_store.py +301 -0
- trilogy/executor.py +656 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +135 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2863 -0
- trilogy/parsing/render.py +773 -0
- trilogy/parsing/trilogy.lark +544 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +45 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +306 -0
- trilogy/scripts/common.py +430 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +311 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +555 -0
- trilogy/scripts/environment.py +59 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +472 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +748 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/refresh.py +106 -0
- trilogy/scripts/run.py +79 -0
- trilogy/scripts/serve.py +202 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +129 -0
- trilogy/scripts/trilogy.py +75 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from trilogy.core.models.build import (
|
|
6
|
+
BuildConcept,
|
|
7
|
+
BuildWhereClause,
|
|
8
|
+
)
|
|
9
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
10
|
+
from trilogy.core.processing.nodes import (
|
|
11
|
+
StrategyNode,
|
|
12
|
+
)
|
|
13
|
+
from trilogy.core.processing.utility import (
|
|
14
|
+
get_disconnected_components,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ValidationResult(Enum):
    """Outcome of validating a stack of strategy nodes against required concepts."""

    COMPLETE = 1  # all concepts found and the found-map forms a single connected graph
    DISCONNECTED = 2  # all concepts found, but spread over multiple disconnected subgraphs
    INCOMPLETE = 3  # one or more required concepts were not found at all
    INCOMPLETE_CONDITION = 4  # concepts found, but filter conditions not satisfied on every node
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def validate_concept(
    concept: BuildConcept,
    node: StrategyNode,
    found_addresses: set[str],
    non_partial_addresses: set[str],
    partial_addresses: set[str],
    virtual_addresses: set[str],
    found_map: dict[str, set[BuildConcept]],
    accept_partial: bool,
    seen: set[str],
    environment: BuildEnvironment,
):
    """Record whether *concept* is satisfied by *node*, mutating the tracking sets in place.

    Updates found/partial/virtual address sets and the per-node found_map, then
    recurses into the concept's pseudonyms so aliases are credited as well.
    `seen` guards the recursion against pseudonym cycles.  Returns None.
    """
    # logger.debug(
    #     f"Validating concept {concept.address} with accept_partial={accept_partial}"
    # )
    found_map[str(node)].add(concept)
    seen.add(concept.address)
    if concept not in node.partial_concepts:
        found_addresses.add(concept.address)
        non_partial_addresses.add(concept.address)
        # remove it from our partial tracking
        if concept.address in partial_addresses:
            partial_addresses.remove(concept.address)
        if concept.address in virtual_addresses:
            virtual_addresses.remove(concept.address)
    if concept in node.partial_concepts:
        # a non-partial sighting anywhere in the stack trumps a partial one
        if concept.address in non_partial_addresses:
            return None
        partial_addresses.add(concept.address)
        if accept_partial:
            found_addresses.add(concept.address)
            # concept was already added to found_map above; re-adding to a set is harmless
            found_map[str(node)].add(concept)
    # credit pseudonyms (aliases) of this concept as found too
    for v_address in concept.pseudonyms:
        if v_address in seen:
            continue
        if v_address in environment.alias_origin_lookup:
            # logger.debug(
            #     f"Found alias origin for {v_address}: {environment.alias_origin_lookup[v_address]} mapped to {environment.concepts[v_address]}")
            v = environment.alias_origin_lookup[v_address]
        else:
            v = environment.concepts[v_address]

        # the resolved concept may differ from the raw pseudonym address; re-check
        if v.address in seen:

            continue

        if v.address == concept.address:

            continue
        validate_concept(
            v,
            node,
            found_addresses,
            non_partial_addresses,
            partial_addresses,
            virtual_addresses,
            found_map,
            accept_partial,
            seen=seen,
            environment=environment,
        )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def validate_stack(
    environment: BuildEnvironment,
    stack: List[StrategyNode],
    concepts: List[BuildConcept],
    mandatory_with_filter: List[BuildConcept],
    conditions: BuildWhereClause | None = None,
    accept_partial: bool = False,
) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
    """Check whether *stack* collectively satisfies *concepts* (and any conditions).

    Returns a tuple of:
      (result, found_addresses, missing_addresses, partial_addresses, virtual_addresses)
    where `missing_addresses` is empty for COMPLETE/DISCONNECTED results.
    """
    found_map: dict[str, set[BuildConcept]] = defaultdict(set)
    found_addresses: set[str] = set()
    non_partial_addresses: set[str] = set()
    partial_addresses: set[str] = set()
    virtual_addresses: set[str] = set()
    seen: set[str] = set()

    for node in stack:
        resolved = node.resolve()

        for concept in resolved.output_concepts:
            # hidden outputs don't count toward satisfying the request
            if concept.address in resolved.hidden_concepts:
                continue

            validate_concept(
                concept,
                node,
                found_addresses,
                non_partial_addresses,
                partial_addresses,
                virtual_addresses,
                found_map,
                accept_partial,
                seen,
                environment,
            )
        # virtual outputs only count when no real (non-partial) source exists
        for concept in node.virtual_output_concepts:
            if concept.address in non_partial_addresses:
                continue
            found_addresses.add(concept.address)
            virtual_addresses.add(concept.address)
    if not conditions:
        conditions_met = True
    else:
        # conditions are satisfied when every node already carries them,
        # or when every filter-mandatory concept has been found
        conditions_met = all(
            [node.preexisting_conditions == conditions.conditional for node in stack]
        ) or all([c.address in found_addresses for c in mandatory_with_filter])
    # zip in those we know we found
    if not all([c.address in found_addresses for c in concepts]) or not conditions_met:
        if not all([c.address in found_addresses for c in concepts]):
            return (
                ValidationResult.INCOMPLETE,
                found_addresses,
                {c.address for c in concepts if c.address not in found_addresses},
                partial_addresses,
                virtual_addresses,
            )
        # NOTE(review): membership test compares an address (str) against BuildConcept
        # objects in mandatory_with_filter — presumably BuildConcept supports
        # equality with its address string; confirm against the model definition.
        return (
            ValidationResult.INCOMPLETE_CONDITION,
            found_addresses,
            {c.address for c in concepts if c.address not in mandatory_with_filter},
            partial_addresses,
            virtual_addresses,
        )

    graph_count, _ = get_disconnected_components(found_map)
    if graph_count in (0, 1):
        return (
            ValidationResult.COMPLETE,
            found_addresses,
            set(),
            partial_addresses,
            virtual_addresses,
        )
    # if we have too many subgraphs, we need to keep searching
    return (
        ValidationResult.DISCONNECTED,
        found_addresses,
        set(),
        partial_addresses,
        virtual_addresses,
    )
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from typing import Dict, List
|
|
3
|
+
|
|
4
|
+
from trilogy.core.models.author import Concept
|
|
5
|
+
from trilogy.utility import unique
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def extract_required_subgraphs(
    assocs: defaultdict[str, list], path: List[str]
) -> defaultdict[str, list]:
    """Split a graph *path* into per-datasource segments, accumulating into *assocs*.

    Nodes prefixed "ds~" are datasource boundaries; all other nodes between two
    boundaries are appended under the preceding datasource's key.  When a new
    boundary is hit, the running segment is flushed and re-seeded with the node
    just before the boundary so adjacent segments share their join node.
    Mutates and returns *assocs*.
    """
    ds = path[0]
    current: list[str] = []
    for idx, val in enumerate(path):
        if val.startswith("ds~"):
            if current:
                assocs[ds] += current
                # re-seed with the boundary's predecessor (the shared join node)
                current = [path[idx - 1]] if idx > 0 else []
            ds = val
        else:
            current.append(val)
    else:
        # for/else with no break: this always runs — flush the trailing segment
        if current:
            assocs[ds] += current

    return assocs
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def extract_mandatory_subgraphs(paths: Dict[str, List[str]], g) -> List[List[Concept]]:
    """Collapse node *paths* into per-datasource groups of required concepts.

    Each path is partitioned by datasource boundary via extract_required_subgraphs;
    for every resulting group, the concept nodes (prefix "c~") are resolved through
    graph *g* and de-duplicated by address.

    :param paths: mapping of target -> node path through the concept graph
    :param g: networkx-style graph whose "c~" nodes carry a "concept" attribute
    :return: one deduplicated concept list per datasource segment
    """
    assocs: defaultdict[str, list] = defaultdict(list)
    for path in paths.values():
        extract_required_subgraphs(assocs, path)

    # Iterate values directly (keys are unused) and build the result in one
    # comprehension instead of two manual append loops.
    return [
        unique(
            [g.nodes[v]["concept"] for v in value if v.startswith("c~")], "address"
        )
        for value in assocs.values()
    ]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
For any specialized node, unpack the specialized concept X and fetch the rest.
|
|
5
|
+
|
|
6
|
+
For an unspecialized node, attempt to fetch concept X and the rest. If that cannot be done, attempt to see
|
|
7
|
+
if all combinations of others + X can be found, and return a merge node with all of those.
|
|
8
|
+
|
|
9
|
+
If not all combinations can be found, return what can be found.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from .basic_node import gen_basic_node
|
|
2
|
+
from .constant_node import gen_constant_node
|
|
3
|
+
from .filter_node import gen_filter_node
|
|
4
|
+
from .group_node import gen_group_node
|
|
5
|
+
from .group_to_node import gen_group_to_node
|
|
6
|
+
from .multiselect_node import gen_multiselect_node
|
|
7
|
+
from .node_merge_node import gen_merge_node
|
|
8
|
+
from .recursive_node import gen_recursive_node
|
|
9
|
+
from .rowset_node import gen_rowset_node
|
|
10
|
+
from .select_node import gen_select_node
|
|
11
|
+
from .synonym_node import gen_synonym_node
|
|
12
|
+
from .union_node import gen_union_node
|
|
13
|
+
from .unnest_node import gen_unnest_node
|
|
14
|
+
from .window_node import gen_window_node
|
|
15
|
+
|
|
16
|
+
# Public API of the node_generators package: one gen_* factory per strategy-node type.
__all__ = [
    "gen_filter_node",
    "gen_window_node",
    "gen_group_node",
    "gen_select_node",
    "gen_basic_node",
    "gen_unnest_node",
    "gen_union_node",
    "gen_merge_node",
    "gen_group_to_node",
    "gen_rowset_node",
    "gen_multiselect_node",
    "gen_synonym_node",
    "gen_recursive_node",
    "gen_constant_node",
]
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from trilogy.constants import logger
|
|
4
|
+
from trilogy.core.enums import FunctionClass, FunctionType, SourceType
|
|
5
|
+
from trilogy.core.models.build import BuildConcept, BuildFunction, BuildWhereClause
|
|
6
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
7
|
+
from trilogy.core.processing.node_generators.common import (
|
|
8
|
+
resolve_function_parent_concepts,
|
|
9
|
+
)
|
|
10
|
+
from trilogy.core.processing.nodes import ConstantNode, History, StrategyNode
|
|
11
|
+
from trilogy.utility import unique
|
|
12
|
+
|
|
13
|
+
# Tag prepended to every log message emitted by this module.
LOGGER_PREFIX = "[GEN_BASIC_NODE]"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def is_equivalent_basic_function_lineage(
    x: BuildConcept,
    y: BuildConcept,
):
    """Return True when x and y derive from function lineages that can be
    computed from the same parent fetch.

    Attribute accesses are only equivalent when they access the same arguments;
    aggregates and one-to-many derivations on y's side are never equivalent to
    a differing operator, since they change row grain.
    """
    lin_x = x.lineage
    lin_y = y.lineage
    # both sides must be function-derived at all
    if not (isinstance(lin_x, BuildFunction) and isinstance(lin_y, BuildFunction)):
        return False
    if lin_x.operator == lin_y.operator:
        # identical attribute accesses must target the same struct/arguments
        if lin_x.operator == FunctionType.ATTR_ACCESS:
            return lin_x.concept_arguments == lin_y.concept_arguments
        return True
    # differing operators: grain-changing derivations on y disqualify equivalence
    grain_changing = (
        lin_y.operator in FunctionClass.AGGREGATE_FUNCTIONS.value
        or lin_y.operator in FunctionClass.ONE_TO_MANY.value
    )
    return not grain_changing
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def gen_basic_node(
    concept: BuildConcept,
    local_optional: List[BuildConcept],
    environment: BuildEnvironment,
    g,
    depth: int,
    source_concepts,
    history: History | None = None,
    conditions: BuildWhereClause | None = None,
):
    """Generate a node computing a function-derived *concept* from its parents.

    Resolves the concept's lineage parents, folds in any equivalently-derivable
    optional concepts (and their pseudonym synonyms), sources all parents via
    *source_concepts*, then marks the resulting node BASIC and prunes outputs.
    Returns a StrategyNode, a ConstantNode (when there are no parents), or None
    when parents cannot be sourced.
    """
    depth_prefix = "\t" * depth
    parent_concepts = resolve_function_parent_concepts(concept, environment=environment)

    logger.info(
        f"{depth_prefix}{LOGGER_PREFIX} basic node for {concept} with lineage {concept.lineage} has parents {[x for x in parent_concepts]}"
    )
    synonyms: list[BuildConcept] = []
    ignored_optional: set[str] = set()

    # when we are getting an attribute, if there is anything else
    # that is an attribute of the same struct in local optional
    # select that value for discovery as well
    if (
        isinstance(concept.lineage, BuildFunction)
        and concept.lineage.operator == FunctionType.ATTR_ACCESS
    ):
        logger.info(
            f"{depth_prefix}{LOGGER_PREFIX} checking for synonyms for attribute access"
        )
        for x in local_optional:
            found = False
            for z in x.pseudonyms:
                # gate to ensure we don't match to multiple synonyms
                if found:
                    continue
                if z in environment.concepts:
                    s_concept = environment.concepts[z]
                else:
                    s_concept = environment.alias_origin_lookup[z]
                if is_equivalent_basic_function_lineage(concept, s_concept):
                    found = True
                    synonyms.append(s_concept)
                    # the optional itself is satisfied via its synonym; skip it later
                    ignored_optional.add(x.address)
    # optionals derivable by the same lineage shape can share this node's parents
    equivalent_optional = [
        x
        for x in local_optional
        if is_equivalent_basic_function_lineage(concept, x)
        and x.address != concept.address
    ] + synonyms

    if equivalent_optional:
        logger.info(
            f"{depth_prefix}{LOGGER_PREFIX} basic node for {concept} has equivalent optional {[x.address for x in equivalent_optional]}"
        )
        for eo in equivalent_optional:
            new_parents = resolve_function_parent_concepts(eo, environment=environment)
            logger.info(
                f"{depth_prefix}{LOGGER_PREFIX} equivalent optional {eo.address} has parents {[x.address for x in new_parents]}"
            )
            parent_concepts += new_parents
    # everything else must be fetched alongside the parents directly
    non_equivalent_optional = [
        x
        for x in local_optional
        if x not in equivalent_optional
        and not any(x.address in y.pseudonyms for y in equivalent_optional)
        and x.address not in ignored_optional
    ]
    logger.info(
        f"{depth_prefix}{LOGGER_PREFIX} basic node for {concept} has non-equivalent optional {[x.address for x in non_equivalent_optional]}"
    )
    all_parents: list[BuildConcept] = unique(
        parent_concepts + non_equivalent_optional, "address"
    )
    logger.info(
        f"{depth_prefix}{LOGGER_PREFIX} Fetching parents {[x.address for x in all_parents]} with conditions {conditions}"
    )
    if all_parents:
        parent_node: StrategyNode | None = source_concepts(
            mandatory_list=all_parents,
            environment=environment,
            g=g,
            depth=depth + 1,
            history=history,
            conditions=conditions,
        )

        if not parent_node:
            logger.info(
                f"{depth_prefix}{LOGGER_PREFIX} No basic node could be generated for {concept}"
            )
            return None
    else:
        # no parents at all: the concept is a constant expression
        return ConstantNode(
            input_concepts=[],
            output_concepts=[concept],
            environment=environment,
            depth=depth,
        )
    if parent_node.source_type != SourceType.CONSTANT:
        parent_node.source_type = SourceType.BASIC
    parent_node.add_output_concept(concept)
    for x in equivalent_optional:
        parent_node.add_output_concept(x)

    logger.info(
        f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept}: output {[x.address for x in parent_node.output_concepts]}"
    )
    # if it's a constant, don't prune outputs
    if parent_node.source_type == SourceType.CONSTANT:
        return parent_node
    targets = [concept] + local_optional + equivalent_optional
    # also keep any output that is a pseudonym of a target
    targets = [
        s
        for s in parent_node.output_concepts
        if any(s.address in y.pseudonyms for y in targets)
    ] + targets
    # NOTE(review): `x.address not in targets` compares a str against BuildConcept
    # entries — assumes BuildConcept equality supports address strings; confirm.
    hidden = [x for x in parent_node.output_concepts if x.address not in targets]
    parent_node.hide_output_concepts(hidden)
    parent_node.source_type = SourceType.BASIC

    logger.info(
        f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept}: input: {[x.address for x in parent_node.input_concepts]} output {[x.address for x in parent_node.output_concepts]} hidden {[x for x in parent_node.hidden_concepts]}"
    )

    return parent_node
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from typing import Callable, List, Tuple
|
|
3
|
+
|
|
4
|
+
from trilogy.core.enums import Derivation, Purpose
|
|
5
|
+
from trilogy.core.models.build import (
|
|
6
|
+
BuildAggregateWrapper,
|
|
7
|
+
BuildComparison,
|
|
8
|
+
BuildConcept,
|
|
9
|
+
BuildFilterItem,
|
|
10
|
+
BuildFunction,
|
|
11
|
+
BuildWhereClause,
|
|
12
|
+
LooseBuildConceptList,
|
|
13
|
+
)
|
|
14
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
15
|
+
from trilogy.core.processing.nodes import (
|
|
16
|
+
History,
|
|
17
|
+
NodeJoin,
|
|
18
|
+
)
|
|
19
|
+
from trilogy.core.processing.nodes.base_node import StrategyNode
|
|
20
|
+
from trilogy.core.processing.nodes.merge_node import MergeNode
|
|
21
|
+
from trilogy.utility import unique
|
|
22
|
+
|
|
23
|
+
# Lineage classes treated as aggregate vs. plain-function derivations when
# resolving parent concepts; tuples so they can be splatted into isinstance checks.
AGGREGATE_TYPES = (BuildAggregateWrapper,)
FUNCTION_TYPES = (BuildFunction,)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def resolve_function_parent_concepts(
    concept: BuildConcept, environment: BuildEnvironment
) -> List[BuildConcept]:
    """Return the deduplicated parent concepts needed to compute *concept*.

    For aggregates, the parents include the lineage arguments, the grain
    components (when the grain is non-abstract), and the keys of any property
    arguments, so the aggregation can be computed at the correct grain.

    :raises ValueError: if the concept's lineage is not a function, aggregate,
        or comparison.
    """
    if not isinstance(
        concept.lineage, (*FUNCTION_TYPES, *AGGREGATE_TYPES, BuildComparison)
    ):
        raise ValueError(
            f"Concept {concept} lineage is not function or aggregate, is {type(concept.lineage)}"
        )
    if concept.derivation == Derivation.AGGREGATE:
        base: list[BuildConcept] = []
        if not concept.grain.abstract:
            # aggregate at a concrete grain: grain components are also parents
            base = concept.lineage.concept_arguments + [
                environment.concepts[c] for c in concept.grain.components
            ]
        # if the base concept being aggregated is a property with a key
        # keep the key as a parent
        else:
            base = concept.lineage.concept_arguments
        if isinstance(concept.lineage, AGGREGATE_TYPES):
            # for aggregate wrapper, don't include the by
            extra_property_grain = concept.lineage.function.concept_arguments
        else:
            extra_property_grain = concept.lineage.concept_arguments
        for x in extra_property_grain:
            if isinstance(x, BuildConcept) and x.purpose == Purpose.PROPERTY and x.keys:
                base += [environment.concepts[c] for c in x.keys]
        return unique(base, "address")
    # TODO: handle basic lineage chains?
    return unique(concept.lineage.concept_arguments, "address")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def resolve_condition_parent_concepts(
    condition: BuildWhereClause,
) -> Tuple[List[BuildConcept], List[Tuple[BuildConcept, ...]]]:
    """Split a where clause's inputs into row-level concepts and existence groups.

    Row arguments are deduplicated by address; existence argument tuples are
    passed through unchanged, preserving their grouping.
    """
    row_inputs: list[BuildConcept] = list(condition.row_arguments)
    existence_inputs = [group for group in condition.existence_arguments]
    return unique(row_inputs, "address"), existence_inputs
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def resolve_filter_parent_concepts(
    concept: BuildConcept,
    environment: BuildEnvironment,
) -> Tuple[List[BuildConcept], List[Tuple[BuildConcept, ...]]]:
    """Return (row-level parents, existence groups) for a filter-derived concept.

    Row parents include the filtered content, the where clause's row arguments,
    and — for property/metric content — its keys, so the filter can be applied
    at the right grain.

    :raises ValueError: if the concept's lineage is not a BuildFilterItem.
    """
    if not isinstance(concept.lineage, (BuildFilterItem,)):
        raise ValueError(
            f"Concept {concept} lineage is not filter item, is {type(concept.lineage)}"
        )
    direct_parent = concept.lineage.content
    base_existence = []
    # content may be a concept or a literal/expression; only concepts are parents
    base_rows = [direct_parent] if isinstance(direct_parent, BuildConcept) else []
    condition_rows, condition_existence = resolve_condition_parent_concepts(
        concept.lineage.where
    )
    base_rows += condition_rows
    base_existence += condition_existence
    # include keys so the filtered property/metric is joined at its own grain
    # (original comment was truncated here — presumed intent; confirm)
    if (
        isinstance(direct_parent, BuildConcept)
        and direct_parent.purpose in (Purpose.PROPERTY, Purpose.METRIC)
        and direct_parent.keys
    ):
        base_rows += [environment.concepts[c] for c in direct_parent.keys]

    if concept.lineage.where.existence_arguments:
        return (
            unique(base_rows, "address"),
            base_existence,
        )
    return unique(base_rows, "address"), []
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def gen_property_enrichment_node(
    base_node: StrategyNode,
    extra_properties: list[BuildConcept],
    history: History,
    environment: BuildEnvironment,
    g,
    depth: int,
    source_concepts,
    log_lambda: Callable,
    conditions: BuildWhereClause | None = None,
):
    """Join *extra_properties* onto *base_node* via one lookup per shared key set.

    Properties are grouped by their (joined) key addresses; each group is sourced
    as its own node and merged back onto the base node's outputs.  Returns a
    MergeNode, or None if any group cannot be sourced.

    :raises SyntaxError: if an extra property has no keys to join on.
    """
    # group properties by their full key tuple, encoded as a "-"-joined string
    required_keys: dict[str, set[str]] = defaultdict(set)
    for x in extra_properties:
        if not x.keys:
            raise SyntaxError(f"Property {x.address} missing keys in lookup")
        keys = "-".join([y for y in x.keys])
        required_keys[keys].add(x.address)
    final_nodes = []
    for _k, vs in required_keys.items():
        log_lambda(f"Generating enrichment node for {_k} with {vs}")
        # NOTE(review): assumes no key address itself contains "-"; the split
        # would otherwise fragment an address — confirm address format.
        ks = _k.split("-")
        enrich_node: StrategyNode = source_concepts(
            mandatory_list=[environment.concepts[k] for k in ks]
            + [environment.concepts[v] for v in vs],
            environment=environment,
            g=g,
            depth=depth + 1,
            history=history,
            conditions=conditions,
        )
        if not enrich_node:
            # any unsourceable group fails the whole enrichment
            return None
        final_nodes.append(enrich_node)
    return MergeNode(
        input_concepts=unique(
            base_node.output_concepts
            + extra_properties
            + [
                environment.concepts[v]
                for k, values in required_keys.items()
                for v in values
            ],
            "address",
        ),
        output_concepts=base_node.output_concepts + extra_properties,
        environment=environment,
        parents=[
            base_node,
        ]
        + final_nodes,
        preexisting_conditions=conditions.conditional if conditions else None,
    )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def gen_enrichment_node(
    base_node: StrategyNode,
    join_keys: List[BuildConcept],
    local_optional: list[BuildConcept],
    environment: BuildEnvironment,
    g,
    depth: int,
    source_concepts,
    log_lambda,
    history: History,
    conditions: BuildWhereClause | None = None,
):
    """Enrich base_node with any of local_optional it does not already output.

    Strategy, in order:
    1. If nothing is missing, return base_node unchanged.
    2. If every missing concept is a PROPERTY whose keys base_node already
       outputs, delegate to gen_property_enrichment_node (star-schema
       ergonomic lookup).
    3. Otherwise resolve a node supplying join_keys + missing concepts and
       merge it with base_node; if that node is unresolvable, fall back to
       returning base_node as-is.
    """
    local_opts = LooseBuildConceptList(concepts=local_optional)

    # Concepts the base node does not output, or outputs only partially,
    # must be fetched from another source.
    extra_required = [
        x
        for x in local_opts
        if x not in base_node.output_lcl or x in base_node.partial_lcl
    ]

    # Bug fix: with an empty extra_required, both all(...) guards below are
    # vacuously True and the log message would raise IndexError on
    # extra_required[0]; nothing is missing, so the base node suffices.
    if not extra_required:
        log_lambda(
            f"{str(type(base_node).__name__)} has no missing concepts to enrich, returning base node"
        )
        return base_node

    # property lookup optimization
    # this helps create ergonomic merge nodes when evaluating a normalized star schema
    # as we only want to lookup the missing properties based on the relevant keys
    if all([x.purpose == Purpose.PROPERTY for x in extra_required]):
        if all(
            x.keys and all([key in base_node.output_lcl for key in x.keys])
            for x in extra_required
        ):
            log_lambda(
                f"{str(type(base_node).__name__)} returning property optimized enrichment node for {extra_required[0].keys}"
            )
            return gen_property_enrichment_node(
                base_node,
                extra_required,
                environment=environment,
                g=g,
                depth=depth,
                source_concepts=source_concepts,
                history=history,
                conditions=conditions,
                log_lambda=log_lambda,
            )
    log_lambda(
        f"{str(type(base_node).__name__)} searching for join keys {LooseBuildConceptList(concepts=join_keys)} and extra required {local_opts}"
    )
    enrich_node: StrategyNode = source_concepts(  # this fetches the parent + join keys
        # to then connect to the rest of the query
        mandatory_list=join_keys + extra_required,
        environment=environment,
        g=g,
        depth=depth + 1,
        history=history,
        conditions=conditions,
    )
    if not enrich_node:
        # Best-effort: an unresolvable enrichment source is not fatal.
        log_lambda(
            f"{str(type(base_node).__name__)} enrichment node unresolvable, returning just group node"
        )
        return base_node
    log_lambda(
        f"{str(type(base_node).__name__)} returning merge node with group node + enrichment node"
    )
    non_hidden = [
        x
        for x in base_node.output_concepts
        if x.address not in base_node.hidden_concepts
    ]
    return MergeNode(
        input_concepts=unique(join_keys + extra_required + non_hidden, "address"),
        output_concepts=unique(join_keys + extra_required + non_hidden, "address"),
        environment=environment,
        parents=[enrich_node, base_node],
        force_group=False,
        preexisting_conditions=conditions.conditional if conditions else None,
        depth=depth,
    )
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def resolve_join_order(joins: "List[NodeJoin]") -> "List[NodeJoin]":
    """Order joins so each join's left node is reachable when it executes.

    Seeds the order with a join whose left node never appears on the right
    side of any join (the basis), then repeatedly appends joins whose left
    node is already available, skipping joins both of whose nodes are
    already joined in.

    Raises:
        SyntaxError: if no valid basis exists, or if a pass over the
            remaining joins makes no progress (previously this case spun
            in an infinite loop).
    """
    if not joins:
        return []
    available_aliases: set[StrategyNode] = set()
    final_joins_pre = [*joins]
    final_joins = []
    left = set()
    right = set()
    for join in joins:
        left.add(join.left_node)
        right.add(join.right_node)

    # A basis node appears only on the left side, so it can anchor the chain.
    potential_basis = left.difference(right)
    base_candidates = [x for x in final_joins_pre if x.left_node in potential_basis]
    if not base_candidates:
        raise SyntaxError(
            f"Unresolvable join dependencies, left requires {left} and right requires {right}"
        )
    base = base_candidates[0]
    final_joins.append(base)
    available_aliases.add(base.left_node)
    available_aliases.add(base.right_node)
    while final_joins_pre:
        new_final_joins_pre: List[NodeJoin] = []
        for join in final_joins_pre:
            if join.left_node in available_aliases:
                # we don't need to join twice
                # so whatever join we found first, works
                if join.right_node in available_aliases:
                    continue
                final_joins.append(join)
                available_aliases.add(join.left_node)
                available_aliases.add(join.right_node)
            else:
                new_final_joins_pre.append(join)
        # Bug fix: if nothing was consumed this pass, the remaining joins can
        # never become reachable; the original code looped forever here.
        if len(new_final_joins_pre) == len(final_joins_pre):
            raise SyntaxError(
                f"Unresolvable join dependencies, joins {new_final_joins_pre} unreachable from {available_aliases}"
            )
        final_joins_pre = new_final_joins_pre
    return final_joins
|