pytrilogy 0.0.1.102__py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- pytrilogy-0.0.1.102.dist-info/LICENSE.md +19 -0
- pytrilogy-0.0.1.102.dist-info/METADATA +277 -0
- pytrilogy-0.0.1.102.dist-info/RECORD +77 -0
- pytrilogy-0.0.1.102.dist-info/WHEEL +5 -0
- pytrilogy-0.0.1.102.dist-info/entry_points.txt +2 -0
- pytrilogy-0.0.1.102.dist-info/top_level.txt +1 -0
- trilogy/__init__.py +8 -0
- trilogy/compiler.py +0 -0
- trilogy/constants.py +30 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +3 -0
- trilogy/core/enums.py +270 -0
- trilogy/core/env_processor.py +33 -0
- trilogy/core/environment_helpers.py +156 -0
- trilogy/core/ergonomics.py +187 -0
- trilogy/core/exceptions.py +23 -0
- trilogy/core/functions.py +320 -0
- trilogy/core/graph_models.py +55 -0
- trilogy/core/internal.py +37 -0
- trilogy/core/models.py +3145 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +603 -0
- trilogy/core/processing/graph_utils.py +44 -0
- trilogy/core/processing/node_generators/__init__.py +25 -0
- trilogy/core/processing/node_generators/basic_node.py +71 -0
- trilogy/core/processing/node_generators/common.py +239 -0
- trilogy/core/processing/node_generators/concept_merge.py +152 -0
- trilogy/core/processing/node_generators/filter_node.py +83 -0
- trilogy/core/processing/node_generators/group_node.py +92 -0
- trilogy/core/processing/node_generators/group_to_node.py +99 -0
- trilogy/core/processing/node_generators/merge_node.py +148 -0
- trilogy/core/processing/node_generators/multiselect_node.py +189 -0
- trilogy/core/processing/node_generators/rowset_node.py +130 -0
- trilogy/core/processing/node_generators/select_node.py +328 -0
- trilogy/core/processing/node_generators/unnest_node.py +37 -0
- trilogy/core/processing/node_generators/window_node.py +85 -0
- trilogy/core/processing/nodes/__init__.py +76 -0
- trilogy/core/processing/nodes/base_node.py +251 -0
- trilogy/core/processing/nodes/filter_node.py +49 -0
- trilogy/core/processing/nodes/group_node.py +110 -0
- trilogy/core/processing/nodes/merge_node.py +326 -0
- trilogy/core/processing/nodes/select_node_v2.py +198 -0
- trilogy/core/processing/nodes/unnest_node.py +54 -0
- trilogy/core/processing/nodes/window_node.py +34 -0
- trilogy/core/processing/utility.py +278 -0
- trilogy/core/query_processor.py +331 -0
- trilogy/dialect/__init__.py +0 -0
- trilogy/dialect/base.py +679 -0
- trilogy/dialect/bigquery.py +80 -0
- trilogy/dialect/common.py +43 -0
- trilogy/dialect/config.py +55 -0
- trilogy/dialect/duckdb.py +83 -0
- trilogy/dialect/enums.py +95 -0
- trilogy/dialect/postgres.py +86 -0
- trilogy/dialect/presto.py +82 -0
- trilogy/dialect/snowflake.py +82 -0
- trilogy/dialect/sql_server.py +89 -0
- trilogy/docs/__init__.py +0 -0
- trilogy/engine.py +48 -0
- trilogy/executor.py +242 -0
- trilogy/hooks/__init__.py +0 -0
- trilogy/hooks/base_hook.py +37 -0
- trilogy/hooks/graph_hook.py +24 -0
- trilogy/hooks/query_debugger.py +133 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +176 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +2 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +1951 -0
- trilogy/parsing/render.py +483 -0
- trilogy/py.typed +0 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/trilogy.py +127 -0
- trilogy/utility.py +31 -0
File without changes
trilogy/core/processing/concept_strategies_v3.py
@@ -0,0 +1,603 @@

from collections import defaultdict
from typing import List, Optional, Callable


from trilogy.constants import logger
from trilogy.core.enums import PurposeLineage, Granularity, FunctionType
from trilogy.core.env_processor import generate_graph
from trilogy.core.graph_models import ReferenceGraph
from trilogy.core.models import Concept, Environment, Function, Grain
from trilogy.core.processing.utility import (
    get_disconnected_components,
)
from trilogy.utility import unique
from trilogy.core.processing.nodes import (
    ConstantNode,
    MergeNode,
    GroupNode,
    StrategyNode,
    History,
)
from trilogy.core.processing.node_generators import (
    gen_filter_node,
    gen_window_node,
    gen_group_node,
    gen_basic_node,
    gen_select_node,
    gen_unnest_node,
    gen_merge_node,
    gen_group_to_node,
    gen_rowset_node,
    gen_multiselect_node,
    gen_concept_merge_node,
)

from enum import Enum


class ValidationResult(Enum):
    COMPLETE = 1
    DISCONNECTED = 2
    INCOMPLETE = 3


LOGGER_PREFIX = "[CONCEPT DETAIL]"

def get_upstream_concepts(base: Concept, nested: bool = False) -> set[str]:
    upstream = set()
    if nested:
        upstream.add(base.address)
    if not base.lineage:
        return upstream
    for x in base.lineage.concept_arguments:
        upstream = upstream.union(get_upstream_concepts(x, nested=True))
    return upstream

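# Illustrative trace (hypothetical concepts, not shipped with this package):
# if `revenue` has lineage sum(price * quantity), get_upstream_concepts(revenue)
# recurses through lineage.concept_arguments and returns the addresses of
# price, quantity, and any of their own lineage arguments; the starting
# concept itself is only added on nested calls (nested=True).
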
def get_priority_concept(
    all_concepts: List[Concept],
    attempted_addresses: set[str],
    found_concepts: set[str],
    depth: int,
) -> Concept:
    # optimized search for missing concepts
    pass_one = [
        c
        for c in all_concepts
        if c.address not in attempted_addresses and c.address not in found_concepts
    ]
    # sometimes we need to scan intermediate concepts to get merge keys, so fall back
    # to exhaustive search
    pass_two = [c for c in all_concepts if c.address not in attempted_addresses]

    for remaining_concept in (pass_one, pass_two):
        priority = (
            # find anything that needs no joins first, so we can exit early
            [
                c
                for c in remaining_concept
                if c.derivation == PurposeLineage.CONSTANT
                and c.granularity == Granularity.SINGLE_ROW
            ]
            # anything that requires merging concept universes
            + [c for c in remaining_concept if c.derivation == PurposeLineage.MERGE]
            # then multiselects to remove them from scope
            + [c for c in remaining_concept if c.derivation == PurposeLineage.MULTISELECT]
            # then rowsets to remove them from scope, as they cannot get partials
            + [c for c in remaining_concept if c.derivation == PurposeLineage.ROWSET]
            # we should be home-free here
            # then aggregates to remove them from scope, as they cannot get partials
            + [
                c
                for c in remaining_concept
                if c.derivation == PurposeLineage.AGGREGATE
                and not c.granularity == Granularity.SINGLE_ROW
            ]
            # then windows to remove them from scope, as they cannot get partials
            + [
                c
                for c in remaining_concept
                if c.derivation == PurposeLineage.WINDOW
                and not c.granularity == Granularity.SINGLE_ROW
            ]
            # then filters to remove them from scope, also cannot get partials
            + [
                c
                for c in remaining_concept
                if c.derivation == PurposeLineage.FILTER
                and not c.granularity == Granularity.SINGLE_ROW
            ]
            # unnests are weird?
            + [
                c
                for c in remaining_concept
                if c.derivation == PurposeLineage.UNNEST
                and not c.granularity == Granularity.SINGLE_ROW
            ]
            + [
                c
                for c in remaining_concept
                if c.derivation == PurposeLineage.BASIC
                and not c.granularity == Granularity.SINGLE_ROW
            ]
            # finally our plain selects
            + [
                c
                for c in remaining_concept
                if c.derivation == PurposeLineage.ROOT
                and not c.granularity == Granularity.SINGLE_ROW
            ]
            # and any non-single-row constants
            + [
                c
                for c in remaining_concept
                if c.derivation == PurposeLineage.CONSTANT
                and not c.granularity == Granularity.SINGLE_ROW
            ]
            # catch all
            + [
                c
                for c in remaining_concept
                if c.derivation != PurposeLineage.CONSTANT
                and c.granularity == Granularity.SINGLE_ROW
            ]
        )

        priority += [
            c
            for c in remaining_concept
            if c.address not in [x.address for x in priority]
        ]
        final = []
        # if anything is derived from another concept,
        # get the derived copy first,
        # as this will usually resolve cleaner
        for x in priority:
            if any([x.address in get_upstream_concepts(c) for c in priority]):
                logger.info(
                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} delaying fetch of {x.address} as parent of another concept"
                )
                continue
            final.append(x)
        # then append anything we didn't get
        for x2 in priority:
            if x2 not in final:
                final.append(x2)
        if final:
            return final[0]
    raise ValueError(
        f"Cannot resolve query. No remaining priority concepts, have attempted {attempted_addresses}"
    )

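# Net effect of the buckets above, reading top to bottom: single-row
# constants, then merges, multiselects, rowsets, aggregates, windows,
# filters, unnests, basics, roots, remaining constants, and finally
# single-row non-constants; anything that is an upstream parent of another
# requested concept is deferred so the derived copy resolves first.
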
def generate_candidates_restrictive(
    priority_concept: Concept, candidates: list[Concept], exhausted: set[str]
) -> List[List[Concept]]:
    # if it's single row, joins are irrelevant. Fetch without keys.
    if priority_concept.granularity == Granularity.SINGLE_ROW:
        return [[]]

    local_candidates = [
        x
        for x in list(candidates)
        if x.address not in exhausted and x.granularity != Granularity.SINGLE_ROW
    ]
    combos: list[list[Concept]] = []
    # for simple operations, fetch as much as possible
    if priority_concept.derivation in (PurposeLineage.BASIC, PurposeLineage.ROOT):
        combos.append(local_candidates)
    combos.append(Grain(components=[*local_candidates]).components_copy)
    # append the empty set for sourcing the concept by itself last
    combos.append([])
    return combos

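# Example (hypothetical concepts): for a ROOT priority concept with
# candidates [order_id, customer_name], the combos come back widest-first:
# [[order_id, customer_name], <grain components of those candidates>, []],
# so sourcing is attempted with full context before falling back to
# fetching the priority concept alone.
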
def generate_node(
    concept: Concept,
    local_optional: List[Concept],
    environment: Environment,
    g: ReferenceGraph,
    depth: int,
    source_concepts: Callable,
    accept_partial: bool = False,
    history: History | None = None,
) -> StrategyNode | None:
    # first check in case there is a materialized concept
    candidate = gen_select_node(
        concept,
        local_optional,
        environment,
        g,
        depth + 1,
        fail_if_not_found=False,
        accept_partial=accept_partial,
        accept_partial_optional=False,
    )
    if candidate:
        return candidate

    if concept.derivation == PurposeLineage.WINDOW:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating window node with optional {[x.address for x in local_optional]}"
        )
        return gen_window_node(
            concept, local_optional, environment, g, depth + 1, source_concepts, history
        )

    elif concept.derivation == PurposeLineage.FILTER:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating filter node with optional {[x.address for x in local_optional]}"
        )
        return gen_filter_node(
            concept, local_optional, environment, g, depth + 1, source_concepts, history
        )
    elif concept.derivation == PurposeLineage.UNNEST:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]}"
        )
        return gen_unnest_node(
            concept, local_optional, environment, g, depth + 1, source_concepts, history
        )
    elif concept.derivation == PurposeLineage.AGGREGATE:
        # don't push constants up before aggregation if not required,
        # to avoid constant multiplication changing default aggregation results
        # ex: sum(x) * 2 w/ no grain should return sum(x) * 2, not sum(x * 2)
        # these should always be sourceable independently
        agg_optional = [
            x for x in local_optional if x.granularity != Granularity.SINGLE_ROW
        ]

        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating aggregate node with {[x.address for x in agg_optional]}"
        )
        return gen_group_node(
            concept, agg_optional, environment, g, depth + 1, source_concepts, history
        )
    elif concept.derivation == PurposeLineage.ROWSET:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating rowset node with optional {[x.address for x in local_optional]}"
        )
        return gen_rowset_node(
            concept, local_optional, environment, g, depth + 1, source_concepts, history
        )
    elif concept.derivation == PurposeLineage.MULTISELECT:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
        )
        return gen_multiselect_node(
            concept, local_optional, environment, g, depth + 1, source_concepts, history
        )
    elif concept.derivation == PurposeLineage.MERGE:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating concept merge node with optional {[x.address for x in local_optional]}"
        )
        return gen_concept_merge_node(
            concept, local_optional, environment, g, depth + 1, source_concepts, history
        )
    elif concept.derivation == PurposeLineage.CONSTANT:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
        )
        return ConstantNode(
            input_concepts=[],
            output_concepts=[concept],
            environment=environment,
            g=g,
            parents=[],
            depth=depth + 1,
        )
    elif concept.derivation == PurposeLineage.BASIC:
        # this is special-case handling for group-bys
        if (
            isinstance(concept.lineage, Function)
            and concept.lineage.operator == FunctionType.GROUP
        ):
            logger.info(
                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating group-to-grain node with {[x.address for x in local_optional]}"
            )
            return gen_group_to_node(
                concept,
                local_optional,
                environment,
                g,
                depth + 1,
                source_concepts,
                history,
            )
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating basic node with optional {[x.address for x in local_optional]}"
        )
        return gen_basic_node(
            concept, local_optional, environment, g, depth + 1, source_concepts, history
        )
    elif concept.derivation == PurposeLineage.ROOT:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node"
        )
        return gen_select_node(
            concept,
            local_optional,
            environment,
            g,
            depth + 1,
            fail_if_not_found=False,
            accept_partial=accept_partial,
        )
    else:
        raise ValueError(f"Unknown derivation {concept.derivation}")

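# generate_node is effectively a dispatch table: a cheap gen_select_node
# probe for an already-materialized concept first, then one gen_*_node
# helper per PurposeLineage value, with unknown derivations raising.
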
def validate_stack(
    stack: List[StrategyNode], concepts: List[Concept], accept_partial: bool = False
) -> tuple[ValidationResult, set[str], set[str], set[str]]:
    found_map = defaultdict(set)
    found_addresses: set[str] = set()
    non_partial_addresses: set[str] = set()
    partial_addresses: set[str] = set()
    for node in stack:
        for concept in node.resolve().output_concepts:
            found_map[str(node)].add(concept)
            if concept not in node.partial_concepts:
                found_addresses.add(concept.address)
                non_partial_addresses.add(concept.address)
                # remove it from our partial tracking
                if concept.address in partial_addresses:
                    partial_addresses.remove(concept.address)
            if concept in node.partial_concepts:
                partial_addresses.add(concept.address)
                if accept_partial:
                    found_addresses.add(concept.address)
                    found_map[str(node)].add(concept)
    if not all([c.address in found_addresses for c in concepts]):
        return (
            ValidationResult.INCOMPLETE,
            found_addresses,
            {c.address for c in concepts if c.address not in found_addresses},
            partial_addresses,
        )
    graph_count, graphs = get_disconnected_components(found_map)
    if graph_count in (0, 1):
        return ValidationResult.COMPLETE, found_addresses, set(), partial_addresses
    # if we have too many subgraphs, we need to keep searching
    return ValidationResult.DISCONNECTED, found_addresses, set(), partial_addresses

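# Example return (hypothetical addresses): a stack exposing "a" as a full
# concept and "b" only as a partial, with accept_partial=False, yields
# (ValidationResult.INCOMPLETE, {"a"}, {"b"}, {"b"}).
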
def depth_to_prefix(depth: int) -> str:
    return "\t" * depth


def search_concepts(
    mandatory_list: List[Concept],
    environment: Environment,
    depth: int,
    g: ReferenceGraph,
    accept_partial: bool = False,
    history: History | None = None,
) -> StrategyNode | None:

    history = history or History()
    hist = history.get_history(mandatory_list, accept_partial)
    if hist is not False:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Returning search node from history"
        )
        assert not isinstance(hist, bool)
        return hist

    result = _search_concepts(
        mandatory_list,
        environment,
        depth=depth,
        g=g,
        accept_partial=accept_partial,
        history=history,
    )
    history.search_to_history(mandatory_list, accept_partial, result)
    return result

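# search_concepts above is a thin memoization wrapper: hits (including
# cached failures, stored as None) come back from History, and
# _search_concepts below does the actual discovery work before the result
# is written back to history.
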
def _search_concepts(
    mandatory_list: List[Concept],
    environment: Environment,
    depth: int,
    g: ReferenceGraph,
    history: History,
    accept_partial: bool = False,
) -> StrategyNode | None:

    mandatory_list = unique(mandatory_list, "address")
    all_mandatory = set(c.address for c in mandatory_list)
    attempted: set[str] = set()

    found: set[str] = set()
    skip: set[str] = set()
    stack: List[StrategyNode] = []
    complete = ValidationResult.INCOMPLETE

    while attempted != all_mandatory:
        priority_concept = get_priority_concept(
            mandatory_list, attempted, found_concepts=found, depth=depth
        )
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)}"
        )

        candidates = [
            c for c in mandatory_list if c.address != priority_concept.address
        ]
        candidate_lists = generate_candidates_restrictive(
            priority_concept, candidates, skip
        )
        for candidate_list in candidate_lists:
            logger.info(
                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {str(priority_concept)}, accept_partial {accept_partial} optional {[str(v) for v in candidate_list]}, exhausted {[str(c) for c in skip]}"
            )
            node = generate_node(
                priority_concept,
                candidate_list,
                environment,
                g,
                depth + 1,
                source_concepts=search_concepts,
                accept_partial=accept_partial,
                history=history,
            )
            if node:
                stack.append(node)
                node.resolve()
                found.add(priority_concept.address)
                # these concepts should not be attempted to be sourced again,
                # as fetching them requires operating on a subset of concepts
                if priority_concept.derivation in [
                    PurposeLineage.AGGREGATE,
                    PurposeLineage.FILTER,
                    PurposeLineage.WINDOW,
                    PurposeLineage.UNNEST,
                    PurposeLineage.ROWSET,
                    PurposeLineage.BASIC,
                    PurposeLineage.MULTISELECT,
                    PurposeLineage.MERGE,
                ]:
                    skip.add(priority_concept.address)
                break
        attempted.add(priority_concept.address)
        complete, found, missing, partial = validate_stack(
            stack, mandatory_list, accept_partial
        )
        # early exit if we have a complete stack with one node
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept}, flag for accepting partial addresses is"
            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial}), attempted {attempted}"
        )
        # we can only early exit if we have a complete stack
        # and we are not looking for more non-partial sources
        if complete == ValidationResult.COMPLETE and (
            not accept_partial or (accept_partial and not partial)
        ):
            break

    logger.info(
        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}"
    )
    if complete == ValidationResult.COMPLETE:
        all_partial = [
            c
            for c in mandatory_list
            if all(
                [
                    c.address in [x.address for x in p.partial_concepts]
                    for p in stack
                    if c in p.output_concepts
                ]
            )
        ]
        if len(stack) == 1:
            logger.info(
                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning just that node"
            )
            return stack[0]

        output = MergeNode(
            input_concepts=mandatory_list,
            output_concepts=mandatory_list,
            environment=environment,
            g=g,
            parents=stack,
            depth=depth,
            partial_concepts=all_partial,
        )

        # ensure we can resolve our final merge
        output.resolve()
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning merge node, partial {[c.address for c in all_partial]}"
        )
        return output

    # check that we're not already in a discovery loop
    if not history.check_started(mandatory_list, accept_partial):
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Stack is not a connected graph, flag for accepting partial addresses is {accept_partial}, checking for expanded concepts"
        )
        # gate against further recursion into this
        history.log_start(mandatory_list, accept_partial)
        expanded = gen_merge_node(
            all_concepts=mandatory_list,
            environment=environment,
            g=g,
            depth=depth,
            source_concepts=search_concepts,
            history=history,
        )

        if expanded:
            expanded.resolve()
            logger.info(
                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in mandatory_list]} via concept addition"
            )
            return expanded
    # if we can't find it after expanding to a merge, then
    # attempt to accept partials in join paths

    if not accept_partial:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Stack is not a connected graph, flag for accepting partial addresses is {accept_partial}, changing flag"
        )
        partial_search = search_concepts(
            mandatory_list=mandatory_list,
            environment=environment,
            depth=depth,
            g=g,
            accept_partial=True,
            history=history,
        )
        if partial_search:
            logger.info(
                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found {[c.address for c in mandatory_list]} by accepting partials"
            )
            return partial_search
    logger.error(
        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found}"
    )
    return None

def source_query_concepts(
    output_concepts: List[Concept],
    environment: Environment,
    g: Optional[ReferenceGraph] = None,
):
    if not g:
        g = generate_graph(environment)
    if not output_concepts:
        raise ValueError(f"No output concepts provided {output_concepts}")
    history = History()
    root = search_concepts(
        mandatory_list=output_concepts,
        environment=environment,
        g=g,
        depth=0,
        history=history,
    )

    if not root:
        error_strings = [
            f"{c.address}<{c.purpose}>{c.derivation}>" for c in output_concepts
        ]
        raise ValueError(
            f"Could not resolve connections between {error_strings} from environment graph."
        )
    return GroupNode(
        output_concepts=output_concepts,
        input_concepts=output_concepts,
        environment=environment,
        g=g,
        parents=[root],
        partial_concepts=root.partial_concepts,
    )

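# Usage sketch (assumed API surface; environment setup and concept names
# are illustrative, not taken from this diff):
#
#   from trilogy.core.processing.concept_strategies_v3 import (
#       source_query_concepts,
#   )
#
#   root = source_query_concepts(
#       output_concepts=[env.concepts["order_id"], env.concepts["revenue"]],
#       environment=env,
#   )
#   datasource = root.resolve()  # GroupNode wrapping the discovered plan
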
trilogy/core/processing/graph_utils.py
@@ -0,0 +1,44 @@

from typing import Dict, List
from trilogy.core.models import Concept
from collections import defaultdict
from trilogy.utility import unique


def extract_required_subgraphs(
    assocs: defaultdict[str, list], path: List[str]
) -> defaultdict[str, list]:

    ds = path[0]
    current: list[str] = []
    for idx, val in enumerate(path):
        if val.startswith("ds~"):
            if current:
                assocs[ds] += current
            current = [path[idx - 1]] if idx > 0 else []
            ds = val
        else:
            current.append(val)
    else:
        if current:
            assocs[ds] += current

    return assocs

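# Worked example (hypothetical node names): for
#   path = ["ds~orders", "c~order_id", "c~customer_id",
#           "ds~customers", "c~customer_name"]
# the loop flushes {"ds~orders": ["c~order_id", "c~customer_id"]} when it
# hits ds~customers, reseeds current with the bridging "c~customer_id",
# and the for-else flushes
# {"ds~customers": ["c~customer_id", "c~customer_name"]} at the end.
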
def extract_mandatory_subgraphs(paths: Dict[str, List[str]], g) -> List[List[Concept]]:
    final: list[list[str]] = []
    assocs: defaultdict[str, list] = defaultdict(list)
    for path in paths:
        extract_required_subgraphs(assocs, paths[path])

    for k, v in assocs.items():
        final.append(v)
    final_concepts = []
    for value in final:
        final_concepts.append(
            unique(
                [g.nodes[v]["concept"] for v in value if v.startswith("c~")], "address"
            )
        )
    return final_concepts

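# Note: g is assumed to be a networkx-style graph whose concept nodes are
# keyed "c~<address>" and carry the Concept object in a "concept" node
# attribute, which is what g.nodes[v]["concept"] reads back.
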
trilogy/core/processing/node_generators/__init__.py
@@ -0,0 +1,25 @@

from .filter_node import gen_filter_node
from .window_node import gen_window_node
from .group_node import gen_group_node
from .group_to_node import gen_group_to_node
from .basic_node import gen_basic_node
from .select_node import gen_select_node
from .unnest_node import gen_unnest_node
from .merge_node import gen_merge_node
from .rowset_node import gen_rowset_node
from .multiselect_node import gen_multiselect_node
from .concept_merge import gen_concept_merge_node

__all__ = [
    "gen_filter_node",
    "gen_window_node",
    "gen_group_node",
    "gen_select_node",
    "gen_basic_node",
    "gen_unnest_node",
    "gen_merge_node",
    "gen_group_to_node",
    "gen_rowset_node",
    "gen_multiselect_node",
    "gen_concept_merge_node",
]
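# Usage sketch: these re-exports give concept_strategies_v3 (and other
# callers) a single import point, e.g.
#   from trilogy.core.processing.node_generators import gen_select_node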