pytrilogy 0.0.2.15__py3-none-any.whl → 0.0.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/METADATA +12 -8
- pytrilogy-0.0.2.18.dist-info/RECORD +83 -0
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -1
- trilogy/core/enums.py +1 -0
- trilogy/core/functions.py +11 -0
- trilogy/core/models.py +105 -59
- trilogy/core/optimization.py +15 -9
- trilogy/core/processing/concept_strategies_v3.py +372 -145
- trilogy/core/processing/node_generators/basic_node.py +27 -55
- trilogy/core/processing/node_generators/common.py +6 -7
- trilogy/core/processing/node_generators/filter_node.py +28 -31
- trilogy/core/processing/node_generators/group_node.py +14 -2
- trilogy/core/processing/node_generators/group_to_node.py +3 -1
- trilogy/core/processing/node_generators/multiselect_node.py +3 -0
- trilogy/core/processing/node_generators/node_merge_node.py +14 -9
- trilogy/core/processing/node_generators/rowset_node.py +12 -12
- trilogy/core/processing/node_generators/select_merge_node.py +302 -0
- trilogy/core/processing/node_generators/select_node.py +7 -511
- trilogy/core/processing/node_generators/unnest_node.py +4 -3
- trilogy/core/processing/node_generators/window_node.py +12 -37
- trilogy/core/processing/nodes/__init__.py +0 -2
- trilogy/core/processing/nodes/base_node.py +69 -20
- trilogy/core/processing/nodes/filter_node.py +3 -0
- trilogy/core/processing/nodes/group_node.py +18 -17
- trilogy/core/processing/nodes/merge_node.py +4 -10
- trilogy/core/processing/nodes/select_node_v2.py +28 -14
- trilogy/core/processing/nodes/window_node.py +1 -2
- trilogy/core/processing/utility.py +51 -3
- trilogy/core/query_processor.py +17 -73
- trilogy/dialect/base.py +8 -3
- trilogy/dialect/common.py +65 -10
- trilogy/dialect/duckdb.py +4 -1
- trilogy/dialect/sql_server.py +3 -3
- trilogy/executor.py +5 -0
- trilogy/hooks/query_debugger.py +5 -3
- trilogy/parsing/parse_engine.py +67 -39
- trilogy/parsing/render.py +2 -0
- trilogy/parsing/trilogy.lark +6 -3
- pytrilogy-0.0.2.15.dist-info/RECORD +0 -82
- {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/top_level.txt +0 -0
trilogy/core/processing/concept_strategies_v3.py

@@ -1,11 +1,17 @@
 from collections import defaultdict
-from typing import List, Optional,
+from typing import List, Optional, Protocol, Union

 from trilogy.constants import logger
 from trilogy.core.enums import PurposeLineage, Granularity, FunctionType
 from trilogy.core.env_processor import generate_graph
 from trilogy.core.graph_models import ReferenceGraph
-from trilogy.core.models import
+from trilogy.core.models import (
+    Concept,
+    Environment,
+    Function,
+    WhereClause,
+    RowsetItem,
+)
 from trilogy.core.processing.utility import (
     get_disconnected_components,
 )
@@ -36,11 +42,25 @@ class ValidationResult(Enum):
     COMPLETE = 1
     DISCONNECTED = 2
     INCOMPLETE = 3
+    INCOMPLETE_CONDITION = 4


 LOGGER_PREFIX = "[CONCEPT DETAIL]"


+class SearchConceptsType(Protocol):
+    def __call__(
+        self,
+        mandatory_list: List[Concept],
+        environment: Environment,
+        depth: int,
+        g: ReferenceGraph,
+        accept_partial: bool = False,
+        history: Optional[History] = None,
+        conditions: Optional[WhereClause] = None,
+    ) -> Union[StrategyNode, None]: ...
+
+
 def get_upstream_concepts(base: Concept, nested: bool = False) -> set[str]:
     upstream = set()
     if nested:
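The `SearchConceptsType` Protocol replaces the previously untyped `source_concepts` callback, so the strategy generators and the recursive search share one checked signature. A minimal, self-contained sketch of the same pattern (hypothetical names, not the trilogy API):

```python
from typing import List, Optional, Protocol


class SearchFn(Protocol):
    # any callable with a matching signature satisfies the protocol;
    # structural typing means no inheritance or registration is needed
    def __call__(self, mandatory_list: List[str], depth: int = 0) -> Optional[str]: ...


def drive(search: SearchFn) -> Optional[str]:
    # type checkers flag callables whose signature does not line up
    return search(["concept.a"], depth=1)


def naive_search(mandatory_list: List[str], depth: int = 0) -> Optional[str]:
    return mandatory_list[0] if mandatory_list else None


print(drive(naive_search))  # concept.a
```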
@@ -48,6 +68,11 @@ def get_upstream_concepts(base: Concept, nested: bool = False) -> set[str]:
     if not base.lineage:
         return upstream
     for x in base.lineage.concept_arguments:
+        # if it's derived from any value in a rowset, ALL rowset items are upstream
+        if x.derivation == PurposeLineage.ROWSET:
+            assert isinstance(x.lineage, RowsetItem)
+            for y in x.lineage.rowset.derived_concepts:
+                upstream = upstream.union(get_upstream_concepts(y, nested=True))
         upstream = upstream.union(get_upstream_concepts(x, nested=True))
     return upstream

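The `get_upstream_concepts` change widens lineage tracking: anything derived from one member of a rowset now counts every derived concept of that rowset as upstream. A toy sketch of the recursive union, with an invented two-concept rowset (not the trilogy data model):

```python
# Toy lineage: concept -> direct parents; a rowset groups sibling outputs
# that must all be treated as upstream together, as in the diff.
PARENTS = {"profit": ["rowset.revenue"], "rowset.revenue": [], "rowset.cost": []}
ROWSET_SIBLINGS = {
    "rowset.revenue": ["rowset.revenue", "rowset.cost"],
    "rowset.cost": ["rowset.revenue", "rowset.cost"],
}


def upstream(name: str, nested: bool = False) -> set:
    found = {name} if nested else set()
    for parent in PARENTS.get(name, []):
        # pull in every sibling derived from the same rowset, not just the
        # one concept we directly depend on
        for sibling in ROWSET_SIBLINGS.get(parent, [parent]):
            found |= upstream(sibling, nested=True)
    return found


print(upstream("profit"))  # {'rowset.revenue', 'rowset.cost'}
```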
@@ -64,10 +89,10 @@ def get_priority_concept(
         for c in all_concepts
         if c.address not in attempted_addresses and c.address not in found_concepts
     ]
-    # sometimes we need to scan intermediate concepts to get merge keys
-    #
-    pass_two = [c for c in all_concepts if c.address not in attempted_addresses]
-    for remaining_concept in (pass_one,
+    # sometimes we need to scan intermediate concepts to get merge keys or filter keys,
+    # so do an exhaustive search
+    # pass_two = [c for c in all_concepts+filter_only if c.address not in attempted_addresses]
+    for remaining_concept in (pass_one,):
         priority = (
             # find anything that needs no joins first, so we can exit early
             [
@@ -85,60 +110,19 @@ def get_priority_concept(
             # we should be home-free here
             +
             # then aggregates to remove them from scope, as they cannot get partials
-            [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.AGGREGATE
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
+            [c for c in remaining_concept if c.derivation == PurposeLineage.AGGREGATE]
             # then windows to remove them from scope, as they cannot get partials
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.WINDOW
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.WINDOW]
             # then filters to remove them from scope, also cannot get partials
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.FILTER
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.FILTER]
             # unnests are weird?
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.UNNEST
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.BASIC
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.UNNEST]
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.BASIC]
             # finally our plain selects
             + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.ROOT
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
-            # and any non-single row constants
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation == PurposeLineage.CONSTANT
-                and not c.granularity == Granularity.SINGLE_ROW
-            ]
-            # catch all
-            + [
-                c
-                for c in remaining_concept
-                if c.derivation != PurposeLineage.CONSTANT
-                and c.granularity == Granularity.SINGLE_ROW
-            ]
+                c for c in remaining_concept if c.derivation == PurposeLineage.ROOT
+            ] # and any non-single row constants
+            + [c for c in remaining_concept if c.derivation == PurposeLineage.CONSTANT]
         )

         priority += [
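The rewritten priority list drops the per-bucket `SINGLE_ROW` guards and scans derivations in a fixed order: aggregates, windows, filters, unnests, basics, roots, constants. A compact sketch of that ordering with a stable sort (hypothetical enum and data, not the trilogy types):

```python
from enum import Enum, auto


class Lineage(Enum):
    AGGREGATE = auto()
    WINDOW = auto()
    FILTER = auto()
    UNNEST = auto()
    BASIC = auto()
    ROOT = auto()
    CONSTANT = auto()


# order mirrors the diff: aggregates first, plain roots and constants last
ORDER = list(Lineage)


def prioritize(concepts):
    # stable sort keeps input order within each derivation bucket
    return sorted(concepts, key=lambda c: ORDER.index(c[1]))


print(prioritize([("key", Lineage.ROOT), ("sum_x", Lineage.AGGREGATE)]))
# [('sum_x', <Lineage.AGGREGATE: 1>), ('key', <Lineage.ROOT: 6>)]
```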
@@ -172,6 +156,7 @@ def generate_candidates_restrictive(
     priority_concept: Concept,
     candidates: list[Concept],
     exhausted: set[str],
+    conditions: WhereClause | None = None,
 ) -> List[List[Concept]]:
     # if it's single row, joins are irrelevant. Fetch without keys.
     if priority_concept.granularity == Granularity.SINGLE_ROW:
@@ -185,19 +170,12 @@ def generate_candidates_restrictive(
         and x.address not in priority_concept.pseudonyms
         and priority_concept.address not in x.pseudonyms
     ]
-
-
-
-
-
-
-    ):
-        combos.append(local_candidates)
-    combos.append(grain_check)
-    # combos.append(local_candidates)
-    # append the empty set for sourcing concept by itself last
-    combos.append([])
-    return combos
+    if conditions and priority_concept.derivation in (
+        PurposeLineage.ROOT,
+        PurposeLineage.CONSTANT,
+    ):
+        return [unique(conditions.row_arguments + local_candidates, "address")]
+    return [local_candidates]


 def generate_node(
@@ -206,7 +184,7 @@ def generate_node(
     environment: Environment,
     g: ReferenceGraph,
     depth: int,
-    source_concepts:
+    source_concepts: SearchConceptsType,
     accept_partial: bool = False,
     history: History | None = None,
     conditions: WhereClause | None = None,
@@ -260,7 +238,7 @@ def generate_node(
         )
     elif concept.derivation == PurposeLineage.UNNEST:
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]} and condition {conditions}"
         )
         return gen_unnest_node(
             concept,
@@ -300,22 +278,68 @@ def generate_node(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating rowset node with optional {[x.address for x in local_optional]}"
         )
         return gen_rowset_node(
-            concept,
+            concept,
+            local_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts,
+            history,
+            conditions=conditions,
         )
     elif concept.derivation == PurposeLineage.MULTISELECT:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
         )
         return gen_multiselect_node(
-            concept,
+            concept,
+            local_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts,
+            history,
+            conditions=conditions,
         )
     elif concept.derivation == PurposeLineage.CONSTANT:
+        constant_targets = [concept] + local_optional
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
         )
+        if any([x.derivation != PurposeLineage.CONSTANT for x in local_optional]):
+            non_root = [
+                x.address
+                for x in local_optional
+                if x.derivation != PurposeLineage.CONSTANT
+            ]
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including filter concepts, there is non root filter requirements {non_root}. Recursing with all of these as mandatory"
+            )
+
+            if not history.check_started(
+                constant_targets, accept_partial=accept_partial, conditions=conditions
+            ):
+                history.log_start(
+                    constant_targets,
+                    accept_partial=accept_partial,
+                    conditions=conditions,
+                )
+                return source_concepts(
+                    mandatory_list=constant_targets,
+                    environment=environment,
+                    g=g,
+                    depth=depth + 1,
+                    accept_partial=accept_partial,
+                    history=history,
+                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
+                    # which we do whenever we hit a root node
+                    # conditions=conditions,
+                )
+            else:
+                return None
         return ConstantNode(
             input_concepts=[],
-            output_concepts=
+            output_concepts=constant_targets,
             environment=environment,
             g=g,
             parents=[],
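Both new recursion paths (the CONSTANT branch here and the ROOT branch below) fold extra targets into the mandatory list and re-enter `source_concepts`, guarded by `history.check_started`/`log_start` so the same target set is never searched twice. A stripped-down sketch of that guard (hypothetical `History`, not the trilogy class):

```python
from typing import List, Optional, Tuple


class History:
    def __init__(self) -> None:
        self._started: set = set()

    def _key(self, targets: List[str], accept_partial: bool) -> Tuple:
        return (tuple(sorted(targets)), accept_partial)

    def check_started(self, targets: List[str], accept_partial: bool = False) -> bool:
        return self._key(targets, accept_partial) in self._started

    def log_start(self, targets: List[str], accept_partial: bool = False) -> None:
        self._started.add(self._key(targets, accept_partial))


def search(targets: List[str], history: History) -> Optional[str]:
    if history.check_started(targets):
        return None  # this exact search already ran: stop instead of looping
    history.log_start(targets)
    # ...a real implementation would expand targets before recursing;
    # re-entry with the same key is now a guaranteed no-op
    return search(targets, history)


print(search(["a", "b"], History()))  # None, after exactly one expansion
```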
@@ -338,19 +362,64 @@ def generate_node(
             depth + 1,
             source_concepts,
             history,
+            conditions=conditions,
         )
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating basic node with optional {[x.address for x in local_optional]}"
         )
         return gen_basic_node(
-            concept,
+            concept,
+            local_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts,
+            history,
+            conditions=conditions,
         )

     elif concept.derivation == PurposeLineage.ROOT:
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node with optional {[x.address for x in local_optional]}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node with optional including condition inputs {[x.address for x in local_optional]}"
         )
-        return history.gen_select_node(
+        # we've injected in any conditional concepts that may exist
+        # so if we don't still have just roots, we need to go up
+        root_targets = [concept] + local_optional
+
+        if any(
+            [
+                x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+                for x in local_optional
+            ]
+        ):
+            non_root = [
+                x.address
+                for x in local_optional
+                if x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+            ]
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including filter concepts, there is non root filter requirements {non_root}. Recursing with all of these as mandatory"
+            )
+
+            if not history.check_started(
+                root_targets, accept_partial=accept_partial, conditions=conditions
+            ):
+                history.log_start(
+                    root_targets, accept_partial=accept_partial, conditions=conditions
+                )
+                return source_concepts(
+                    mandatory_list=root_targets,
+                    environment=environment,
+                    g=g,
+                    depth=depth + 1,
+                    accept_partial=accept_partial,
+                    history=history,
+                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
+                    # which we do whenever we hit a root node
+                    # conditions=conditions,
+                )
+
+        check = history.gen_select_node(
             concept,
             local_optional,
             environment,
@@ -358,12 +427,48 @@ def generate_node(
             depth + 1,
             fail_if_not_found=False,
             accept_partial=accept_partial,
-            accept_partial_optional=
+            accept_partial_optional=False,
             source_concepts=source_concepts,
             conditions=conditions,
         )
+        if not check:
+
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for expanded concepts"
+            )
+            for accept_partial in [False, True]:
+                expanded = gen_merge_node(
+                    all_concepts=root_targets,
+                    environment=environment,
+                    g=g,
+                    depth=depth,
+                    source_concepts=source_concepts,
+                    history=history,
+                    search_conditions=conditions,
+                    accept_partial=accept_partial,
+                )
+
+                if expanded:
+                    ex_resolve = expanded.resolve()
+                    extra = [
+                        x
+                        for x in ex_resolve.output_concepts
+                        if x.address not in [y.address for y in root_targets]
+                        and x not in ex_resolve.grain.components
+                    ]
+                    expanded.set_output_concepts(root_targets)
+
+                    logger.info(
+                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in root_targets]} via concept addition; removing extra {[c.address for c in extra]}"
+                    )
+                    return expanded
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} could not find additional concept to inject"
+            )
+            return None
     else:
         raise ValueError(f"Unknown derivation {concept.derivation}")
+    return None


 def validate_concept(
@@ -417,6 +522,8 @@ def validate_concept(
 def validate_stack(
     stack: List[StrategyNode],
     concepts: List[Concept],
+    mandatory_with_filter: List[Concept],
+    conditions: WhereClause | None = None,
     accept_partial: bool = False,
 ) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
     found_map: dict[str, set[Concept]] = defaultdict(set)
@@ -425,6 +532,7 @@ def validate_stack(
     partial_addresses: set[str] = set()
     virtual_addresses: set[str] = set()
     seen: set[str] = set()
+
     for node in stack:
         resolved = node.resolve()

@@ -445,13 +553,26 @@ def validate_stack(
                 continue
             found_addresses.add(concept.address)
             virtual_addresses.add(concept.address)
-
+    if not conditions:
+        conditions_met = True
+    else:
+        conditions_met = all(
+            [node.preexisting_conditions == conditions.conditional for node in stack]
+        ) or all([c.address in found_addresses for c in mandatory_with_filter])
     # zip in those we know we found
-    if not all([c.address in found_addresses for c in concepts]):
+    if not all([c.address in found_addresses for c in concepts]) or not conditions_met:
+        if not all([c.address in found_addresses for c in concepts]):
+            return (
+                ValidationResult.INCOMPLETE,
+                found_addresses,
+                {c.address for c in concepts if c.address not in found_addresses},
+                partial_addresses,
+                virtual_addresses,
+            )
         return (
-            ValidationResult.
+            ValidationResult.INCOMPLETE_CONDITION,
             found_addresses,
-            {c.address for c in concepts if c.address not in
+            {c.address for c in concepts if c.address not in mandatory_with_filter},
             partial_addresses,
             virtual_addresses,
         )
@@ -479,6 +600,38 @@ def depth_to_prefix(depth: int) -> str:
     return "\t" * depth


+def append_existence_check(
+    node: StrategyNode,
+    environment: Environment,
+    graph: ReferenceGraph,
+    where: WhereClause,
+    history: History | None = None,
+):
+    # we if we have a where clause doing an existence check
+    # treat that as separate subquery
+    if where.existence_arguments:
+        for subselect in where.existence_arguments:
+            if not subselect:
+                continue
+            if all([x.address in node.input_concepts for x in subselect]):
+                logger.info(
+                    f"{LOGGER_PREFIX} existance clause inputs already found {[str(c) for c in subselect]}"
+                )
+                continue
+            logger.info(
+                f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
+            )
+            parent = source_query_concepts(
+                [*subselect], environment=environment, g=graph, history=history
+            )
+            assert parent, "Could not resolve existence clause"
+            node.add_parents([parent])
+            logger.info(
+                f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
+            )
+            node.add_existence_concepts([*subselect])
+
+
 def search_concepts(
     mandatory_list: List[Concept],
     environment: Environment,
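`append_existence_check` resolves the inputs of an existence (`EXISTS`-style) clause as their own subquery and attaches it as an additional parent, skipping subselects whose inputs the node already carries. A minimal sketch of that attach-if-missing shape (invented `Node` type, not the trilogy `StrategyNode`):

```python
from dataclasses import dataclass, field
from typing import List, Set


@dataclass
class Node:
    input_concepts: Set[str]
    parents: List["Node"] = field(default_factory=list)


def append_existence_inputs(node: Node, subselects: List[Set[str]]) -> None:
    for subselect in subselects:
        if subselect <= node.input_concepts:
            continue  # inputs already present, no extra subquery needed
        # otherwise source the subselect independently and attach it as a parent
        node.parents.append(Node(input_concepts=set(subselect)))


root = Node(input_concepts={"orders.id"})
append_existence_inputs(root, [{"orders.id"}, {"customers.id"}])
print(len(root.parents))  # 1 -- only the missing inputs became a parent subquery
```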
@@ -529,9 +682,32 @@ def _search_concepts(
     conditions: WhereClause | None = None,
 ) -> StrategyNode | None:

+    # these are the concepts we need in the output projection
     mandatory_list = unique(mandatory_list, "address")

     all_mandatory = set(c.address for c in mandatory_list)
+
+    must_evaluate_condition_on_this_level_not_push_down = False
+
+    # if we have a filter, we may need to get more values to support that.
+    if conditions:
+        completion_mandatory = unique(
+            mandatory_list + conditions.row_arguments, "address"
+        )
+        # if anything we need to get is in the filter set and it's a computed value
+        # we need to get _everything_ in this loop
+        if any(
+            [
+                x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+                and x.address in conditions.row_arguments
+                for x in mandatory_list
+            ]
+        ):
+            mandatory_list = completion_mandatory
+            must_evaluate_condition_on_this_level_not_push_down = True
+    else:
+        completion_mandatory = mandatory_list
+
     attempted: set[str] = set()

     found: set[str] = set()
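`_search_concepts` now tracks two sets: `mandatory_list`, the caller's projection, and `completion_mandatory`, which adds the filter's row arguments; when a computed concept is among the filter inputs, the whole completion set is resolved in one pass. In plain set arithmetic, the rule looks roughly like this (invented concept names):

```python
# mandatory: what the query projects; row_args: what the filter reads
mandatory = {"customer.name", "revenue_rank"}  # revenue_rank is computed
row_args = {"revenue_rank", "customer.region"}
computed = {"revenue_rank"}

# the completion set is the projection plus everything the filter needs
completion_mandatory = mandatory | row_args

# if any computed concept we must produce is also a filter input, resolve the
# full completion set at this level so the filter can run after the computation
if computed & row_args & mandatory:
    mandatory = completion_mandatory

print(sorted(mandatory))  # ['customer.name', 'customer.region', 'revenue_rank']
```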
@@ -542,22 +718,60 @@ def _search_concepts(

     while attempted != all_mandatory:
         priority_concept = get_priority_concept(
-            mandatory_list,
+            mandatory_list,
+            attempted,
+            found_concepts=found,
+            depth=depth,
         )
+        # filter evaluation
+        # always pass the filter up when we aren't looking at all filter inputs
+        # or there are any non-filter complex types
+        if conditions:
+            should_evaluate_filter_on_this_level_not_push_down = all(
+                [x.address in mandatory_list for x in conditions.row_arguments]
+            ) and not any(
+                [
+                    x.derivation not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+                    for x in mandatory_list
+                    if x.address not in conditions.row_arguments
+                ]
+            )
+        else:
+            should_evaluate_filter_on_this_level_not_push_down = True
+        local_conditions = (
+            conditions
+            if conditions
+            and not must_evaluate_condition_on_this_level_not_push_down
+            and not should_evaluate_filter_on_this_level_not_push_down
+            else None
+        )
+        # but if it's not basic, and it's not condition;
+        # we do need to push it down (and have another layer of filter evaluation)
+        # to ensure filtering happens before something like a SUM
+        if (
+            conditions
+            and priority_concept.derivation
+            not in (PurposeLineage.ROOT, PurposeLineage.CONSTANT)
+            and priority_concept.address not in conditions.row_arguments
+        ):
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Force including conditions to push filtering above complex condition that is not condition member or parent"
+            )
+            local_conditions = conditions

         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} with conditions {local_conditions}"
         )

         candidates = [
             c for c in mandatory_list if c.address != priority_concept.address
         ]
         candidate_lists = generate_candidates_restrictive(
-            priority_concept, candidates, skip
+            priority_concept, candidates, skip, conditions=conditions
         )
         for clist in candidate_lists:
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {priority_concept.address}, accept_partial {accept_partial}, optional {[v.address for v in clist]}, exhausted {[c for c in skip]}"
             )
             node = generate_node(
                 priority_concept,
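The sourcing loop then chooses `local_conditions` per priority concept: the filter stays at this level when all of its inputs are in hand, but is forced down past computed concepts that are neither filter members nor filter inputs, so rows are filtered before e.g. a SUM aggregates them. A boolean sketch of that decision (hypothetical flag names mirroring the diff's variables):

```python
def pick_local_conditions(
    conditions: str,
    priority_is_computed: bool,
    priority_in_filter: bool,
    must_eval_here: bool,
    should_eval_here: bool,
):
    # default: hand the filter to the child search only when this level is
    # not already responsible for evaluating it
    local = (
        conditions
        if conditions and not must_eval_here and not should_eval_here
        else None
    )
    # a computed concept that is neither a filter member nor a filter input
    # must still see the filter below it, so rows are filtered before a SUM
    if conditions and priority_is_computed and not priority_in_filter:
        local = conditions
    return local


print(
    pick_local_conditions(
        "region = 'EU'",
        priority_is_computed=True,
        priority_in_filter=False,
        must_eval_here=True,
        should_eval_here=False,
    )
)  # region = 'EU'
```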
@@ -568,11 +782,17 @@ def _search_concepts(
                 source_concepts=search_concepts,
                 accept_partial=accept_partial,
                 history=history,
-                conditions=
+                conditions=local_conditions,
             )
             if node:
                 stack.append(node)
-
+                try:
+                    node.resolve()
+                except Exception as e:
+                    logger.error(
+                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve node {node} {e}"
+                    )
+                    raise e
                 # these concepts should not be attempted to be sourced again
                 # as fetching them requires operating on a subset of concepts
                 if priority_concept.derivation in [
@@ -588,13 +808,21 @@ def _search_concepts(
                 break
         attempted.add(priority_concept.address)
         complete, found, missing, partial, virtual = validate_stack(
-            stack,
+            stack,
+            mandatory_list,
+            completion_mandatory,
+            conditions=conditions,
+            accept_partial=accept_partial,
         )
-
+        mandatory_completion = [c.address for c in completion_mandatory]
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} flag for accepting partial addresses is"
-            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial} virtual {virtual}), attempted {attempted}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} {priority_concept.derivation} condition {conditions} flag for accepting partial addresses is"
+            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial} virtual {virtual}), attempted {attempted}, mandatory w/ filter {mandatory_completion}"
         )
+        if complete == ValidationResult.INCOMPLETE_CONDITION:
+            raise SyntaxError(
+                {str(node): node.preexisting_conditions for node in stack}
+            )
         # early exit if we have a complete stack with one node
         # we can only early exit if we have a complete stack
         # and we are not looking for more non-partial sources
@@ -602,73 +830,65 @@ def _search_concepts(
             not accept_partial or (accept_partial and not partial)
         ):
             break
+        # if we have attempted on root node, we've tried them all.
+        # inject in another search with filter concepts
+        if priority_concept.derivation == PurposeLineage.ROOT:
+            logger.info(
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Breaking as attempted root with no results"
+            )
+            break

     logger.info(
         f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}, virtual {virtual}"
     )
     if complete == ValidationResult.COMPLETE:
-        non_virtual = [c for c in mandatory_list if c.address not in virtual]
+        condition_required = True
+        non_virtual = [c for c in completion_mandatory if c.address not in virtual]
+        if not conditions:
+            condition_required = False
+            non_virtual = [c for c in mandatory_list if c.address not in virtual]
+
+        elif all([x.preexisting_conditions == conditions.conditional for x in stack]):
+            condition_required = False
+            non_virtual = [c for c in mandatory_list if c.address not in virtual]
+
+        if conditions and not condition_required:
+            parent_map = {
+                str(x): x.preexisting_conditions == conditions.conditional
+                for x in stack
+            }
+            logger.info(
+                f"Condition {conditions} not required, parents included filtering! {parent_map }"
+            )
         if len(stack) == 1:
-            output = stack[0]
+            output: StrategyNode = stack[0]
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
+            )
+        else:
+            output = MergeNode(
+                input_concepts=non_virtual,
+                output_concepts=non_virtual,
+                environment=environment,
+                g=g,
+                parents=stack,
+                depth=depth,
             )
-            return output
-
-        output = MergeNode(
-            input_concepts=non_virtual,
-            output_concepts=non_virtual,
-            environment=environment,
-            g=g,
-            parents=stack,
-            depth=depth,
-        )
-
         # ensure we can resolve our final merge
         output.resolve()
+        if condition_required and conditions:
+            output.add_condition(conditions.conditional)
+            if conditions.existence_arguments:
+                append_existence_check(
+                    output, environment, g, where=conditions, history=history
+                )
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning merge node, partial {[c.address for c in output.partial_concepts]}"
         )
         return output

-    # check that we're not already in a discovery loop
-    if not history.check_started(
-        mandatory_list, accept_partial=accept_partial, conditions=conditions
-    ):
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Stack is not connected graph, flag for accepting partial addresses is {accept_partial}, checking for expanded concepts"
-        )
-        # gate against further recursion into this
-        history.log_start(
-            mandatory_list, accept_partial=accept_partial, conditions=conditions
-        )
-        expanded = gen_merge_node(
-            all_concepts=mandatory_list,
-            environment=environment,
-            g=g,
-            depth=depth,
-            source_concepts=search_concepts,
-            history=history,
-            search_conditions=conditions,
-        )
-
-        if expanded:
-            # we don't need to return the entire list; just the ones we needed pre-expansion
-            ex_resolve = expanded.resolve()
-            extra = [
-                x
-                for x in ex_resolve.output_concepts
-                if x.address not in [y.address for y in mandatory_list]
-                and x not in ex_resolve.grain.components
-            ]
-            expanded.set_output_concepts(mandatory_list)
-
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in mandatory_list]} via concept addition; removing extra {[c.address for c in extra]}"
-            )
-            return expanded
     # if we can't find it after expanding to a merge, then
-    #
+    # accept partials in join paths

     if not accept_partial:
         logger.info(
@@ -723,11 +943,18 @@ def source_query_concepts(
         raise ValueError(
             f"Could not resolve conections between {error_strings} from environment graph."
         )
-
-        output_concepts=
-
+    candidate = GroupNode(
+        output_concepts=[
+            x for x in root.output_concepts if x.address not in root.hidden_concepts
+        ],
+        input_concepts=[
+            x for x in root.output_concepts if x.address not in root.hidden_concepts
+        ],
         environment=environment,
         g=g,
         parents=[root],
         partial_concepts=root.partial_concepts,
     )
+    if not candidate.resolve().group_required:
+        return root
+    return candidate
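`source_query_concepts` now builds the final `GroupNode` as a `candidate` and returns the bare root whenever resolving shows `group_required` is false, skipping a no-op GROUP BY. A toy sketch of that prune step (invented node types, not the trilogy classes):

```python
from dataclasses import dataclass


@dataclass
class Resolved:
    group_required: bool


@dataclass
class PlanNode:
    name: str
    grain_change: bool = False

    def resolve(self) -> Resolved:
        # hypothetical: grouping is only required when the node changes grain
        return Resolved(group_required=self.grain_change)


def finalize(root: PlanNode, candidate: PlanNode) -> PlanNode:
    # keep the cheaper plan when the group wrapper would be a no-op
    if not candidate.resolve().group_required:
        return root
    return candidate


print(finalize(PlanNode("root"), PlanNode("group_wrapper")).name)  # root
```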
|