pytrilogy 0.0.3.55__py3-none-any.whl → 0.0.3.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pytrilogy might be problematic.
- {pytrilogy-0.0.3.55.dist-info → pytrilogy-0.0.3.57.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.55.dist-info → pytrilogy-0.0.3.57.dist-info}/RECORD +39 -34
- {pytrilogy-0.0.3.55.dist-info → pytrilogy-0.0.3.57.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/authoring/__init__.py +12 -1
- trilogy/core/enums.py +1 -0
- trilogy/core/models/author.py +6 -4
- trilogy/core/models/execute.py +4 -1
- trilogy/core/optimization.py +4 -4
- trilogy/core/processing/concept_strategies_v3.py +324 -895
- trilogy/core/processing/discovery_loop.py +0 -0
- trilogy/core/processing/discovery_node_factory.py +475 -0
- trilogy/core/processing/discovery_utility.py +123 -0
- trilogy/core/processing/discovery_validation.py +155 -0
- trilogy/core/processing/node_generators/basic_node.py +29 -11
- trilogy/core/processing/node_generators/node_merge_node.py +1 -1
- trilogy/core/processing/node_generators/select_node.py +6 -8
- trilogy/core/processing/node_generators/synonym_node.py +2 -1
- trilogy/core/processing/node_generators/unnest_node.py +7 -1
- trilogy/core/processing/nodes/__init__.py +2 -4
- trilogy/core/processing/nodes/base_node.py +0 -13
- trilogy/core/processing/nodes/group_node.py +1 -1
- trilogy/core/processing/utility.py +38 -11
- trilogy/core/query_processor.py +3 -3
- trilogy/core/statements/author.py +6 -2
- trilogy/core/statements/execute.py +3 -2
- trilogy/dialect/base.py +3 -30
- trilogy/dialect/snowflake.py +1 -1
- trilogy/executor.py +13 -4
- trilogy/parsing/common.py +1 -3
- trilogy/parsing/parse_engine.py +14 -2
- trilogy/parsing/trilogy.lark +1 -1
- trilogy/std/date.preql +3 -1
- trilogy/std/geography.preql +4 -0
- trilogy/std/money.preql +65 -4
- trilogy/std/net.preql +8 -0
- {pytrilogy-0.0.3.55.dist-info → pytrilogy-0.0.3.57.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.55.dist-info → pytrilogy-0.0.3.57.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.55.dist-info → pytrilogy-0.0.3.57.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,8 @@
-from
-from
-from typing import List, Optional, Protocol, Union
+from dataclasses import dataclass
+from typing import List, Optional
 
 from trilogy.constants import logger
-from trilogy.core.enums import Derivation,
+from trilogy.core.enums import Derivation, Granularity
 from trilogy.core.env_processor import generate_graph
 from trilogy.core.exceptions import UnresolvableQueryException
 from trilogy.core.graph_models import ReferenceGraph
@@ -12,187 +11,41 @@ from trilogy.core.models.author import (
 )
 from trilogy.core.models.build import (
     BuildConcept,
-    BuildFunction,
-    BuildRowsetItem,
     BuildWhereClause,
 )
 from trilogy.core.models.build_environment import BuildEnvironment
-from trilogy.core.processing.node_generators import (
-    gen_basic_node,
-    gen_filter_node,
-    gen_group_node,
-    gen_group_to_node,
-    gen_merge_node,
-    gen_multiselect_node,
-    gen_recursive_node,
-    gen_rowset_node,
-    gen_synonym_node,
-    gen_union_node,
-    gen_unnest_node,
-    gen_window_node,
+from trilogy.core.processing.discovery_node_factory import generate_node
+from trilogy.core.processing.discovery_utility import (
+    LOGGER_PREFIX,
+    depth_to_prefix,
+    get_priority_concept,
+)
+from trilogy.core.processing.discovery_validation import (
+    ValidationResult,
+    validate_stack,
 )
 from trilogy.core.processing.nodes import (
-    ConstantNode,
     GroupNode,
     History,
     MergeNode,
     StrategyNode,
 )
-from trilogy.core.processing.utility import (
-    get_disconnected_components,
-)
 from trilogy.utility import unique
 
+SKIPPED_DERIVATIONS = [
+    Derivation.AGGREGATE,
+    Derivation.FILTER,
+    Derivation.WINDOW,
+    Derivation.UNNEST,
+    Derivation.RECURSIVE,
+    Derivation.ROWSET,
+    Derivation.BASIC,
+    Derivation.GROUP_TO,
+    Derivation.MULTISELECT,
+    Derivation.UNION,
+]
 
-
-COMPLETE = 1
-DISCONNECTED = 2
-INCOMPLETE = 3
-INCOMPLETE_CONDITION = 4
-
-
-LOGGER_PREFIX = "[CONCEPT DETAIL]"
-
-
-class SearchConceptsType(Protocol):
-    def __call__(
-        self,
-        mandatory_list: List[BuildConcept],
-        history: History,
-        environment: BuildEnvironment,
-        depth: int,
-        g: ReferenceGraph,
-        accept_partial: bool = False,
-        conditions: Optional[BuildWhereClause] = None,
-    ) -> Union[StrategyNode, None]: ...
-
-
-def get_upstream_concepts(base: BuildConcept, nested: bool = False) -> set[str]:
-    upstream = set()
-    if nested:
-        upstream.add(base.address)
-    if not base.lineage:
-        return upstream
-    for x in base.lineage.concept_arguments:
-        # if it's derived from any value in a rowset, ALL rowset items are upstream
-        if x.derivation == Derivation.ROWSET:
-            assert isinstance(x.lineage, BuildRowsetItem), type(x.lineage)
-            for y in x.lineage.rowset.select.output_components:
-                upstream.add(f"{x.lineage.rowset.name}.{y.address}")
-                # upstream = upstream.union(get_upstream_concepts(y, nested=True))
-        upstream = upstream.union(get_upstream_concepts(x, nested=True))
-    return upstream
-
-
-def restrict_node_outputs_targets(
-    node: StrategyNode, targets: list[BuildConcept], depth: int
-) -> list[BuildConcept]:
-    ex_resolve = node.resolve()
-    extra = [
-        x
-        for x in ex_resolve.output_concepts
-        if x.address not in [y.address for y in targets]
-    ]
-
-    base = [x for x in ex_resolve.output_concepts if x.address not in extra]
-    logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} reducing final outputs, was {[c.address for c in ex_resolve.output_concepts]} with extra {[c.address for c in extra]}, remaining {base}"
-    )
-    for x in targets:
-        if x.address not in base:
-            base.append(x)
-    node.set_output_concepts(base)
-    return extra
-
-
-def get_priority_concept(
-    all_concepts: List[BuildConcept],
-    attempted_addresses: set[str],
-    found_concepts: set[str],
-    depth: int,
-) -> BuildConcept:
-    # optimized search for missing concepts
-    pass_one = sorted(
-        [
-            c
-            for c in all_concepts
-            if c.address not in attempted_addresses and c.address not in found_concepts
-        ],
-        key=lambda x: x.address,
-    )
-    # sometimes we need to scan intermediate concepts to get merge keys or filter keys,
-    # so do an exhaustive search
-    # pass_two = [c for c in all_concepts+filter_only if c.address not in attempted_addresses]
-    for remaining_concept in (pass_one,):
-        priority = (
-            # find anything that needs no joins first, so we can exit early
-            [
-                c
-                for c in remaining_concept
-                if c.derivation == Derivation.CONSTANT
-                and c.granularity == Granularity.SINGLE_ROW
-            ]
-            +
-            # then multiselects to remove them from scope
-            [c for c in remaining_concept if c.derivation == Derivation.MULTISELECT]
-            +
-            # then rowsets to remove them from scope, as they cannot get partials
-            [c for c in remaining_concept if c.derivation == Derivation.ROWSET]
-            +
-            # then rowsets to remove them from scope, as they cannot get partials
-            [c for c in remaining_concept if c.derivation == Derivation.UNION]
-            # we should be home-free here
-            +
-            # then aggregates to remove them from scope, as they cannot get partials
-            [c for c in remaining_concept if c.derivation == Derivation.AGGREGATE]
-            # then windows to remove them from scope, as they cannot get partials
-            + [c for c in remaining_concept if c.derivation == Derivation.WINDOW]
-            # then filters to remove them from scope, also cannot get partials
-            + [c for c in remaining_concept if c.derivation == Derivation.FILTER]
-            # unnests are weird?
-            + [c for c in remaining_concept if c.derivation == Derivation.UNNEST]
-            + [c for c in remaining_concept if c.derivation == Derivation.RECURSIVE]
-            + [c for c in remaining_concept if c.derivation == Derivation.BASIC]
-            # finally our plain selects
-            + [
-                c for c in remaining_concept if c.derivation == Derivation.ROOT
-            ]  # and any non-single row constants
-            + [c for c in remaining_concept if c.derivation == Derivation.CONSTANT]
-        )
-
-        priority += [
-            c
-            for c in remaining_concept
-            if c.address not in [x.address for x in priority]
-        ]
-        final = []
-        # if any thing is derived from another concept
-        # get the derived copy first
-        # as this will usually resolve cleaner
-        for x in priority:
-            if any(
-                [
-                    x.address
-                    in get_upstream_concepts(
-                        c,
-                    )
-                    for c in priority
-                ]
-            ):
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} delaying fetch of {x.address} as parent of another concept"
-                )
-                continue
-            final.append(x)
-        # then append anything we didn't get
-        for x2 in priority:
-            if x2 not in final:
-                final.append(x2)
-        if final:
-            return final[0]
-    raise ValueError(
-        f"Cannot resolve query. No remaining priority concepts, have attempted {attempted_addresses}"
-    )
+ROOT_DERIVATIONS = [Derivation.ROOT, Derivation.CONSTANT]
 
 
 def generate_candidates_restrictive(
@@ -201,10 +54,10 @@ def generate_candidates_restrictive(
     exhausted: set[str],
     depth: int,
     conditions: BuildWhereClause | None = None,
-) -> List[
+) -> List[BuildConcept]:
     # if it's single row, joins are irrelevant. Fetch without keys.
     if priority_concept.granularity == Granularity.SINGLE_ROW:
-        return [
+        return []
 
     local_candidates = [
         x
@@ -214,510 +67,12 @@ def generate_candidates_restrictive(
         and x.address not in priority_concept.pseudonyms
         and priority_concept.address not in x.pseudonyms
     ]
-    if conditions and priority_concept.derivation in (
-        Derivation.ROOT,
-        Derivation.CONSTANT,
-    ):
+    if conditions and priority_concept.derivation in ROOT_DERIVATIONS:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Injecting additional conditional row arguments as all remaining concepts are roots or constant"
         )
-        return
-    return
-
-
-def generate_node(
-    concept: BuildConcept,
-    local_optional: List[BuildConcept],
-    environment: BuildEnvironment,
-    g: ReferenceGraph,
-    depth: int,
-    source_concepts: SearchConceptsType,
-    history: History,
-    accept_partial: bool = False,
-    conditions: BuildWhereClause | None = None,
-) -> StrategyNode | None:
-    # first check in case there is a materialized_concept
-    candidate = history.gen_select_node(
-        concept,
-        local_optional,
-        environment,
-        g,
-        depth + 1,
-        fail_if_not_found=False,
-        accept_partial=accept_partial,
-        accept_partial_optional=False,
-        source_concepts=source_concepts,
-        conditions=conditions,
-    )
-
-    if candidate:
-        return candidate
-
-    if concept.derivation == Derivation.WINDOW:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating window node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_window_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-
-    elif concept.derivation == Derivation.FILTER:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating filter node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_filter_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.UNNEST:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]} and condition {conditions}"
-        )
-        return gen_unnest_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.RECURSIVE:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating recursive node with optional {[x.address for x in local_optional]} and condition {conditions}"
-        )
-        return gen_recursive_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.UNION:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating union node with optional {[x.address for x in local_optional]} and condition {conditions}"
-        )
-        return gen_union_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            source_concepts,
-            history,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.AGGREGATE:
-        # don't push constants up before aggregation
-        # if not required
-        # to avoid constants multiplication changing default aggregation results
-        # ex sum(x) * 2 w/ no grain should return sum(x) * 2, not sum(x*2)
-        # these should always be sourceable independently
-        agg_optional = [
-            x for x in local_optional if x.granularity != Granularity.SINGLE_ROW
-        ]
-
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating aggregate node with {[x for x in agg_optional]}"
-        )
-        return gen_group_node(
-            concept,
-            agg_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.ROWSET:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating rowset node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_rowset_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            source_concepts,
-            history,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.MULTISELECT:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_multiselect_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            source_concepts,
-            history,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.CONSTANT:
-        constant_targets = [concept] + local_optional
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
-        )
-        if any([x.derivation != Derivation.CONSTANT for x in local_optional]):
-            non_root = [
-                x.address for x in local_optional if x.derivation != Derivation.CONSTANT
-            ]
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including filter concepts, there are non root/non constant concepts we should find first: {non_root}. Recursing with all of these as mandatory"
-            )
-
-            if not history.check_started(
-                constant_targets, accept_partial=accept_partial, conditions=conditions
-            ):
-                history.log_start(
-                    constant_targets,
-                    accept_partial=accept_partial,
-                    conditions=conditions,
-                )
-                return source_concepts(
-                    mandatory_list=constant_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    accept_partial=accept_partial,
-                    history=history,
-                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
-                    # which we do whenever we hit a root node
-                    # conditions=conditions,
-                )
-            else:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} skipping search, already in a recursion fot these concepts"
-                )
-                return None
-        return ConstantNode(
-            input_concepts=[],
-            output_concepts=constant_targets,
-            environment=environment,
-            parents=[],
-            depth=depth + 1,
-            preexisting_conditions=conditions.conditional if conditions else None,
-        )
-    elif concept.derivation == Derivation.BASIC:
-        # this is special case handling for group bys
-        if (
-            isinstance(concept.lineage, BuildFunction)
-            and concept.lineage.operator == FunctionType.GROUP
-        ):
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating group to grain node with {[x.address for x in local_optional]}"
-            )
-            return gen_group_to_node(
-                concept,
-                local_optional,
-                environment,
-                g,
-                depth + 1,
-                source_concepts,
-                history,
-                conditions=conditions,
-            )
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating basic node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_basic_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-
-    elif concept.derivation == Derivation.ROOT:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node with optional including condition inputs {[x.address for x in local_optional]}"
-        )
-        # we've injected in any conditional concepts that may exist
-        # so if we don't still have just roots, we need to go up
-        root_targets = [concept] + local_optional
-
-        if any(
-            [
-                x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
-                for x in local_optional
-            ]
-        ):
-            non_root = [
-                x.address
-                for x in local_optional
-                if x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
-            ]
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including any filters, there are non-root concepts we should expand first: {non_root}. Recursing with all of these as mandatory"
-            )
-
-            # if not history.check_started(
-            #     root_targets, accept_partial=accept_partial, conditions=conditions
-            # ) or 1==1:
-            if True:
-                history.log_start(
-                    root_targets, accept_partial=accept_partial, conditions=conditions
-                )
-                return source_concepts(
-                    mandatory_list=root_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    accept_partial=accept_partial,
-                    history=history,
-                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
-                    # which we do whenever we hit a root node
-                    # conditions=conditions,
-                )
-            else:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} skipping root search, already in a recursion for these concepts"
-                )
-        check = history.gen_select_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            fail_if_not_found=False,
-            accept_partial=accept_partial,
-            accept_partial_optional=False,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-        if not check:
-
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for expanded concepts"
-            )
-            for accept_partial in [False, True]:
-                expanded = gen_merge_node(
-                    all_concepts=root_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    source_concepts=source_concepts,
-                    history=history,
-                    search_conditions=conditions,
-                    accept_partial=accept_partial,
-                )
-
-                if expanded:
-                    extra = restrict_node_outputs_targets(expanded, root_targets, depth)
-                    pseudonyms = [
-                        x
-                        for x in extra
-                        if any(x.address in y.pseudonyms for y in root_targets)
-                    ]
-                    if pseudonyms:
-                        expanded.add_output_concepts(pseudonyms)
-                        logger.info(
-                            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Hiding pseudonyms{[c.address for c in pseudonyms]}"
-                        )
-                        expanded.hide_output_concepts(pseudonyms)
-
-                    logger.info(
-                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in root_targets]} via concept addition; removing extra {[c.address for c in extra]}"
-                    )
-                    return expanded
-
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} could not find additional concept(s) to inject"
-            )
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for synonyms"
-            )
-            if not history.check_started(
-                root_targets, accept_partial=accept_partial, conditions=conditions
-            ):
-                history.log_start(
-                    root_targets, accept_partial=accept_partial, conditions=conditions
-                )
-                resolved = gen_synonym_node(
-                    all_concepts=root_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    source_concepts=source_concepts,
-                    history=history,
-                    conditions=conditions,
-                    accept_partial=accept_partial,
-                )
-                if resolved:
-                    logger.info(
-                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} resolved concepts through synonyms"
-                    )
-                    return resolved
-            else:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} skipping synonym search, already in a recursion for these concepts"
-                )
-        return None
-    else:
-        raise ValueError(f"Unknown derivation {concept.derivation} on {concept}")
-    return None
-
-
-def validate_concept(
-    concept: BuildConcept,
-    node: StrategyNode,
-    found_addresses: set[str],
-    non_partial_addresses: set[str],
-    partial_addresses: set[str],
-    virtual_addresses: set[str],
-    found_map: dict[str, set[BuildConcept]],
-    accept_partial: bool,
-    seen: set[str],
-    environment: BuildEnvironment,
-):
-    found_map[str(node)].add(concept)
-    seen.add(concept.address)
-    if concept not in node.partial_concepts:
-        found_addresses.add(concept.address)
-        non_partial_addresses.add(concept.address)
-        # remove it from our partial tracking
-        if concept.address in partial_addresses:
-            partial_addresses.remove(concept.address)
-        if concept.address in virtual_addresses:
-            virtual_addresses.remove(concept.address)
-    if concept in node.partial_concepts:
-        if concept.address in non_partial_addresses:
-            return None
-        partial_addresses.add(concept.address)
-        if accept_partial:
-            found_addresses.add(concept.address)
-            found_map[str(node)].add(concept)
-    for v_address in concept.pseudonyms:
-        if v_address in seen:
-            return
-        v = environment.concepts[v_address]
-        if v.address in seen:
-            return
-        if v.address == concept.address:
-            return
-        validate_concept(
-            v,
-            node,
-            found_addresses,
-            non_partial_addresses,
-            partial_addresses,
-            virtual_addresses,
-            found_map,
-            accept_partial,
-            seen=seen,
-            environment=environment,
-        )
-
-
-def validate_stack(
-    environment: BuildEnvironment,
-    stack: List[StrategyNode],
-    concepts: List[BuildConcept],
-    mandatory_with_filter: List[BuildConcept],
-    conditions: BuildWhereClause | None = None,
-    accept_partial: bool = False,
-) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
-    found_map: dict[str, set[BuildConcept]] = defaultdict(set)
-    found_addresses: set[str] = set()
-    non_partial_addresses: set[str] = set()
-    partial_addresses: set[str] = set()
-    virtual_addresses: set[str] = set()
-    seen: set[str] = set()
-
-    for node in stack:
-        resolved = node.resolve()
-
-        for concept in resolved.output_concepts:
-            if concept.address in resolved.hidden_concepts:
-                continue
-
-            validate_concept(
-                concept,
-                node,
-                found_addresses,
-                non_partial_addresses,
-                partial_addresses,
-                virtual_addresses,
-                found_map,
-                accept_partial,
-                seen,
-                environment,
-            )
-        for concept in node.virtual_output_concepts:
-            if concept.address in non_partial_addresses:
-                continue
-            found_addresses.add(concept.address)
-            virtual_addresses.add(concept.address)
-    if not conditions:
-        conditions_met = True
-    else:
-        conditions_met = all(
-            [node.preexisting_conditions == conditions.conditional for node in stack]
-        ) or all([c.address in found_addresses for c in mandatory_with_filter])
-    # zip in those we know we found
-    if not all([c.address in found_addresses for c in concepts]) or not conditions_met:
-        if not all([c.address in found_addresses for c in concepts]):
-            return (
-                ValidationResult.INCOMPLETE,
-                found_addresses,
-                {c.address for c in concepts if c.address not in found_addresses},
-                partial_addresses,
-                virtual_addresses,
-            )
-        return (
-            ValidationResult.INCOMPLETE_CONDITION,
-            found_addresses,
-            {c.address for c in concepts if c.address not in mandatory_with_filter},
-            partial_addresses,
-            virtual_addresses,
-        )
-
-    graph_count, _ = get_disconnected_components(found_map)
-    if graph_count in (0, 1):
-        return (
-            ValidationResult.COMPLETE,
-            found_addresses,
-            set(),
-            partial_addresses,
-            virtual_addresses,
-        )
-    # if we have too many subgraphs, we need to keep searching
-    return (
-        ValidationResult.DISCONNECTED,
-        found_addresses,
-        set(),
-        partial_addresses,
-        virtual_addresses,
-    )
-
-
-def depth_to_prefix(depth: int) -> str:
-    return "\t" * depth
+        return unique(list(conditions.row_arguments) + local_candidates, "address")
+    return local_candidates
 
 
 def append_existence_check(
@@ -764,7 +119,6 @@ def search_concepts(
     accept_partial: bool = False,
     conditions: BuildWhereClause | None = None,
 ) -> StrategyNode | None:
-    logger.error(f"starting search for {mandatory_list}")
     hist = history.get_history(
         search=mandatory_list, accept_partial=accept_partial, conditions=conditions
     )
@@ -794,7 +148,31 @@ def search_concepts(
     return result
 
 
-def _search_concepts(
+@dataclass
+class LoopContext:
+    mandatory_list: List[BuildConcept]
+    environment: BuildEnvironment
+    depth: int
+    g: ReferenceGraph
+    history: History
+    attempted: set[str]
+    found: set[str]
+    skip: set[str]
+    all_mandatory: set[str]
+    original_mandatory: List[BuildConcept]
+    completion_mandatory: List[BuildConcept]
+    stack: List[StrategyNode]
+    complete: ValidationResult = ValidationResult.INCOMPLETE
+    accept_partial: bool = False
+    must_evaluate_condition_on_this_level_not_push_down: bool = False
+    conditions: BuildWhereClause | None = None
+
+    @property
+    def incomplete(self) -> bool:
+        return self.attempted != self.all_mandatory
+
+
+def initialize_loop_context(
     mandatory_list: List[BuildConcept],
     environment: BuildEnvironment,
     depth: int,
@@ -802,7 +180,7 @@ def _search_concepts(
     history: History,
     accept_partial: bool = False,
     conditions: BuildWhereClause | None = None,
-)
+):
     # these are the concepts we need in the output projection
     mandatory_list = unique(mandatory_list, "address")
     # cache our values before an filter injection
@@ -824,7 +202,7 @@ def _search_concepts(
     required_filters = [
         x
        for x in mandatory_list
-        if x.derivation not in
+        if x.derivation not in ROOT_DERIVATIONS
         and not (
             x.derivation == Derivation.AGGREGATE
             and x.granularity == Granularity.SINGLE_ROW
@@ -844,258 +222,309 @@ def _search_concepts(
     else:
 
         completion_mandatory = mandatory_list
-
+    return LoopContext(
+        mandatory_list=mandatory_list,
+        environment=environment,
+        depth=depth,
+        g=g,
+        history=history,
+        attempted=set(),
+        found=set(),
+        skip=set(),
+        all_mandatory=all_mandatory,
+        original_mandatory=original_mandatory,
+        completion_mandatory=completion_mandatory,
+        stack=[],
+        complete=ValidationResult.INCOMPLETE,
+        accept_partial=accept_partial,
+        must_evaluate_condition_on_this_level_not_push_down=must_evaluate_condition_on_this_level_not_push_down,
+        conditions=conditions,
+    )
 
-    found: set[str] = set()
-    skip: set[str] = set()
-    virtual: set[str] = set()
-    stack: List[StrategyNode] = []
-    complete = ValidationResult.INCOMPLETE
 
-
-
-
-
-
-
+def evaluate_loop_conditions(
+    context: LoopContext, priority_concept: BuildConcept
+) -> BuildWhereClause | None:
+    # filter evaluation
+    # always pass the filter up when we aren't looking at all filter inputs
+    # or there are any non-filter complex types
+    if context.conditions:
+        should_evaluate_filter_on_this_level_not_push_down = all(
+            [
+                x.address in context.mandatory_list
+                for x in context.conditions.row_arguments
+            ]
+        ) and not any(
+            [
+                x.derivation not in ROOT_DERIVATIONS
+                for x in context.mandatory_list
+                if x.address not in context.conditions.row_arguments
+            ]
+        )
+    else:
+        should_evaluate_filter_on_this_level_not_push_down = True
+    local_conditions = (
+        context.conditions
+        if context.conditions
+        and not context.must_evaluate_condition_on_this_level_not_push_down
+        and not should_evaluate_filter_on_this_level_not_push_down
+        else None
+    )
+    # but if it's not basic, and it's not condition;
+    # we do need to push it down (and have another layer of filter evaluation)
+    # to ensure filtering happens before something like a SUM
+    if (
+        context.conditions
+        and priority_concept.derivation not in ROOT_DERIVATIONS
+        and priority_concept.address not in context.conditions.row_arguments
+    ):
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Force including conditions in {priority_concept.address} to push filtering above complex condition that is not condition member or parent"
         )
-
-
-
-
-
-
-
-
-
-
-
-
+        local_conditions = context.conditions
+    return local_conditions
+
+
+def check_for_early_exit(
+    complete, partial, context: LoopContext, priority_concept: BuildConcept
+) -> bool:
+    if complete == ValidationResult.INCOMPLETE_CONDITION:
+        cond_dict = {str(node): node.preexisting_conditions for node in context.stack}
+        for node in context.stack:
+            logger.info(
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Node {node} has conditions {node.preexisting_conditions} and {node.conditions}"
             )
-
-
-
-
-
-
-
+        raise SyntaxError(f"Have {cond_dict} and need {str(context.conditions)}")
+    # early exit if we have a complete stack with one node
+    # we can only early exit if we have a complete stack
+    # and we are not looking for more non-partial sources
+    if complete == ValidationResult.COMPLETE and (
+        not context.accept_partial or (context.accept_partial and not partial)
+    ):
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} breaking loop, complete"
         )
-
-
-
-        if (
-            conditions
-            and priority_concept.derivation
-            not in (Derivation.ROOT, Derivation.CONSTANT)
-            and priority_concept.address not in conditions.row_arguments
-        ):
+        return True
+    elif complete == ValidationResult.COMPLETE and context.accept_partial and partial:
+        if len(context.attempted) == len(context.mandatory_list):
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Breaking as we have attempted all nodes"
             )
-
-
+            return True
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Found complete stack with partials {partial}, continuing search, attempted {context.attempted} all {len(context.mandatory_list)}"
         )
+    else:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Not complete, continuing search"
+        )
+    # if we have attempted on root node, we've tried them all.
+    # inject in another search with filter concepts
+    if priority_concept.derivation == Derivation.ROOT:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Breaking as attempted root with no results"
+        )
+        return True
+    return False
 
-
-
+
+def generate_loop_completion(context: LoopContext, virtual) -> StrategyNode:
+    condition_required = True
+    non_virtual = [c for c in context.completion_mandatory if c.address not in virtual]
+    non_virtual_output = [
+        c for c in context.original_mandatory if c.address not in virtual
+    ]
+    non_virtual_different = len(context.completion_mandatory) != len(
+        context.original_mandatory
+    )
+    non_virtual_difference_values = set(
+        [x.address for x in context.completion_mandatory]
+    ).difference(set([x.address for x in context.original_mandatory]))
+    if not context.conditions:
+        condition_required = False
+        non_virtual = [c for c in context.mandatory_list if c.address not in virtual]
+
+    elif all(
+        [
+            x.preexisting_conditions == context.conditions.conditional
+            for x in context.stack
         ]
-
-
+    ):
+        condition_required = False
+        non_virtual = [c for c in context.mandatory_list if c.address not in virtual]
+
+    if context.conditions and not condition_required:
+        parent_map = {
+            str(x): x.preexisting_conditions == context.conditions.conditional
+            for x in context.stack
+        }
+        logger.info(
+            f"Condition {context.conditions} not required, parents included filtering! {parent_map }"
         )
-
+    if len(context.stack) == 1:
+        output: StrategyNode = context.stack[0]
+        if non_virtual_different:
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Found different non-virtual output concepts ({non_virtual_difference_values}), removing condition injected values"
             )
-
-
-
-            environment,
-            g,
-            depth,
-            source_concepts=search_concepts,
-            accept_partial=accept_partial,
-            history=history,
-            conditions=local_conditions,
+            output.set_output_concepts(
+                [x for x in output.output_concepts if x.address in non_virtual_output],
+                rebuild=False,
             )
-
-        stack.append(node)
-        try:
-            node.resolve()
-        except Exception as e:
-            logger.error(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve node {node} {e}"
-            )
-            raise e
-        # these concepts should not be attempted to be sourced again
-        # as fetching them requires operating on a subset of concepts
-        if priority_concept.derivation in [
-            Derivation.AGGREGATE,
-            Derivation.FILTER,
-            Derivation.WINDOW,
-            Derivation.UNNEST,
-            Derivation.RECURSIVE,
-            Derivation.ROWSET,
-            Derivation.BASIC,
-            Derivation.MULTISELECT,
-            Derivation.UNION,
-        ]:
-            skip.add(priority_concept.address)
-            break
-        attempted.add(priority_concept.address)
-        complete, found, missing, partial, virtual = validate_stack(
-            environment,
-            stack,
-            mandatory_list,
-            completion_mandatory,
-            conditions=conditions,
-            accept_partial=accept_partial,
-        )
-        mandatory_completion = [c.address for c in completion_mandatory]
+
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
-            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} synonyms partial {partial} virtual {virtual}), attempted {attempted}, mandatory w/ filter {mandatory_completion}"
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    else:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} wrapping multiple parent nodes {[type(x) for x in context.stack]} in merge node"
+        )
+        output = MergeNode(
+            input_concepts=non_virtual,
+            output_concepts=non_virtual,
+            environment=context.environment,
+            parents=context.stack,
+            depth=context.depth,
+        )
+
+    # ensure we can resolve our final merge
+    output.resolve()
+    if condition_required and context.conditions:
+        output.add_condition(context.conditions.conditional)
+        if context.conditions.existence_arguments:
+            append_existence_check(
+                output,
+                context.environment,
+                context.g,
+                where=context.conditions,
+                history=context.history,
             )
-
+    elif context.conditions:
+        output.preexisting_conditions = context.conditions.conditional
+    logger.info(
+        f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Graph is connected, returning {type(output)} node partial {[c.address for c in output.partial_concepts]} with {context.conditions}"
+    )
+    if condition_required and context.conditions and non_virtual_different:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Conditions {context.conditions} were injected, checking if we need a group to restore grain"
+        )
+
+        result = GroupNode.check_if_required(
+            downstream_concepts=output.usable_outputs,
+            parents=[output.resolve()],
+            environment=context.environment,
+            depth=context.depth,
+        )
+        if result.required:
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Adding group node"
             )
-
-
-
-
-
+            return GroupNode(
+                output_concepts=context.original_mandatory,
+                input_concepts=output.usable_outputs,
+                environment=context.environment,
+                parents=[output],
+                partial_concepts=output.partial_concepts,
+                preexisting_conditions=context.conditions.conditional,
+                depth=context.depth,
             )
-
+    return output
 
-
-
+
+def _search_concepts(
+    mandatory_list: List[BuildConcept],
+    environment: BuildEnvironment,
+    depth: int,
+    g: ReferenceGraph,
+    history: History,
+    accept_partial: bool = False,
+    conditions: BuildWhereClause | None = None,
+) -> StrategyNode | None:
+
+    context = initialize_loop_context(
+        mandatory_list=mandatory_list,
+        environment=environment,
+        depth=depth,
+        g=g,
+        history=history,
+        accept_partial=accept_partial,
+        conditions=conditions,
     )
-    if complete == ValidationResult.COMPLETE:
-        condition_required = True
-        non_virtual = [c for c in completion_mandatory if c.address not in virtual]
-        non_virtual_output = [c for c in original_mandatory if c.address not in virtual]
-        non_virtual_different = len(completion_mandatory) != len(original_mandatory)
-        non_virtual_difference_values = set(
-            [x.address for x in completion_mandatory]
-        ).difference(set([x.address for x in original_mandatory]))
-        if not conditions:
-            condition_required = False
-            non_virtual = [c for c in mandatory_list if c.address not in virtual]
 
-
-
-
+    while context.incomplete:
+        priority_concept = get_priority_concept(
+            context.mandatory_list,
+            context.attempted,
+            found_concepts=context.found,
+            depth=depth,
+        )
 
-
-
-
-
-        }
-        logger.info(
-            f"Condition {conditions} not required, parents included filtering! {parent_map }"
-        )
-    if len(stack) == 1:
-        output: StrategyNode = stack[0]
-        if non_virtual_different:
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found different non-virtual output concepts ({non_virtual_difference_values}), removing condition injected values"
-            )
-            output.set_output_concepts(
-                [
-                    x
-                    for x in output.output_concepts
-                    if x.address in non_virtual_output
-                ],
-                rebuild=False,
-            )
+        local_conditions = evaluate_loop_conditions(context, priority_concept)
+        logger.info(
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} granularity {priority_concept.granularity} with conditions {local_conditions}"
+        )
 
-
-
-
-
-
-
-
-
-
-
-            environment=environment,
-            parents=stack,
-            depth=depth,
-        )
+        candidates = [
+            c for c in context.mandatory_list if c.address != priority_concept.address
+        ]
+        candidate_list = generate_candidates_restrictive(
+            priority_concept,
+            candidates,
+            context.skip,
+            depth=depth,
+            conditions=context.conditions,
+        )
 
-        # ensure we can resolve our final merge
-        output.resolve()
-        if condition_required and conditions:
-            output.add_condition(conditions.conditional)
-            if conditions.existence_arguments:
-                append_existence_check(
-                    output, environment, g, where=conditions, history=history
-                )
-        elif conditions:
-            output.preexisting_conditions = conditions.conditional
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {priority_concept.address}, accept_partial {accept_partial}, optional {[v.address for v in candidate_list]}, exhausted {[c for c in context.skip]}"
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        node = generate_node(
+            priority_concept,
+            candidate_list,
+            environment,
+            g,
+            depth,
+            source_concepts=search_concepts,
+            accept_partial=accept_partial,
+            history=history,
+            conditions=local_conditions,
+        )
+        if node:
+            context.stack.append(node)
+            node.resolve()
+        # these concepts should not be attempted to be sourced again
+        # as fetching them requires operating on a subset of concepts
+        if priority_concept.derivation in SKIPPED_DERIVATIONS:
+            context.skip.add(priority_concept.address)
+        context.attempted.add(priority_concept.address)
+        complete, found_c, missing_c, partial, virtual = validate_stack(
+            environment,
+            context.stack,
+            context.mandatory_list,
+            context.completion_mandatory,
+            conditions=context.conditions,
+            accept_partial=accept_partial,
+        )
+        # assign
+        context.found = found_c
+        early_exit = check_for_early_exit(complete, partial, context, priority_concept)
+        if early_exit:
+            break
+
+        logger.info(
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {context.found} from {[n for n in context.stack]} (missing {context.all_mandatory - context.found}), attempted {context.attempted}, virtual {virtual}"
+        )
+        if complete == ValidationResult.COMPLETE:
+            return generate_loop_completion(context, virtual)
 
     # if we can't find it after expanding to a merge, then
     # accept partials in join paths
-
     if not accept_partial:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Stack is not connected graph, flag for accepting partial addresses is {accept_partial}, changing flag"
         )
         partial_search = search_concepts(
+            # use the original mandatory list
            mandatory_list=mandatory_list,
            environment=environment,
            depth=depth,
@@ -1110,7 +539,7 @@ def _search_concepts(
         )
         return partial_search
     logger.error(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found},"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {context.all_mandatory - context.found},"
     )
 
     return None