pytrilogy 0.0.3.54__py3-none-any.whl → 0.0.3.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/RECORD +37 -30
- trilogy/__init__.py +1 -1
- trilogy/constants.py +2 -0
- trilogy/core/enums.py +6 -0
- trilogy/core/functions.py +3 -0
- trilogy/core/models/author.py +12 -4
- trilogy/core/models/execute.py +207 -2
- trilogy/core/optimization.py +3 -3
- trilogy/core/optimizations/inline_datasource.py +5 -7
- trilogy/core/processing/concept_strategies_v3.py +323 -878
- trilogy/core/processing/discovery_loop.py +0 -0
- trilogy/core/processing/discovery_node_factory.py +469 -0
- trilogy/core/processing/discovery_utility.py +123 -0
- trilogy/core/processing/discovery_validation.py +155 -0
- trilogy/core/processing/node_generators/__init__.py +2 -0
- trilogy/core/processing/node_generators/recursive_node.py +87 -0
- trilogy/core/processing/node_generators/select_node.py +6 -8
- trilogy/core/processing/nodes/__init__.py +4 -4
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/query_processor.py +7 -1
- trilogy/dialect/base.py +11 -2
- trilogy/dialect/bigquery.py +5 -6
- trilogy/dialect/common.py +19 -3
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/snowflake.py +8 -8
- trilogy/parsing/common.py +4 -3
- trilogy/parsing/parse_engine.py +12 -0
- trilogy/parsing/trilogy.lark +3 -1
- trilogy/std/date.preql +3 -1
- trilogy/std/geography.preql +4 -0
- trilogy/std/money.preql +65 -4
- trilogy/std/net.preql +8 -0
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/top_level.txt +0 -0
trilogy/core/processing/concept_strategies_v3.py

@@ -1,9 +1,8 @@
-from
-from
-from typing import List, Optional, Protocol, Union
+from dataclasses import dataclass
+from typing import List, Optional
 
 from trilogy.constants import logger
-from trilogy.core.enums import Derivation,
+from trilogy.core.enums import Derivation, Granularity
 from trilogy.core.env_processor import generate_graph
 from trilogy.core.exceptions import UnresolvableQueryException
 from trilogy.core.graph_models import ReferenceGraph
@@ -12,185 +11,41 @@ from trilogy.core.models.author import (
 )
 from trilogy.core.models.build import (
     BuildConcept,
-    BuildFunction,
-    BuildRowsetItem,
     BuildWhereClause,
 )
 from trilogy.core.models.build_environment import BuildEnvironment
-from trilogy.core.processing.
-
-
-
-
-
-
-
-
-    gen_union_node,
-    gen_unnest_node,
-    gen_window_node,
+from trilogy.core.processing.discovery_node_factory import generate_node
+from trilogy.core.processing.discovery_utility import (
+    LOGGER_PREFIX,
+    depth_to_prefix,
+    get_priority_concept,
+)
+from trilogy.core.processing.discovery_validation import (
+    ValidationResult,
+    validate_stack,
 )
 from trilogy.core.processing.nodes import (
-    ConstantNode,
     GroupNode,
     History,
     MergeNode,
     StrategyNode,
 )
-from trilogy.core.processing.utility import (
-    get_disconnected_components,
-)
 from trilogy.utility import unique
 
+SKIPPED_DERIVATIONS = [
+    Derivation.AGGREGATE,
+    Derivation.FILTER,
+    Derivation.WINDOW,
+    Derivation.UNNEST,
+    Derivation.RECURSIVE,
+    Derivation.ROWSET,
+    Derivation.BASIC,
+    Derivation.GROUP_TO,
+    Derivation.MULTISELECT,
+    Derivation.UNION,
+]
 
-
-COMPLETE = 1
-DISCONNECTED = 2
-INCOMPLETE = 3
-INCOMPLETE_CONDITION = 4
-
-
-LOGGER_PREFIX = "[CONCEPT DETAIL]"
-
-
-class SearchConceptsType(Protocol):
-    def __call__(
-        self,
-        mandatory_list: List[BuildConcept],
-        history: History,
-        environment: BuildEnvironment,
-        depth: int,
-        g: ReferenceGraph,
-        accept_partial: bool = False,
-        conditions: Optional[BuildWhereClause] = None,
-    ) -> Union[StrategyNode, None]: ...
-
-
-def get_upstream_concepts(base: BuildConcept, nested: bool = False) -> set[str]:
-    upstream = set()
-    if nested:
-        upstream.add(base.address)
-    if not base.lineage:
-        return upstream
-    for x in base.lineage.concept_arguments:
-        # if it's derived from any value in a rowset, ALL rowset items are upstream
-        if x.derivation == Derivation.ROWSET:
-            assert isinstance(x.lineage, BuildRowsetItem), type(x.lineage)
-            for y in x.lineage.rowset.select.output_components:
-                upstream.add(f"{x.lineage.rowset.name}.{y.address}")
-                # upstream = upstream.union(get_upstream_concepts(y, nested=True))
-        upstream = upstream.union(get_upstream_concepts(x, nested=True))
-    return upstream
-
-
-def restrict_node_outputs_targets(
-    node: StrategyNode, targets: list[BuildConcept], depth: int
-) -> list[BuildConcept]:
-    ex_resolve = node.resolve()
-    extra = [
-        x
-        for x in ex_resolve.output_concepts
-        if x.address not in [y.address for y in targets]
-    ]
-
-    base = [x for x in ex_resolve.output_concepts if x.address not in extra]
-    logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} reducing final outputs, was {[c.address for c in ex_resolve.output_concepts]} with extra {[c.address for c in extra]}, remaining {base}"
-    )
-    for x in targets:
-        if x.address not in base:
-            base.append(x)
-    node.set_output_concepts(base)
-    return extra
-
-
-def get_priority_concept(
-    all_concepts: List[BuildConcept],
-    attempted_addresses: set[str],
-    found_concepts: set[str],
-    depth: int,
-) -> BuildConcept:
-    # optimized search for missing concepts
-    pass_one = sorted(
-        [
-            c
-            for c in all_concepts
-            if c.address not in attempted_addresses and c.address not in found_concepts
-        ],
-        key=lambda x: x.address,
-    )
-    # sometimes we need to scan intermediate concepts to get merge keys or filter keys,
-    # so do an exhaustive search
-    # pass_two = [c for c in all_concepts+filter_only if c.address not in attempted_addresses]
-    for remaining_concept in (pass_one,):
-        priority = (
-            # find anything that needs no joins first, so we can exit early
-            [
-                c
-                for c in remaining_concept
-                if c.derivation == Derivation.CONSTANT
-                and c.granularity == Granularity.SINGLE_ROW
-            ]
-            +
-            # then multiselects to remove them from scope
-            [c for c in remaining_concept if c.derivation == Derivation.MULTISELECT]
-            +
-            # then rowsets to remove them from scope, as they cannot get partials
-            [c for c in remaining_concept if c.derivation == Derivation.ROWSET]
-            +
-            # then rowsets to remove them from scope, as they cannot get partials
-            [c for c in remaining_concept if c.derivation == Derivation.UNION]
-            # we should be home-free here
-            +
-            # then aggregates to remove them from scope, as they cannot get partials
-            [c for c in remaining_concept if c.derivation == Derivation.AGGREGATE]
-            # then windows to remove them from scope, as they cannot get partials
-            + [c for c in remaining_concept if c.derivation == Derivation.WINDOW]
-            # then filters to remove them from scope, also cannot get partials
-            + [c for c in remaining_concept if c.derivation == Derivation.FILTER]
-            # unnests are weird?
-            + [c for c in remaining_concept if c.derivation == Derivation.UNNEST]
-            + [c for c in remaining_concept if c.derivation == Derivation.BASIC]
-            # finally our plain selects
-            + [
-                c for c in remaining_concept if c.derivation == Derivation.ROOT
-            ]  # and any non-single row constants
-            + [c for c in remaining_concept if c.derivation == Derivation.CONSTANT]
-        )
-
-        priority += [
-            c
-            for c in remaining_concept
-            if c.address not in [x.address for x in priority]
-        ]
-        final = []
-        # if any thing is derived from another concept
-        # get the derived copy first
-        # as this will usually resolve cleaner
-        for x in priority:
-            if any(
-                [
-                    x.address
-                    in get_upstream_concepts(
-                        c,
-                    )
-                    for c in priority
-                ]
-            ):
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} delaying fetch of {x.address} as parent of another concept"
-                )
-                continue
-            final.append(x)
-        # then append anything we didn't get
-        for x2 in priority:
-            if x2 not in final:
-                final.append(x2)
-    if final:
-        return final[0]
-    raise ValueError(
-        f"Cannot resolve query. No remaining priority concepts, have attempted {attempted_addresses}"
-    )
+ROOT_DERIVATIONS = [Derivation.ROOT, Derivation.CONSTANT]
 
 
 def generate_candidates_restrictive(
@@ -199,10 +54,10 @@ def generate_candidates_restrictive(
     exhausted: set[str],
     depth: int,
     conditions: BuildWhereClause | None = None,
-) -> List[
+) -> List[BuildConcept]:
     # if it's single row, joins are irrelevant. Fetch without keys.
     if priority_concept.granularity == Granularity.SINGLE_ROW:
-        return [
+        return []
 
     local_candidates = [
         x
@@ -212,496 +67,12 @@ def generate_candidates_restrictive(
         and x.address not in priority_concept.pseudonyms
         and priority_concept.address not in x.pseudonyms
     ]
-    if conditions and priority_concept.derivation in
-        Derivation.ROOT,
-        Derivation.CONSTANT,
-    ):
+    if conditions and priority_concept.derivation in ROOT_DERIVATIONS:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Injecting additional conditional row arguments as all remaining concepts are roots or constant"
         )
-        return
-    return
-
-
-def generate_node(
-    concept: BuildConcept,
-    local_optional: List[BuildConcept],
-    environment: BuildEnvironment,
-    g: ReferenceGraph,
-    depth: int,
-    source_concepts: SearchConceptsType,
-    history: History,
-    accept_partial: bool = False,
-    conditions: BuildWhereClause | None = None,
-) -> StrategyNode | None:
-    # first check in case there is a materialized_concept
-    candidate = history.gen_select_node(
-        concept,
-        local_optional,
-        environment,
-        g,
-        depth + 1,
-        fail_if_not_found=False,
-        accept_partial=accept_partial,
-        accept_partial_optional=False,
-        source_concepts=source_concepts,
-        conditions=conditions,
-    )
-
-    if candidate:
-        return candidate
-
-    if concept.derivation == Derivation.WINDOW:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating window node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_window_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-
-    elif concept.derivation == Derivation.FILTER:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating filter node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_filter_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.UNNEST:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]} and condition {conditions}"
-        )
-        return gen_unnest_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.UNION:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating union node with optional {[x.address for x in local_optional]} and condition {conditions}"
-        )
-        return gen_union_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            source_concepts,
-            history,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.AGGREGATE:
-        # don't push constants up before aggregation
-        # if not required
-        # to avoid constants multiplication changing default aggregation results
-        # ex sum(x) * 2 w/ no grain should return sum(x) * 2, not sum(x*2)
-        # these should always be sourceable independently
-        agg_optional = [
-            x for x in local_optional if x.granularity != Granularity.SINGLE_ROW
-        ]
-
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating aggregate node with {[x for x in agg_optional]}"
-        )
-        return gen_group_node(
-            concept,
-            agg_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.ROWSET:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating rowset node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_rowset_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            source_concepts,
-            history,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.MULTISELECT:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_multiselect_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            source_concepts,
-            history,
-            conditions=conditions,
-        )
-    elif concept.derivation == Derivation.CONSTANT:
-        constant_targets = [concept] + local_optional
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
-        )
-        if any([x.derivation != Derivation.CONSTANT for x in local_optional]):
-            non_root = [
-                x.address for x in local_optional if x.derivation != Derivation.CONSTANT
-            ]
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including filter concepts, there are non root/non constant concepts we should find first: {non_root}. Recursing with all of these as mandatory"
-            )
-
-            if not history.check_started(
-                constant_targets, accept_partial=accept_partial, conditions=conditions
-            ):
-                history.log_start(
-                    constant_targets,
-                    accept_partial=accept_partial,
-                    conditions=conditions,
-                )
-                return source_concepts(
-                    mandatory_list=constant_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    accept_partial=accept_partial,
-                    history=history,
-                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
-                    # which we do whenever we hit a root node
-                    # conditions=conditions,
-                )
-            else:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} skipping search, already in a recursion fot these concepts"
-                )
-                return None
-        return ConstantNode(
-            input_concepts=[],
-            output_concepts=constant_targets,
-            environment=environment,
-            parents=[],
-            depth=depth + 1,
-            preexisting_conditions=conditions.conditional if conditions else None,
-        )
-    elif concept.derivation == Derivation.BASIC:
-        # this is special case handling for group bys
-        if (
-            isinstance(concept.lineage, BuildFunction)
-            and concept.lineage.operator == FunctionType.GROUP
-        ):
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating group to grain node with {[x.address for x in local_optional]}"
-            )
-            return gen_group_to_node(
-                concept,
-                local_optional,
-                environment,
-                g,
-                depth + 1,
-                source_concepts,
-                history,
-                conditions=conditions,
-            )
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating basic node with optional {[x.address for x in local_optional]}"
-        )
-        return gen_basic_node(
-            concept,
-            local_optional,
-            history=history,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-
-    elif concept.derivation == Derivation.ROOT:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node with optional including condition inputs {[x.address for x in local_optional]}"
-        )
-        # we've injected in any conditional concepts that may exist
-        # so if we don't still have just roots, we need to go up
-        root_targets = [concept] + local_optional
-
-        if any(
-            [
-                x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
-                for x in local_optional
-            ]
-        ):
-            non_root = [
-                x.address
-                for x in local_optional
-                if x.derivation not in (Derivation.ROOT, Derivation.CONSTANT)
-            ]
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} including any filters, there are non-root concepts we should expand first: {non_root}. Recursing with all of these as mandatory"
-            )
-
-            # if not history.check_started(
-            #     root_targets, accept_partial=accept_partial, conditions=conditions
-            # ) or 1==1:
-            if True:
-                history.log_start(
-                    root_targets, accept_partial=accept_partial, conditions=conditions
-                )
-                return source_concepts(
-                    mandatory_list=root_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    accept_partial=accept_partial,
-                    history=history,
-                    # we DO NOT pass up conditions at this point, as we are now expanding to include conditions in search
-                    # which we do whenever we hit a root node
-                    # conditions=conditions,
-                )
-            else:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} skipping root search, already in a recursion for these concepts"
-                )
-        check = history.gen_select_node(
-            concept,
-            local_optional,
-            environment,
-            g,
-            depth + 1,
-            fail_if_not_found=False,
-            accept_partial=accept_partial,
-            accept_partial_optional=False,
-            source_concepts=source_concepts,
-            conditions=conditions,
-        )
-        if not check:
-
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for expanded concepts"
-            )
-            for accept_partial in [False, True]:
-                expanded = gen_merge_node(
-                    all_concepts=root_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    source_concepts=source_concepts,
-                    history=history,
-                    search_conditions=conditions,
-                    accept_partial=accept_partial,
-                )
-
-                if expanded:
-                    extra = restrict_node_outputs_targets(expanded, root_targets, depth)
-                    pseudonyms = [
-                        x
-                        for x in extra
-                        if any(x.address in y.pseudonyms for y in root_targets)
-                    ]
-                    if pseudonyms:
-                        expanded.add_output_concepts(pseudonyms)
-                        logger.info(
-                            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Hiding pseudonyms{[c.address for c in pseudonyms]}"
-                        )
-                        expanded.hide_output_concepts(pseudonyms)
-
-                    logger.info(
-                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in root_targets]} via concept addition; removing extra {[c.address for c in extra]}"
-                    )
-                    return expanded
-
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} could not find additional concept(s) to inject"
-            )
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for synonyms"
-            )
-            if not history.check_started(
-                root_targets, accept_partial=accept_partial, conditions=conditions
-            ):
-                history.log_start(
-                    root_targets, accept_partial=accept_partial, conditions=conditions
-                )
-                resolved = gen_synonym_node(
-                    all_concepts=root_targets,
-                    environment=environment,
-                    g=g,
-                    depth=depth + 1,
-                    source_concepts=source_concepts,
-                    history=history,
-                    conditions=conditions,
-                    accept_partial=accept_partial,
-                )
-                if resolved:
-                    logger.info(
-                        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} resolved concepts through synonyms"
-                    )
-                    return resolved
-            else:
-                logger.info(
-                    f"{depth_to_prefix(depth)}{LOGGER_PREFIX} skipping synonym search, already in a recursion for these concepts"
-                )
-        return None
-    else:
-        raise ValueError(f"Unknown derivation {concept.derivation} on {concept}")
-    return None
-
-
-def validate_concept(
-    concept: BuildConcept,
-    node: StrategyNode,
-    found_addresses: set[str],
-    non_partial_addresses: set[str],
-    partial_addresses: set[str],
-    virtual_addresses: set[str],
-    found_map: dict[str, set[BuildConcept]],
-    accept_partial: bool,
-    seen: set[str],
-    environment: BuildEnvironment,
-):
-    found_map[str(node)].add(concept)
-    seen.add(concept.address)
-    if concept not in node.partial_concepts:
-        found_addresses.add(concept.address)
-        non_partial_addresses.add(concept.address)
-        # remove it from our partial tracking
-        if concept.address in partial_addresses:
-            partial_addresses.remove(concept.address)
-        if concept.address in virtual_addresses:
-            virtual_addresses.remove(concept.address)
-    if concept in node.partial_concepts:
-        if concept.address in non_partial_addresses:
-            return None
-        partial_addresses.add(concept.address)
-        if accept_partial:
-            found_addresses.add(concept.address)
-            found_map[str(node)].add(concept)
-    for v_address in concept.pseudonyms:
-        if v_address in seen:
-            return
-        v = environment.concepts[v_address]
-        if v.address in seen:
-            return
-        if v.address == concept.address:
-            return
-        validate_concept(
-            v,
-            node,
-            found_addresses,
-            non_partial_addresses,
-            partial_addresses,
-            virtual_addresses,
-            found_map,
-            accept_partial,
-            seen=seen,
-            environment=environment,
-        )
-
-
-def validate_stack(
-    environment: BuildEnvironment,
-    stack: List[StrategyNode],
-    concepts: List[BuildConcept],
-    mandatory_with_filter: List[BuildConcept],
-    conditions: BuildWhereClause | None = None,
-    accept_partial: bool = False,
-) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
-    found_map: dict[str, set[BuildConcept]] = defaultdict(set)
-    found_addresses: set[str] = set()
-    non_partial_addresses: set[str] = set()
-    partial_addresses: set[str] = set()
-    virtual_addresses: set[str] = set()
-    seen: set[str] = set()
-
-    for node in stack:
-        resolved = node.resolve()
-
-        for concept in resolved.output_concepts:
-            if concept.address in resolved.hidden_concepts:
-                continue
-
-            validate_concept(
-                concept,
-                node,
-                found_addresses,
-                non_partial_addresses,
-                partial_addresses,
-                virtual_addresses,
-                found_map,
-                accept_partial,
-                seen,
-                environment,
-            )
-        for concept in node.virtual_output_concepts:
-            if concept.address in non_partial_addresses:
-                continue
-            found_addresses.add(concept.address)
-            virtual_addresses.add(concept.address)
-    if not conditions:
-        conditions_met = True
-    else:
-        conditions_met = all(
-            [node.preexisting_conditions == conditions.conditional for node in stack]
-        ) or all([c.address in found_addresses for c in mandatory_with_filter])
-    # zip in those we know we found
-    if not all([c.address in found_addresses for c in concepts]) or not conditions_met:
-        if not all([c.address in found_addresses for c in concepts]):
-            return (
-                ValidationResult.INCOMPLETE,
-                found_addresses,
-                {c.address for c in concepts if c.address not in found_addresses},
-                partial_addresses,
-                virtual_addresses,
-            )
-        return (
-            ValidationResult.INCOMPLETE_CONDITION,
-            found_addresses,
-            {c.address for c in concepts if c.address not in mandatory_with_filter},
-            partial_addresses,
-            virtual_addresses,
-        )
-
-    graph_count, _ = get_disconnected_components(found_map)
-    if graph_count in (0, 1):
-        return (
-            ValidationResult.COMPLETE,
-            found_addresses,
-            set(),
-            partial_addresses,
-            virtual_addresses,
-        )
-    # if we have too many subgraphs, we need to keep searching
-    return (
-        ValidationResult.DISCONNECTED,
-        found_addresses,
-        set(),
-        partial_addresses,
-        virtual_addresses,
-    )
-
-
-def depth_to_prefix(depth: int) -> str:
-    return "\t" * depth
+        return unique(list(conditions.row_arguments) + local_candidates, "address")
+    return local_candidates
 
 
 def append_existence_check(
@@ -748,7 +119,6 @@ def search_concepts(
     accept_partial: bool = False,
     conditions: BuildWhereClause | None = None,
 ) -> StrategyNode | None:
-    logger.error(f"starting search for {mandatory_list}")
     hist = history.get_history(
         search=mandatory_list, accept_partial=accept_partial, conditions=conditions
     )
@@ -778,7 +148,31 @@ def search_concepts(
     return result
 
 
-
+@dataclass
+class LoopContext:
+    mandatory_list: List[BuildConcept]
+    environment: BuildEnvironment
+    depth: int
+    g: ReferenceGraph
+    history: History
+    attempted: set[str]
+    found: set[str]
+    skip: set[str]
+    all_mandatory: set[str]
+    original_mandatory: List[BuildConcept]
+    completion_mandatory: List[BuildConcept]
+    stack: List[StrategyNode]
+    complete: ValidationResult = ValidationResult.INCOMPLETE
+    accept_partial: bool = False
+    must_evaluate_condition_on_this_level_not_push_down: bool = False
+    conditions: BuildWhereClause | None = None
+
+    @property
+    def incomplete(self) -> bool:
+        return self.attempted != self.all_mandatory
+
+
+def initialize_loop_context(
     mandatory_list: List[BuildConcept],
     environment: BuildEnvironment,
     depth: int,
@@ -786,7 +180,7 @@ def _search_concepts(
    history: History,
     accept_partial: bool = False,
     conditions: BuildWhereClause | None = None,
-)
+):
     # these are the concepts we need in the output projection
     mandatory_list = unique(mandatory_list, "address")
     # cache our values before an filter injection
@@ -808,7 +202,7 @@ def _search_concepts(
     required_filters = [
         x
         for x in mandatory_list
-        if x.derivation not in
+        if x.derivation not in ROOT_DERIVATIONS
         and not (
            x.derivation == Derivation.AGGREGATE
             and x.granularity == Granularity.SINGLE_ROW
@@ -828,257 +222,308 @@ def _search_concepts(
     else:
 
         completion_mandatory = mandatory_list
-
+    return LoopContext(
+        mandatory_list=mandatory_list,
+        environment=environment,
+        depth=depth,
+        g=g,
+        history=history,
+        attempted=set(),
+        found=set(),
+        skip=set(),
+        all_mandatory=all_mandatory,
+        original_mandatory=original_mandatory,
+        completion_mandatory=completion_mandatory,
+        stack=[],
+        complete=ValidationResult.INCOMPLETE,
+        accept_partial=accept_partial,
+        must_evaluate_condition_on_this_level_not_push_down=must_evaluate_condition_on_this_level_not_push_down,
+        conditions=conditions,
+    )
 
-    found: set[str] = set()
-    skip: set[str] = set()
-    virtual: set[str] = set()
-    stack: List[StrategyNode] = []
-    complete = ValidationResult.INCOMPLETE
 
-
-
-
-
-
-
+def evaluate_loop_conditions(
+    context: LoopContext, priority_concept: BuildConcept
+) -> BuildWhereClause | None:
+    # filter evaluation
+    # always pass the filter up when we aren't looking at all filter inputs
+    # or there are any non-filter complex types
+    if context.conditions:
+        should_evaluate_filter_on_this_level_not_push_down = all(
+            [
+                x.address in context.mandatory_list
+                for x in context.conditions.row_arguments
+            ]
+        ) and not any(
+            [
+                x.derivation not in ROOT_DERIVATIONS
+                for x in context.mandatory_list
+                if x.address not in context.conditions.row_arguments
+            ]
+        )
+    else:
+        should_evaluate_filter_on_this_level_not_push_down = True
+    local_conditions = (
+        context.conditions
+        if context.conditions
+        and not context.must_evaluate_condition_on_this_level_not_push_down
+        and not should_evaluate_filter_on_this_level_not_push_down
+        else None
+    )
+    # but if it's not basic, and it's not condition;
+    # we do need to push it down (and have another layer of filter evaluation)
+    # to ensure filtering happens before something like a SUM
+    if (
+        context.conditions
+        and priority_concept.derivation not in ROOT_DERIVATIONS
+        and priority_concept.address not in context.conditions.row_arguments
+    ):
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Force including conditions in {priority_concept.address} to push filtering above complex condition that is not condition member or parent"
         )
-
-
-
-
-
-
-
-
-
-
-
-
+        local_conditions = context.conditions
+    return local_conditions
+
+
+def check_for_early_exit(
+    complete, partial, context: LoopContext, priority_concept: BuildConcept
+) -> bool:
+    if complete == ValidationResult.INCOMPLETE_CONDITION:
+        cond_dict = {str(node): node.preexisting_conditions for node in context.stack}
+        for node in context.stack:
+            logger.info(
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Node {node} has conditions {node.preexisting_conditions} and {node.conditions}"
             )
-
-
-
-
-
-
-
-
+        raise SyntaxError(f"Have {cond_dict} and need {str(context.conditions)}")
+    # early exit if we have a complete stack with one node
+    # we can only early exit if we have a complete stack
+    # and we are not looking for more non-partial sources
+    if complete == ValidationResult.COMPLETE and (
+        not context.accept_partial or (context.accept_partial and not partial)
+    ):
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} breaking loop, complete"
        )
-
-
-
-        if (
-            conditions
-            and priority_concept.derivation
-            not in (Derivation.ROOT, Derivation.CONSTANT)
-            and priority_concept.address not in conditions.row_arguments
-        ):
+        return True
+    elif complete == ValidationResult.COMPLETE and context.accept_partial and partial:
+        if len(context.attempted) == len(context.mandatory_list):
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Breaking as we have attempted all nodes"
             )
-
-
+            return True
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Found complete stack with partials {partial}, continuing search, attempted {context.attempted} all {len(context.mandatory_list)}"
+        )
+    else:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Not complete, continuing search"
+        )
+    # if we have attempted on root node, we've tried them all.
+    # inject in another search with filter concepts
+    if priority_concept.derivation == Derivation.ROOT:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Breaking as attempted root with no results"
        )
+        return True
+    return False
 
-
-
+
+def generate_loop_completion(context: LoopContext, virtual) -> StrategyNode:
+    condition_required = True
+    non_virtual = [c for c in context.completion_mandatory if c.address not in virtual]
+    non_virtual_output = [
+        c for c in context.original_mandatory if c.address not in virtual
+    ]
+    non_virtual_different = len(context.completion_mandatory) != len(
+        context.original_mandatory
+    )
+    non_virtual_difference_values = set(
+        [x.address for x in context.completion_mandatory]
+    ).difference(set([x.address for x in context.original_mandatory]))
+    if not context.conditions:
+        condition_required = False
+        non_virtual = [c for c in context.mandatory_list if c.address not in virtual]
+
+    elif all(
+        [
+            x.preexisting_conditions == context.conditions.conditional
+            for x in context.stack
         ]
-
-
+    ):
+        condition_required = False
+        non_virtual = [c for c in context.mandatory_list if c.address not in virtual]
+
+    if context.conditions and not condition_required:
+        parent_map = {
+            str(x): x.preexisting_conditions == context.conditions.conditional
+            for x in context.stack
+        }
+        logger.info(
+            f"Condition {context.conditions} not required, parents included filtering! {parent_map }"
         )
-
+    if len(context.stack) == 1:
+        output: StrategyNode = context.stack[0]
+        if non_virtual_different:
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Found different non-virtual output concepts ({non_virtual_difference_values}), removing condition injected values"
             )
-
-
-
-            environment,
-            g,
-            depth,
-            source_concepts=search_concepts,
-            accept_partial=accept_partial,
-            history=history,
-            conditions=local_conditions,
+            output.set_output_concepts(
+                [x for x in output.output_concepts if x.address in non_virtual_output],
+                rebuild=False,
             )
-
-        stack.append(node)
-        try:
-            node.resolve()
-        except Exception as e:
-            logger.error(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve node {node} {e}"
-            )
-            raise e
-        # these concepts should not be attempted to be sourced again
-        # as fetching them requires operating on a subset of concepts
-        if priority_concept.derivation in [
-            Derivation.AGGREGATE,
-            Derivation.FILTER,
-            Derivation.WINDOW,
-            Derivation.UNNEST,
-            Derivation.ROWSET,
-            Derivation.BASIC,
-            Derivation.MULTISELECT,
-            Derivation.UNION,
-        ]:
-            skip.add(priority_concept.address)
-            break
-        attempted.add(priority_concept.address)
-        complete, found, missing, partial, virtual = validate_stack(
-            environment,
-            stack,
-            mandatory_list,
-            completion_mandatory,
-            conditions=conditions,
-            accept_partial=accept_partial,
-        )
-        mandatory_completion = [c.address for c in completion_mandatory]
+
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
-            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} synonyms partial {partial} virtual {virtual}), attempted {attempted}, mandatory w/ filter {mandatory_completion}"
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    else:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} wrapping multiple parent nodes {[type(x) for x in context.stack]} in merge node"
+        )
+        output = MergeNode(
+            input_concepts=non_virtual,
+            output_concepts=non_virtual,
+            environment=context.environment,
+            parents=context.stack,
+            depth=context.depth,
+        )
+
+    # ensure we can resolve our final merge
+    output.resolve()
+    if condition_required and context.conditions:
+        output.add_condition(context.conditions.conditional)
+        if context.conditions.existence_arguments:
+            append_existence_check(
+                output,
+                context.environment,
+                context.g,
+                where=context.conditions,
+                history=context.history,
             )
-
+    elif context.conditions:
+        output.preexisting_conditions = context.conditions.conditional
+    logger.info(
+        f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Graph is connected, returning {type(output)} node partial {[c.address for c in output.partial_concepts]} with {context.conditions}"
+    )
+    if condition_required and context.conditions and non_virtual_different:
+        logger.info(
+            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Conditions {context.conditions} were injected, checking if we need a group to restore grain"
+        )
+        result = GroupNode.check_if_required(
+            downstream_concepts=context.original_mandatory,
+            parents=[output.resolve()],
+            environment=context.environment,
+            depth=context.depth,
+        )
+        if result.required:
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Adding group node"
             )
-
-
-
-
-
+            return GroupNode(
+                output_concepts=context.original_mandatory,
+                input_concepts=context.original_mandatory,
+                environment=context.environment,
+                parents=[output],
+                partial_concepts=output.partial_concepts,
+                preexisting_conditions=context.conditions.conditional,
+                depth=context.depth,
            )
-
+    return output
 
-
-
+
+def _search_concepts(
+    mandatory_list: List[BuildConcept],
+    environment: BuildEnvironment,
+    depth: int,
+    g: ReferenceGraph,
+    history: History,
+    accept_partial: bool = False,
+    conditions: BuildWhereClause | None = None,
+) -> StrategyNode | None:
+
+    context = initialize_loop_context(
+        mandatory_list=mandatory_list,
+        environment=environment,
+        depth=depth,
+        g=g,
+        history=history,
+        accept_partial=accept_partial,
+        conditions=conditions,
     )
-    if complete == ValidationResult.COMPLETE:
-        condition_required = True
-        non_virtual = [c for c in completion_mandatory if c.address not in virtual]
-        non_virtual_output = [c for c in original_mandatory if c.address not in virtual]
-        non_virtual_different = len(completion_mandatory) != len(original_mandatory)
-        non_virtual_difference_values = set(
-            [x.address for x in completion_mandatory]
-        ).difference(set([x.address for x in original_mandatory]))
-        if not conditions:
-            condition_required = False
-            non_virtual = [c for c in mandatory_list if c.address not in virtual]
 
-
-
-
+    while context.incomplete:
+        priority_concept = get_priority_concept(
+            context.mandatory_list,
+            context.attempted,
+            found_concepts=context.found,
+            depth=depth,
+        )
 
-
-
-
-
-        }
-        logger.info(
-            f"Condition {conditions} not required, parents included filtering! {parent_map }"
-        )
-    if len(stack) == 1:
-        output: StrategyNode = stack[0]
-        if non_virtual_different:
-            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found different non-virtual output concepts ({non_virtual_difference_values}), removing condition injected values"
-            )
-            output.set_output_concepts(
-                [
-                    x
-                    for x in output.output_concepts
-                    if x.address in non_virtual_output
-                ],
-                rebuild=False,
-            )
+        local_conditions = evaluate_loop_conditions(context, priority_concept)
+        logger.info(
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} granularity {priority_concept.granularity} with conditions {local_conditions}"
+        )
 
-
-
-
-
-
-
-
-
-
-
-            environment=environment,
-            parents=stack,
-            depth=depth,
-        )
+        candidates = [
+            c for c in context.mandatory_list if c.address != priority_concept.address
+        ]
+        candidate_list = generate_candidates_restrictive(
+            priority_concept,
+            candidates,
+            context.skip,
+            depth=depth,
+            conditions=context.conditions,
+        )
 
-    # ensure we can resolve our final merge
-    output.resolve()
-    if condition_required and conditions:
-        output.add_condition(conditions.conditional)
-        if conditions.existence_arguments:
-            append_existence_check(
-                output, environment, g, where=conditions, history=history
-            )
-    elif conditions:
-        output.preexisting_conditions = conditions.conditional
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX}
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {priority_concept.address}, accept_partial {accept_partial}, optional {[v.address for v in candidate_list]}, exhausted {[c for c in context.skip]}"
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        node = generate_node(
+            priority_concept,
+            candidate_list,
+            environment,
+            g,
+            depth,
+            source_concepts=search_concepts,
+            accept_partial=accept_partial,
+            history=history,
+            conditions=local_conditions,
+        )
+        if node:
+            context.stack.append(node)
+            node.resolve()
+            # these concepts should not be attempted to be sourced again
+            # as fetching them requires operating on a subset of concepts
+            if priority_concept.derivation in SKIPPED_DERIVATIONS:
+                context.skip.add(priority_concept.address)
+        context.attempted.add(priority_concept.address)
+        complete, found_c, missing_c, partial, virtual = validate_stack(
+            environment,
+            context.stack,
+            context.mandatory_list,
+            context.completion_mandatory,
+            conditions=context.conditions,
+            accept_partial=accept_partial,
+        )
+        # assig
+        context.found = found_c
+        early_exit = check_for_early_exit(complete, partial, context, priority_concept)
+        if early_exit:
+            break
+
+        logger.info(
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {context.found} from {[n for n in context.stack]} (missing {context.all_mandatory - context.found}), attempted {context.attempted}, virtual {virtual}"
+        )
+    if complete == ValidationResult.COMPLETE:
+        return generate_loop_completion(context, virtual)
 
     # if we can't find it after expanding to a merge, then
     # accept partials in join paths
-
     if not accept_partial:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Stack is not connected graph, flag for accepting partial addresses is {accept_partial}, changing flag"
         )
         partial_search = search_concepts(
+            # use the original mandatory list
            mandatory_list=mandatory_list,
            environment=environment,
            depth=depth,
@@ -1093,7 +538,7 @@ def _search_concepts(
         )
         return partial_search
     logger.error(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found},"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {context.all_mandatory - context.found},"
    )
 
     return None