pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
- pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +2 -2
- trilogy/core/enums.py +1 -7
- trilogy/core/env_processor.py +17 -5
- trilogy/core/environment_helpers.py +11 -25
- trilogy/core/exceptions.py +4 -0
- trilogy/core/functions.py +695 -261
- trilogy/core/graph_models.py +10 -10
- trilogy/core/internal.py +11 -2
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2110 -0
- trilogy/core/models/build.py +1859 -0
- trilogy/core/models/build_environment.py +151 -0
- trilogy/core/models/core.py +370 -0
- trilogy/core/models/datasource.py +297 -0
- trilogy/core/models/environment.py +701 -0
- trilogy/core/models/execute.py +931 -0
- trilogy/core/optimization.py +14 -16
- trilogy/core/optimizations/base_optimization.py +1 -1
- trilogy/core/optimizations/inline_constant.py +6 -6
- trilogy/core/optimizations/inline_datasource.py +17 -11
- trilogy/core/optimizations/predicate_pushdown.py +17 -16
- trilogy/core/processing/concept_strategies_v3.py +178 -145
- trilogy/core/processing/graph_utils.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +19 -18
- trilogy/core/processing/node_generators/common.py +50 -44
- trilogy/core/processing/node_generators/filter_node.py +26 -13
- trilogy/core/processing/node_generators/group_node.py +26 -21
- trilogy/core/processing/node_generators/group_to_node.py +11 -8
- trilogy/core/processing/node_generators/multiselect_node.py +60 -43
- trilogy/core/processing/node_generators/node_merge_node.py +76 -38
- trilogy/core/processing/node_generators/rowset_node.py +55 -36
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
- trilogy/core/processing/node_generators/select_merge_node.py +161 -64
- trilogy/core/processing/node_generators/select_node.py +13 -13
- trilogy/core/processing/node_generators/union_node.py +12 -11
- trilogy/core/processing/node_generators/unnest_node.py +9 -7
- trilogy/core/processing/node_generators/window_node.py +18 -16
- trilogy/core/processing/nodes/__init__.py +21 -18
- trilogy/core/processing/nodes/base_node.py +82 -66
- trilogy/core/processing/nodes/filter_node.py +19 -13
- trilogy/core/processing/nodes/group_node.py +50 -35
- trilogy/core/processing/nodes/merge_node.py +45 -36
- trilogy/core/processing/nodes/select_node_v2.py +53 -39
- trilogy/core/processing/nodes/union_node.py +5 -7
- trilogy/core/processing/nodes/unnest_node.py +7 -11
- trilogy/core/processing/nodes/window_node.py +9 -4
- trilogy/core/processing/utility.py +103 -75
- trilogy/core/query_processor.py +70 -47
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +413 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +30 -0
- trilogy/core/statements/execute.py +42 -0
- trilogy/dialect/base.py +148 -106
- trilogy/dialect/common.py +9 -10
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/enums.py +4 -2
- trilogy/dialect/presto.py +1 -1
- trilogy/dialect/sql_server.py +1 -1
- trilogy/executor.py +44 -32
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +6 -4
- trilogy/hooks/query_debugger.py +113 -97
- trilogy/parser.py +1 -1
- trilogy/parsing/common.py +307 -64
- trilogy/parsing/parse_engine.py +277 -618
- trilogy/parsing/render.py +50 -26
- trilogy/scripts/trilogy.py +2 -1
- pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
- trilogy/core/models.py +0 -4960
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -6,44 +6,61 @@ from typing import Any, Dict, List, Set, Tuple
|
|
|
6
6
|
|
|
7
7
|
import networkx as nx
|
|
8
8
|
|
|
9
|
-
from trilogy.
|
|
10
|
-
from trilogy.core.
|
|
11
|
-
|
|
12
|
-
AggregateWrapper,
|
|
13
|
-
BaseJoin,
|
|
14
|
-
CaseElse,
|
|
15
|
-
CaseWhen,
|
|
16
|
-
Comparison,
|
|
17
|
-
Concept,
|
|
18
|
-
ConceptPair,
|
|
19
|
-
Conditional,
|
|
20
|
-
Datasource,
|
|
21
|
-
DataType,
|
|
9
|
+
from trilogy.constants import MagicConstants
|
|
10
|
+
from trilogy.core.enums import (
|
|
11
|
+
BooleanOperator,
|
|
22
12
|
DatePart,
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
Function,
|
|
13
|
+
FunctionClass,
|
|
14
|
+
Granularity,
|
|
26
15
|
JoinType,
|
|
16
|
+
Purpose,
|
|
17
|
+
)
|
|
18
|
+
from trilogy.core.models.build import (
|
|
19
|
+
BuildAggregateWrapper,
|
|
20
|
+
BuildCaseElse,
|
|
21
|
+
BuildCaseWhen,
|
|
22
|
+
BuildComparison,
|
|
23
|
+
BuildConcept,
|
|
24
|
+
BuildConditional,
|
|
25
|
+
BuildDatasource,
|
|
26
|
+
BuildFilterItem,
|
|
27
|
+
BuildFunction,
|
|
28
|
+
BuildParenthetical,
|
|
29
|
+
BuildSubselectComparison,
|
|
30
|
+
BuildWindowItem,
|
|
31
|
+
LooseBuildConceptList,
|
|
32
|
+
)
|
|
33
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
34
|
+
from trilogy.core.models.core import (
|
|
35
|
+
DataType,
|
|
27
36
|
ListType,
|
|
28
37
|
ListWrapper,
|
|
29
|
-
LooseConceptList,
|
|
30
|
-
MagicConstants,
|
|
31
38
|
MapType,
|
|
32
39
|
MapWrapper,
|
|
33
|
-
MultiSelectStatement,
|
|
34
40
|
NumericType,
|
|
35
|
-
Parenthetical,
|
|
36
|
-
ProcessedQuery,
|
|
37
|
-
QueryDatasource,
|
|
38
|
-
SelectStatement,
|
|
39
|
-
SubselectComparison,
|
|
40
41
|
TupleWrapper,
|
|
42
|
+
)
|
|
43
|
+
from trilogy.core.models.execute import (
|
|
44
|
+
CTE,
|
|
45
|
+
BaseJoin,
|
|
46
|
+
ConceptPair,
|
|
47
|
+
QueryDatasource,
|
|
41
48
|
UnionCTE,
|
|
42
49
|
UnnestJoin,
|
|
43
|
-
WindowItem,
|
|
44
50
|
)
|
|
51
|
+
from trilogy.core.statements.author import MultiSelectStatement, SelectStatement
|
|
52
|
+
from trilogy.core.statements.execute import ProcessedQuery
|
|
45
53
|
from trilogy.utility import unique
|
|
46
54
|
|
|
55
|
+
AGGREGATE_TYPES = (BuildAggregateWrapper,)
|
|
56
|
+
SUBSELECT_TYPES = (BuildSubselectComparison,)
|
|
57
|
+
COMPARISON_TYPES = (BuildComparison,)
|
|
58
|
+
FUNCTION_TYPES = (BuildFunction,)
|
|
59
|
+
PARENTHETICAL_TYPES = (BuildParenthetical,)
|
|
60
|
+
CONDITIONAL_TYPES = (BuildConditional,)
|
|
61
|
+
CONCEPT_TYPES = (BuildConcept,)
|
|
62
|
+
WINDOW_TYPES = (BuildWindowItem,)
|
|
63
|
+
|
|
47
64
|
|
|
48
65
|
class NodeType(Enum):
|
|
49
66
|
CONCEPT = 1
|
|
@@ -208,11 +225,10 @@ def resolve_join_order_v2(
|
|
|
208
225
|
return output
|
|
209
226
|
|
|
210
227
|
|
|
211
|
-
def concept_to_relevant_joins(concepts: list[
|
|
212
|
-
|
|
213
|
-
sub_props = LooseConceptList(
|
|
228
|
+
def concept_to_relevant_joins(concepts: list[BuildConcept]) -> List[BuildConcept]:
|
|
229
|
+
sub_props = LooseBuildConceptList(
|
|
214
230
|
concepts=[
|
|
215
|
-
x for x in concepts if x.keys and all([key in
|
|
231
|
+
x for x in concepts if x.keys and all([key in concepts for key in x.keys])
|
|
216
232
|
]
|
|
217
233
|
)
|
|
218
234
|
final = [c for c in concepts if c.address not in sub_props]
|
|
@@ -233,7 +249,7 @@ def create_log_lambda(prefix: str, depth: int, logger: Logger):
|
|
|
233
249
|
|
|
234
250
|
|
|
235
251
|
def calculate_graph_relevance(
|
|
236
|
-
g: nx.DiGraph, subset_nodes: set[str], concepts: set[
|
|
252
|
+
g: nx.DiGraph, subset_nodes: set[str], concepts: set[BuildConcept]
|
|
237
253
|
) -> int:
|
|
238
254
|
"""Calculate the relevance of each node in a graph
|
|
239
255
|
Relevance is used to prune irrelevant nodes from the graph
|
|
@@ -268,10 +284,10 @@ def calculate_graph_relevance(
|
|
|
268
284
|
|
|
269
285
|
def add_node_join_concept(
|
|
270
286
|
graph: nx.DiGraph,
|
|
271
|
-
concept:
|
|
272
|
-
concept_map: dict[str,
|
|
287
|
+
concept: BuildConcept,
|
|
288
|
+
concept_map: dict[str, BuildConcept],
|
|
273
289
|
ds_node: str,
|
|
274
|
-
environment:
|
|
290
|
+
environment: BuildEnvironment,
|
|
275
291
|
):
|
|
276
292
|
name = f"c~{concept.address}"
|
|
277
293
|
graph.add_node(name, type=NodeType.CONCEPT)
|
|
@@ -294,8 +310,8 @@ def add_node_join_concept(
|
|
|
294
310
|
|
|
295
311
|
|
|
296
312
|
def resolve_instantiated_concept(
|
|
297
|
-
concept:
|
|
298
|
-
) ->
|
|
313
|
+
concept: BuildConcept, datasource: QueryDatasource | BuildDatasource
|
|
314
|
+
) -> BuildConcept:
|
|
299
315
|
if concept.address in datasource.output_concepts:
|
|
300
316
|
return concept
|
|
301
317
|
for k in concept.pseudonyms:
|
|
@@ -333,14 +349,14 @@ def reduce_concept_pairs(input: list[ConceptPair]) -> list[ConceptPair]:
|
|
|
333
349
|
|
|
334
350
|
|
|
335
351
|
def get_node_joins(
|
|
336
|
-
datasources: List[QueryDatasource |
|
|
337
|
-
environment:
|
|
352
|
+
datasources: List[QueryDatasource | BuildDatasource],
|
|
353
|
+
environment: BuildEnvironment,
|
|
338
354
|
# concepts:List[Concept],
|
|
339
355
|
) -> List[BaseJoin]:
|
|
340
356
|
graph = nx.Graph()
|
|
341
357
|
partials: dict[str, list[str]] = {}
|
|
342
|
-
ds_node_map: dict[str, QueryDatasource |
|
|
343
|
-
concept_map: dict[str,
|
|
358
|
+
ds_node_map: dict[str, QueryDatasource | BuildDatasource] = {}
|
|
359
|
+
concept_map: dict[str, BuildConcept] = {}
|
|
344
360
|
for datasource in datasources:
|
|
345
361
|
ds_node = f"ds~{datasource.identifier}"
|
|
346
362
|
ds_node_map[ds_node] = datasource
|
|
@@ -367,7 +383,7 @@ def get_node_joins(
|
|
|
367
383
|
concepts=[] if not j.keys else None,
|
|
368
384
|
concept_pairs=reduce_concept_pairs(
|
|
369
385
|
[
|
|
370
|
-
ConceptPair(
|
|
386
|
+
ConceptPair.model_construct(
|
|
371
387
|
left=resolve_instantiated_concept(
|
|
372
388
|
concept_map[concept], ds_node_map[k]
|
|
373
389
|
),
|
|
@@ -386,7 +402,7 @@ def get_node_joins(
|
|
|
386
402
|
|
|
387
403
|
|
|
388
404
|
def get_disconnected_components(
|
|
389
|
-
concept_map: Dict[str, Set[
|
|
405
|
+
concept_map: Dict[str, Set[BuildConcept]]
|
|
390
406
|
) -> Tuple[int, List]:
|
|
391
407
|
"""Find if any of the datasources are not linked"""
|
|
392
408
|
import networkx as nx
|
|
@@ -414,18 +430,18 @@ def is_scalar_condition(
|
|
|
414
430
|
| date
|
|
415
431
|
| datetime
|
|
416
432
|
| list[Any]
|
|
417
|
-
|
|
|
418
|
-
|
|
|
419
|
-
|
|
|
420
|
-
|
|
|
421
|
-
|
|
|
422
|
-
|
|
|
423
|
-
|
|
|
424
|
-
|
|
|
433
|
+
| BuildConcept
|
|
434
|
+
| BuildWindowItem
|
|
435
|
+
| BuildFilterItem
|
|
436
|
+
| BuildConditional
|
|
437
|
+
| BuildComparison
|
|
438
|
+
| BuildParenthetical
|
|
439
|
+
| BuildFunction
|
|
440
|
+
| BuildAggregateWrapper
|
|
441
|
+
| BuildCaseWhen
|
|
442
|
+
| BuildCaseElse
|
|
425
443
|
| MagicConstants
|
|
426
444
|
| DataType
|
|
427
|
-
| CaseWhen
|
|
428
|
-
| CaseElse
|
|
429
445
|
| MapWrapper[Any, Any]
|
|
430
446
|
| ListType
|
|
431
447
|
| MapType
|
|
@@ -436,64 +452,76 @@ def is_scalar_condition(
|
|
|
436
452
|
),
|
|
437
453
|
materialized: set[str] | None = None,
|
|
438
454
|
) -> bool:
|
|
439
|
-
if isinstance(element,
|
|
455
|
+
if isinstance(element, PARENTHETICAL_TYPES):
|
|
440
456
|
return is_scalar_condition(element.content, materialized)
|
|
441
|
-
elif isinstance(element,
|
|
457
|
+
elif isinstance(element, SUBSELECT_TYPES):
|
|
442
458
|
return True
|
|
443
|
-
elif isinstance(element,
|
|
459
|
+
elif isinstance(element, COMPARISON_TYPES):
|
|
444
460
|
return is_scalar_condition(element.left, materialized) and is_scalar_condition(
|
|
445
461
|
element.right, materialized
|
|
446
462
|
)
|
|
447
|
-
elif isinstance(element,
|
|
463
|
+
elif isinstance(element, FUNCTION_TYPES):
|
|
448
464
|
if element.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
|
|
449
465
|
return False
|
|
450
466
|
return all([is_scalar_condition(x, materialized) for x in element.arguments])
|
|
451
|
-
elif isinstance(element,
|
|
467
|
+
elif isinstance(element, CONCEPT_TYPES):
|
|
452
468
|
if materialized and element.address in materialized:
|
|
453
469
|
return True
|
|
454
|
-
if element.lineage and isinstance(element.lineage,
|
|
470
|
+
if element.lineage and isinstance(element.lineage, AGGREGATE_TYPES):
|
|
455
471
|
return is_scalar_condition(element.lineage, materialized)
|
|
456
|
-
if element.lineage and isinstance(element.lineage,
|
|
472
|
+
if element.lineage and isinstance(element.lineage, FUNCTION_TYPES):
|
|
457
473
|
return is_scalar_condition(element.lineage, materialized)
|
|
458
474
|
return True
|
|
459
|
-
elif isinstance(element,
|
|
475
|
+
elif isinstance(element, AGGREGATE_TYPES):
|
|
460
476
|
return is_scalar_condition(element.function, materialized)
|
|
461
|
-
elif isinstance(element,
|
|
477
|
+
elif isinstance(element, CONDITIONAL_TYPES):
|
|
462
478
|
return is_scalar_condition(element.left, materialized) and is_scalar_condition(
|
|
463
479
|
element.right, materialized
|
|
464
480
|
)
|
|
465
|
-
elif isinstance(element,
|
|
481
|
+
elif isinstance(element, (BuildCaseWhen,)):
|
|
466
482
|
return is_scalar_condition(
|
|
467
483
|
element.comparison, materialized
|
|
468
484
|
) and is_scalar_condition(element.expr, materialized)
|
|
469
|
-
elif isinstance(element,
|
|
485
|
+
elif isinstance(element, (BuildCaseElse,)):
|
|
470
486
|
return is_scalar_condition(element.expr, materialized)
|
|
471
487
|
elif isinstance(element, MagicConstants):
|
|
472
488
|
return True
|
|
473
489
|
return True
|
|
474
490
|
|
|
475
491
|
|
|
492
|
+
CONDITION_TYPES = (
|
|
493
|
+
BuildSubselectComparison,
|
|
494
|
+
BuildComparison,
|
|
495
|
+
BuildConditional,
|
|
496
|
+
BuildParenthetical,
|
|
497
|
+
)
|
|
498
|
+
|
|
499
|
+
|
|
476
500
|
def decompose_condition(
|
|
477
|
-
conditional:
|
|
478
|
-
) -> list[
|
|
479
|
-
|
|
480
|
-
|
|
501
|
+
conditional: BuildConditional | BuildComparison | BuildParenthetical,
|
|
502
|
+
) -> list[
|
|
503
|
+
BuildSubselectComparison | BuildComparison | BuildConditional | BuildParenthetical
|
|
504
|
+
]:
|
|
505
|
+
chunks: list[
|
|
506
|
+
BuildSubselectComparison
|
|
507
|
+
| BuildComparison
|
|
508
|
+
| BuildConditional
|
|
509
|
+
| BuildParenthetical
|
|
510
|
+
] = []
|
|
511
|
+
if not isinstance(conditional, BuildConditional):
|
|
481
512
|
return [conditional]
|
|
482
513
|
if conditional.operator == BooleanOperator.AND:
|
|
483
514
|
if not (
|
|
484
|
-
isinstance(
|
|
485
|
-
conditional.left,
|
|
486
|
-
(SubselectComparison, Comparison, Conditional, Parenthetical),
|
|
487
|
-
)
|
|
515
|
+
isinstance(conditional.left, CONDITION_TYPES)
|
|
488
516
|
and isinstance(
|
|
489
517
|
conditional.right,
|
|
490
|
-
|
|
518
|
+
CONDITION_TYPES,
|
|
491
519
|
)
|
|
492
520
|
):
|
|
493
521
|
chunks.append(conditional)
|
|
494
522
|
else:
|
|
495
523
|
for val in [conditional.left, conditional.right]:
|
|
496
|
-
if isinstance(val,
|
|
524
|
+
if isinstance(val, BuildConditional):
|
|
497
525
|
chunks.extend(decompose_condition(val))
|
|
498
526
|
else:
|
|
499
527
|
chunks.append(val)
|
|
@@ -503,8 +531,8 @@ def decompose_condition(
|
|
|
503
531
|
|
|
504
532
|
|
|
505
533
|
def find_nullable_concepts(
|
|
506
|
-
source_map: Dict[str, set[
|
|
507
|
-
datasources: List[
|
|
534
|
+
source_map: Dict[str, set[BuildDatasource | QueryDatasource | UnnestJoin]],
|
|
535
|
+
datasources: List[BuildDatasource | QueryDatasource],
|
|
508
536
|
joins: List[BaseJoin | UnnestJoin],
|
|
509
537
|
) -> List[str]:
|
|
510
538
|
"""give a set of datasources and joins, find the concepts
|
|
@@ -514,7 +542,7 @@ def find_nullable_concepts(
|
|
|
514
542
|
datasource_map = {
|
|
515
543
|
x.identifier: x
|
|
516
544
|
for x in datasources
|
|
517
|
-
if isinstance(x, (
|
|
545
|
+
if isinstance(x, (BuildDatasource, QueryDatasource))
|
|
518
546
|
}
|
|
519
547
|
for join in joins:
|
|
520
548
|
is_on_nullable_condition = False
|
trilogy/core/query_processor.py
CHANGED
|
@@ -7,32 +7,42 @@ from trilogy.core.constants import CONSTANT_DATASET
|
|
|
7
7
|
from trilogy.core.enums import BooleanOperator, SourceType
|
|
8
8
|
from trilogy.core.env_processor import generate_graph
|
|
9
9
|
from trilogy.core.ergonomics import generate_cte_names
|
|
10
|
-
from trilogy.core.models import
|
|
10
|
+
from trilogy.core.models.author import MultiSelectLineage, SelectLineage
|
|
11
|
+
from trilogy.core.models.build import (
|
|
12
|
+
BuildConcept,
|
|
13
|
+
BuildConditional,
|
|
14
|
+
BuildDatasource,
|
|
15
|
+
BuildMultiSelectLineage,
|
|
16
|
+
BuildSelectLineage,
|
|
17
|
+
Factory,
|
|
18
|
+
)
|
|
19
|
+
from trilogy.core.models.environment import Environment
|
|
20
|
+
from trilogy.core.models.execute import (
|
|
11
21
|
CTE,
|
|
12
22
|
BaseJoin,
|
|
13
|
-
Concept,
|
|
14
|
-
ConceptDeclarationStatement,
|
|
15
|
-
Conditional,
|
|
16
|
-
CopyStatement,
|
|
17
23
|
CTEConceptPair,
|
|
18
|
-
Datasource,
|
|
19
|
-
Environment,
|
|
20
24
|
InstantiatedUnnestJoin,
|
|
21
25
|
Join,
|
|
22
|
-
MaterializedDataset,
|
|
23
|
-
MultiSelectStatement,
|
|
24
|
-
PersistStatement,
|
|
25
|
-
ProcessedCopyStatement,
|
|
26
|
-
ProcessedQuery,
|
|
27
|
-
ProcessedQueryPersist,
|
|
28
26
|
QueryDatasource,
|
|
29
|
-
SelectStatement,
|
|
30
27
|
UnionCTE,
|
|
31
28
|
UnnestJoin,
|
|
32
29
|
)
|
|
33
30
|
from trilogy.core.optimization import optimize_ctes
|
|
34
31
|
from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
|
|
35
32
|
from trilogy.core.processing.nodes import History, SelectNode, StrategyNode
|
|
33
|
+
from trilogy.core.statements.author import (
|
|
34
|
+
ConceptDeclarationStatement,
|
|
35
|
+
CopyStatement,
|
|
36
|
+
MultiSelectStatement,
|
|
37
|
+
PersistStatement,
|
|
38
|
+
SelectStatement,
|
|
39
|
+
)
|
|
40
|
+
from trilogy.core.statements.common import MaterializedDataset
|
|
41
|
+
from trilogy.core.statements.execute import (
|
|
42
|
+
ProcessedCopyStatement,
|
|
43
|
+
ProcessedQuery,
|
|
44
|
+
ProcessedQueryPersist,
|
|
45
|
+
)
|
|
36
46
|
from trilogy.hooks.base_hook import BaseHook
|
|
37
47
|
from trilogy.utility import unique
|
|
38
48
|
|
|
@@ -50,14 +60,19 @@ def base_join_to_join(
|
|
|
50
60
|
alias=base_join.alias,
|
|
51
61
|
)
|
|
52
62
|
|
|
53
|
-
def get_datasource_cte(datasource:
|
|
63
|
+
def get_datasource_cte(datasource: BuildDatasource | QueryDatasource) -> CTE:
|
|
64
|
+
eligible = set()
|
|
54
65
|
for cte in ctes:
|
|
55
66
|
if cte.source.identifier == datasource.identifier:
|
|
56
67
|
return cte
|
|
68
|
+
eligible.add(cte.source.identifier)
|
|
57
69
|
for cte in ctes:
|
|
58
70
|
if cte.source.datasources[0].identifier == datasource.identifier:
|
|
59
71
|
return cte
|
|
60
|
-
|
|
72
|
+
eligible.add(cte.source.datasources[0].identifier)
|
|
73
|
+
raise ValueError(
|
|
74
|
+
f"Could not find CTE for datasource {datasource.identifier}; have {eligible}"
|
|
75
|
+
)
|
|
61
76
|
|
|
62
77
|
if base_join.left_datasource is not None:
|
|
63
78
|
left_cte = get_datasource_cte(base_join.left_datasource)
|
|
@@ -114,7 +129,7 @@ def generate_source_map(
|
|
|
114
129
|
and isinstance(list(qdv)[0], UnnestJoin)
|
|
115
130
|
):
|
|
116
131
|
source_map[qdk] = []
|
|
117
|
-
basic = [x for x in qdv if isinstance(x,
|
|
132
|
+
basic = [x for x in qdv if isinstance(x, BuildDatasource)]
|
|
118
133
|
for base in basic:
|
|
119
134
|
source_map[qdk].append(base.safe_identifier)
|
|
120
135
|
|
|
@@ -163,8 +178,8 @@ def generate_source_map(
|
|
|
163
178
|
}, existence_source_map
|
|
164
179
|
|
|
165
180
|
|
|
166
|
-
def datasource_to_query_datasource(datasource:
|
|
167
|
-
sub_select: Dict[str, Set[Union[
|
|
181
|
+
def datasource_to_query_datasource(datasource: BuildDatasource) -> QueryDatasource:
|
|
182
|
+
sub_select: Dict[str, Set[Union[BuildDatasource, QueryDatasource, UnnestJoin]]] = {
|
|
168
183
|
**{c.address: {datasource} for c in datasource.concepts},
|
|
169
184
|
}
|
|
170
185
|
concepts = [c for c in datasource.concepts]
|
|
@@ -206,7 +221,7 @@ def resolve_cte_base_name_and_alias_v2(
|
|
|
206
221
|
raw_joins: List[Join | InstantiatedUnnestJoin],
|
|
207
222
|
) -> Tuple[str | None, str | None]:
|
|
208
223
|
if (
|
|
209
|
-
isinstance(source.datasources[0],
|
|
224
|
+
isinstance(source.datasources[0], BuildDatasource)
|
|
210
225
|
and not source.datasources[0].name == CONSTANT_DATASET
|
|
211
226
|
):
|
|
212
227
|
ds = source.datasources[0]
|
|
@@ -265,6 +280,7 @@ def datasource_to_cte(
|
|
|
265
280
|
for c in query_datasource.output_concepts
|
|
266
281
|
],
|
|
267
282
|
grain=direct_parents[0].grain,
|
|
283
|
+
order_by=query_datasource.ordering,
|
|
268
284
|
)
|
|
269
285
|
return final
|
|
270
286
|
|
|
@@ -333,6 +349,7 @@ def datasource_to_cte(
|
|
|
333
349
|
hidden_concepts=query_datasource.hidden_concepts,
|
|
334
350
|
base_name_override=base_name,
|
|
335
351
|
base_alias_override=base_alias,
|
|
352
|
+
order_by=query_datasource.ordering,
|
|
336
353
|
)
|
|
337
354
|
if cte.grain != query_datasource.grain:
|
|
338
355
|
raise ValueError("Grain was corrupted in CTE generation")
|
|
@@ -351,26 +368,33 @@ def datasource_to_cte(
|
|
|
351
368
|
|
|
352
369
|
def get_query_node(
|
|
353
370
|
environment: Environment,
|
|
354
|
-
statement:
|
|
371
|
+
statement: SelectLineage | MultiSelectLineage,
|
|
355
372
|
history: History | None = None,
|
|
356
373
|
) -> StrategyNode:
|
|
357
|
-
environment = environment.duplicate()
|
|
358
|
-
for k, v in statement.local_concepts.items():
|
|
359
|
-
environment.concepts[k] = v
|
|
360
|
-
graph = generate_graph(environment)
|
|
361
|
-
logger.info(
|
|
362
|
-
f"{LOGGER_PREFIX} getting source datasource for query with filtering {statement.where_clause_category} and grain {statement.grain}"
|
|
363
|
-
)
|
|
364
374
|
if not statement.output_components:
|
|
365
375
|
raise ValueError(f"Statement has no output components {statement}")
|
|
366
376
|
|
|
367
|
-
|
|
377
|
+
history = history or History(base_environment=environment)
|
|
378
|
+
build_statement: BuildSelectLineage | BuildMultiSelectLineage = Factory(
|
|
379
|
+
environment=environment
|
|
380
|
+
).build(statement)
|
|
381
|
+
|
|
382
|
+
# build_statement = statement
|
|
383
|
+
build_environment = environment.materialize_for_select(
|
|
384
|
+
build_statement.local_concepts
|
|
385
|
+
)
|
|
386
|
+
graph = generate_graph(build_environment)
|
|
387
|
+
logger.info(
|
|
388
|
+
f"{LOGGER_PREFIX} getting source datasource for outputs {statement.output_components} grain {build_statement.grain}"
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
search_concepts: list[BuildConcept] = build_statement.output_components
|
|
368
392
|
|
|
369
393
|
ods: StrategyNode = source_query_concepts(
|
|
370
|
-
search_concepts,
|
|
371
|
-
environment=
|
|
394
|
+
output_concepts=search_concepts,
|
|
395
|
+
environment=build_environment,
|
|
372
396
|
g=graph,
|
|
373
|
-
conditions=
|
|
397
|
+
conditions=build_statement.where_clause,
|
|
374
398
|
history=history,
|
|
375
399
|
)
|
|
376
400
|
if not ods:
|
|
@@ -378,22 +402,26 @@ def get_query_node(
|
|
|
378
402
|
f"Could not find source query concepts for {[x.address for x in search_concepts]}"
|
|
379
403
|
)
|
|
380
404
|
ds: StrategyNode = ods
|
|
381
|
-
if
|
|
382
|
-
final =
|
|
405
|
+
if build_statement.having_clause:
|
|
406
|
+
final = build_statement.having_clause.conditional
|
|
383
407
|
if ds.conditions:
|
|
384
|
-
final =
|
|
408
|
+
final = BuildConditional(
|
|
385
409
|
left=ds.conditions,
|
|
386
|
-
right=
|
|
410
|
+
right=build_statement.having_clause.conditional,
|
|
387
411
|
operator=BooleanOperator.AND,
|
|
388
412
|
)
|
|
389
413
|
ds = SelectNode(
|
|
390
|
-
output_concepts=
|
|
414
|
+
output_concepts=build_statement.output_components,
|
|
391
415
|
input_concepts=ds.output_concepts,
|
|
392
416
|
parents=[ds],
|
|
393
417
|
environment=ds.environment,
|
|
394
418
|
partial_concepts=ds.partial_concepts,
|
|
395
419
|
conditions=final,
|
|
396
420
|
)
|
|
421
|
+
ds.hidden_concepts = build_statement.hidden_components
|
|
422
|
+
ds.ordering = build_statement.order_by
|
|
423
|
+
# TODO: avoid this
|
|
424
|
+
ds.rebuild_cache()
|
|
397
425
|
return ds
|
|
398
426
|
|
|
399
427
|
|
|
@@ -402,8 +430,7 @@ def get_query_datasources(
|
|
|
402
430
|
statement: SelectStatement | MultiSelectStatement,
|
|
403
431
|
hooks: Optional[List[BaseHook]] = None,
|
|
404
432
|
) -> QueryDatasource:
|
|
405
|
-
|
|
406
|
-
ds = get_query_node(environment, statement)
|
|
433
|
+
ds = get_query_node(environment, statement.as_lineage(environment))
|
|
407
434
|
final_qds = ds.resolve()
|
|
408
435
|
if hooks:
|
|
409
436
|
for hook in hooks:
|
|
@@ -498,22 +525,18 @@ def process_query(
|
|
|
498
525
|
for cte in raw_ctes:
|
|
499
526
|
cte.parent_ctes = [seen[x.name] for x in cte.parent_ctes]
|
|
500
527
|
deduped_ctes: List[CTE | UnionCTE] = list(seen.values())
|
|
501
|
-
|
|
528
|
+
|
|
502
529
|
root_cte.limit = statement.limit
|
|
503
530
|
root_cte.hidden_concepts = statement.hidden_components
|
|
504
531
|
|
|
505
532
|
final_ctes = optimize_ctes(deduped_ctes, root_cte, statement)
|
|
533
|
+
mapping = {x.address: x for x in cte.output_columns}
|
|
506
534
|
return ProcessedQuery(
|
|
507
|
-
order_by=
|
|
508
|
-
grain=statement.grain,
|
|
535
|
+
order_by=root_cte.order_by,
|
|
509
536
|
limit=statement.limit,
|
|
510
|
-
|
|
511
|
-
having_clause=statement.having_clause,
|
|
512
|
-
output_columns=statement.output_components,
|
|
537
|
+
output_columns=[mapping[x.address] for x in statement.output_components],
|
|
513
538
|
ctes=final_ctes,
|
|
514
539
|
base=root_cte,
|
|
515
|
-
# we no longer do any joins at final level, this should always happen in parent CTEs
|
|
516
|
-
joins=[],
|
|
517
540
|
hidden_columns=set([x for x in statement.hidden_components]),
|
|
518
541
|
local_concepts=statement.local_concepts,
|
|
519
542
|
)
|
|
File without changes
|