pytrilogy 0.0.2.57__py3-none-any.whl → 0.0.3.0__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
- pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +2 -2
- trilogy/core/enums.py +1 -7
- trilogy/core/env_processor.py +17 -5
- trilogy/core/environment_helpers.py +11 -25
- trilogy/core/exceptions.py +4 -0
- trilogy/core/functions.py +695 -261
- trilogy/core/graph_models.py +10 -10
- trilogy/core/internal.py +11 -2
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2110 -0
- trilogy/core/models/build.py +1845 -0
- trilogy/core/models/build_environment.py +151 -0
- trilogy/core/models/core.py +370 -0
- trilogy/core/models/datasource.py +297 -0
- trilogy/core/models/environment.py +696 -0
- trilogy/core/models/execute.py +931 -0
- trilogy/core/optimization.py +17 -22
- trilogy/core/optimizations/base_optimization.py +1 -1
- trilogy/core/optimizations/inline_constant.py +6 -6
- trilogy/core/optimizations/inline_datasource.py +17 -11
- trilogy/core/optimizations/predicate_pushdown.py +17 -16
- trilogy/core/processing/concept_strategies_v3.py +181 -146
- trilogy/core/processing/graph_utils.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +19 -18
- trilogy/core/processing/node_generators/common.py +51 -45
- trilogy/core/processing/node_generators/filter_node.py +26 -13
- trilogy/core/processing/node_generators/group_node.py +26 -21
- trilogy/core/processing/node_generators/group_to_node.py +13 -10
- trilogy/core/processing/node_generators/multiselect_node.py +60 -43
- trilogy/core/processing/node_generators/node_merge_node.py +76 -38
- trilogy/core/processing/node_generators/rowset_node.py +59 -36
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
- trilogy/core/processing/node_generators/select_merge_node.py +161 -64
- trilogy/core/processing/node_generators/select_node.py +13 -13
- trilogy/core/processing/node_generators/union_node.py +12 -11
- trilogy/core/processing/node_generators/unnest_node.py +9 -7
- trilogy/core/processing/node_generators/window_node.py +19 -16
- trilogy/core/processing/nodes/__init__.py +21 -18
- trilogy/core/processing/nodes/base_node.py +92 -77
- trilogy/core/processing/nodes/filter_node.py +19 -13
- trilogy/core/processing/nodes/group_node.py +55 -40
- trilogy/core/processing/nodes/merge_node.py +47 -38
- trilogy/core/processing/nodes/select_node_v2.py +54 -40
- trilogy/core/processing/nodes/union_node.py +5 -7
- trilogy/core/processing/nodes/unnest_node.py +7 -11
- trilogy/core/processing/nodes/window_node.py +9 -4
- trilogy/core/processing/utility.py +108 -80
- trilogy/core/query_processor.py +67 -49
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +413 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +30 -0
- trilogy/core/statements/execute.py +42 -0
- trilogy/dialect/base.py +152 -111
- trilogy/dialect/common.py +9 -10
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/enums.py +4 -2
- trilogy/dialect/presto.py +1 -1
- trilogy/dialect/sql_server.py +1 -1
- trilogy/executor.py +44 -32
- trilogy/hooks/base_hook.py +6 -4
- trilogy/hooks/query_debugger.py +110 -93
- trilogy/parser.py +1 -1
- trilogy/parsing/common.py +303 -64
- trilogy/parsing/parse_engine.py +263 -617
- trilogy/parsing/render.py +50 -26
- trilogy/scripts/trilogy.py +2 -1
- pytrilogy-0.0.2.57.dist-info/RECORD +0 -87
- trilogy/core/models.py +0 -4960
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -6,44 +6,61 @@ from typing import Any, Dict, List, Set, Tuple
|
|
|
6
6
|
|
|
7
7
|
import networkx as nx
|
|
8
8
|
|
|
9
|
-
from trilogy.
|
|
10
|
-
from trilogy.core.
|
|
11
|
-
|
|
12
|
-
AggregateWrapper,
|
|
13
|
-
BaseJoin,
|
|
14
|
-
CaseElse,
|
|
15
|
-
CaseWhen,
|
|
16
|
-
Comparison,
|
|
17
|
-
Concept,
|
|
18
|
-
ConceptPair,
|
|
19
|
-
Conditional,
|
|
20
|
-
Datasource,
|
|
21
|
-
DataType,
|
|
9
|
+
from trilogy.constants import MagicConstants
|
|
10
|
+
from trilogy.core.enums import (
|
|
11
|
+
BooleanOperator,
|
|
22
12
|
DatePart,
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
Function,
|
|
13
|
+
FunctionClass,
|
|
14
|
+
Granularity,
|
|
26
15
|
JoinType,
|
|
16
|
+
Purpose,
|
|
17
|
+
)
|
|
18
|
+
from trilogy.core.models.build import (
|
|
19
|
+
BuildAggregateWrapper,
|
|
20
|
+
BuildCaseElse,
|
|
21
|
+
BuildCaseWhen,
|
|
22
|
+
BuildComparison,
|
|
23
|
+
BuildConcept,
|
|
24
|
+
BuildConditional,
|
|
25
|
+
BuildDatasource,
|
|
26
|
+
BuildFilterItem,
|
|
27
|
+
BuildFunction,
|
|
28
|
+
BuildParenthetical,
|
|
29
|
+
BuildSubselectComparison,
|
|
30
|
+
BuildWindowItem,
|
|
31
|
+
LooseBuildConceptList,
|
|
32
|
+
)
|
|
33
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
34
|
+
from trilogy.core.models.core import (
|
|
35
|
+
DataType,
|
|
27
36
|
ListType,
|
|
28
37
|
ListWrapper,
|
|
29
|
-
LooseConceptList,
|
|
30
|
-
MagicConstants,
|
|
31
38
|
MapType,
|
|
32
39
|
MapWrapper,
|
|
33
|
-
MultiSelectStatement,
|
|
34
40
|
NumericType,
|
|
35
|
-
Parenthetical,
|
|
36
|
-
ProcessedQuery,
|
|
37
|
-
QueryDatasource,
|
|
38
|
-
SelectStatement,
|
|
39
|
-
SubselectComparison,
|
|
40
41
|
TupleWrapper,
|
|
42
|
+
)
|
|
43
|
+
from trilogy.core.models.execute import (
|
|
44
|
+
CTE,
|
|
45
|
+
BaseJoin,
|
|
46
|
+
ConceptPair,
|
|
47
|
+
QueryDatasource,
|
|
41
48
|
UnionCTE,
|
|
42
49
|
UnnestJoin,
|
|
43
|
-
WindowItem,
|
|
44
50
|
)
|
|
51
|
+
from trilogy.core.statements.author import MultiSelectStatement, SelectStatement
|
|
52
|
+
from trilogy.core.statements.execute import ProcessedQuery
|
|
45
53
|
from trilogy.utility import unique
|
|
46
54
|
|
|
55
|
+
AGGREGATE_TYPES = (BuildAggregateWrapper,)
|
|
56
|
+
SUBSELECT_TYPES = (BuildSubselectComparison,)
|
|
57
|
+
COMPARISON_TYPES = (BuildComparison,)
|
|
58
|
+
FUNCTION_TYPES = (BuildFunction,)
|
|
59
|
+
PARENTHETICAL_TYPES = (BuildParenthetical,)
|
|
60
|
+
CONDITIONAL_TYPES = (BuildConditional,)
|
|
61
|
+
CONCEPT_TYPES = (BuildConcept,)
|
|
62
|
+
WINDOW_TYPES = (BuildWindowItem,)
|
|
63
|
+
|
|
47
64
|
|
|
48
65
|
class NodeType(Enum):
|
|
49
66
|
CONCEPT = 1
|
|
@@ -208,11 +225,10 @@ def resolve_join_order_v2(
|
|
|
208
225
|
return output
|
|
209
226
|
|
|
210
227
|
|
|
211
|
-
def concept_to_relevant_joins(concepts: list[
|
|
212
|
-
|
|
213
|
-
sub_props = LooseConceptList(
|
|
228
|
+
def concept_to_relevant_joins(concepts: list[BuildConcept]) -> List[BuildConcept]:
|
|
229
|
+
sub_props = LooseBuildConceptList(
|
|
214
230
|
concepts=[
|
|
215
|
-
x for x in concepts if x.keys and all([key in
|
|
231
|
+
x for x in concepts if x.keys and all([key in concepts for key in x.keys])
|
|
216
232
|
]
|
|
217
233
|
)
|
|
218
234
|
final = [c for c in concepts if c.address not in sub_props]
|
|
@@ -233,7 +249,7 @@ def create_log_lambda(prefix: str, depth: int, logger: Logger):
|
|
|
233
249
|
|
|
234
250
|
|
|
235
251
|
def calculate_graph_relevance(
|
|
236
|
-
g: nx.DiGraph, subset_nodes: set[str], concepts: set[
|
|
252
|
+
g: nx.DiGraph, subset_nodes: set[str], concepts: set[BuildConcept]
|
|
237
253
|
) -> int:
|
|
238
254
|
"""Calculate the relevance of each node in a graph
|
|
239
255
|
Relevance is used to prune irrelevant nodes from the graph
|
|
@@ -268,10 +284,10 @@ def calculate_graph_relevance(
|
|
|
268
284
|
|
|
269
285
|
def add_node_join_concept(
|
|
270
286
|
graph: nx.DiGraph,
|
|
271
|
-
concept:
|
|
272
|
-
concept_map: dict[str,
|
|
287
|
+
concept: BuildConcept,
|
|
288
|
+
concept_map: dict[str, BuildConcept],
|
|
273
289
|
ds_node: str,
|
|
274
|
-
environment:
|
|
290
|
+
environment: BuildEnvironment,
|
|
275
291
|
):
|
|
276
292
|
name = f"c~{concept.address}"
|
|
277
293
|
graph.add_node(name, type=NodeType.CONCEPT)
|
|
@@ -294,8 +310,8 @@ def add_node_join_concept(
|
|
|
294
310
|
|
|
295
311
|
|
|
296
312
|
def resolve_instantiated_concept(
|
|
297
|
-
concept:
|
|
298
|
-
) ->
|
|
313
|
+
concept: BuildConcept, datasource: QueryDatasource | BuildDatasource
|
|
314
|
+
) -> BuildConcept:
|
|
299
315
|
if concept.address in datasource.output_concepts:
|
|
300
316
|
return concept
|
|
301
317
|
for k in concept.pseudonyms:
|
|
@@ -333,21 +349,21 @@ def reduce_concept_pairs(input: list[ConceptPair]) -> list[ConceptPair]:
|
|
|
333
349
|
|
|
334
350
|
|
|
335
351
|
def get_node_joins(
|
|
336
|
-
datasources: List[QueryDatasource |
|
|
337
|
-
environment:
|
|
352
|
+
datasources: List[QueryDatasource | BuildDatasource],
|
|
353
|
+
environment: BuildEnvironment,
|
|
338
354
|
# concepts:List[Concept],
|
|
339
355
|
) -> List[BaseJoin]:
|
|
340
356
|
graph = nx.Graph()
|
|
341
357
|
partials: dict[str, list[str]] = {}
|
|
342
|
-
ds_node_map: dict[str, QueryDatasource |
|
|
343
|
-
concept_map: dict[str,
|
|
358
|
+
ds_node_map: dict[str, QueryDatasource | BuildDatasource] = {}
|
|
359
|
+
concept_map: dict[str, BuildConcept] = {}
|
|
344
360
|
for datasource in datasources:
|
|
345
361
|
ds_node = f"ds~{datasource.identifier}"
|
|
346
362
|
ds_node_map[ds_node] = datasource
|
|
347
363
|
graph.add_node(ds_node, type=NodeType.NODE)
|
|
348
364
|
partials[ds_node] = [f"c~{c.address}" for c in datasource.partial_concepts]
|
|
349
365
|
for concept in datasource.output_concepts:
|
|
350
|
-
if concept in datasource.hidden_concepts:
|
|
366
|
+
if concept.address in datasource.hidden_concepts:
|
|
351
367
|
continue
|
|
352
368
|
add_node_join_concept(
|
|
353
369
|
graph=graph,
|
|
@@ -367,7 +383,7 @@ def get_node_joins(
|
|
|
367
383
|
concepts=[] if not j.keys else None,
|
|
368
384
|
concept_pairs=reduce_concept_pairs(
|
|
369
385
|
[
|
|
370
|
-
ConceptPair(
|
|
386
|
+
ConceptPair.model_construct(
|
|
371
387
|
left=resolve_instantiated_concept(
|
|
372
388
|
concept_map[concept], ds_node_map[k]
|
|
373
389
|
),
|
|
@@ -386,7 +402,7 @@ def get_node_joins(
|
|
|
386
402
|
|
|
387
403
|
|
|
388
404
|
def get_disconnected_components(
|
|
389
|
-
concept_map: Dict[str, Set[
|
|
405
|
+
concept_map: Dict[str, Set[BuildConcept]]
|
|
390
406
|
) -> Tuple[int, List]:
|
|
391
407
|
"""Find if any of the datasources are not linked"""
|
|
392
408
|
import networkx as nx
|
|
@@ -414,18 +430,18 @@ def is_scalar_condition(
|
|
|
414
430
|
| date
|
|
415
431
|
| datetime
|
|
416
432
|
| list[Any]
|
|
417
|
-
|
|
|
418
|
-
|
|
|
419
|
-
|
|
|
420
|
-
|
|
|
421
|
-
|
|
|
422
|
-
|
|
|
423
|
-
|
|
|
424
|
-
|
|
|
433
|
+
| BuildConcept
|
|
434
|
+
| BuildWindowItem
|
|
435
|
+
| BuildFilterItem
|
|
436
|
+
| BuildConditional
|
|
437
|
+
| BuildComparison
|
|
438
|
+
| BuildParenthetical
|
|
439
|
+
| BuildFunction
|
|
440
|
+
| BuildAggregateWrapper
|
|
441
|
+
| BuildCaseWhen
|
|
442
|
+
| BuildCaseElse
|
|
425
443
|
| MagicConstants
|
|
426
444
|
| DataType
|
|
427
|
-
| CaseWhen
|
|
428
|
-
| CaseElse
|
|
429
445
|
| MapWrapper[Any, Any]
|
|
430
446
|
| ListType
|
|
431
447
|
| MapType
|
|
@@ -436,64 +452,76 @@ def is_scalar_condition(
|
|
|
436
452
|
),
|
|
437
453
|
materialized: set[str] | None = None,
|
|
438
454
|
) -> bool:
|
|
439
|
-
if isinstance(element,
|
|
455
|
+
if isinstance(element, PARENTHETICAL_TYPES):
|
|
440
456
|
return is_scalar_condition(element.content, materialized)
|
|
441
|
-
elif isinstance(element,
|
|
457
|
+
elif isinstance(element, SUBSELECT_TYPES):
|
|
442
458
|
return True
|
|
443
|
-
elif isinstance(element,
|
|
459
|
+
elif isinstance(element, COMPARISON_TYPES):
|
|
444
460
|
return is_scalar_condition(element.left, materialized) and is_scalar_condition(
|
|
445
461
|
element.right, materialized
|
|
446
462
|
)
|
|
447
|
-
elif isinstance(element,
|
|
463
|
+
elif isinstance(element, FUNCTION_TYPES):
|
|
448
464
|
if element.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
|
|
449
465
|
return False
|
|
450
466
|
return all([is_scalar_condition(x, materialized) for x in element.arguments])
|
|
451
|
-
elif isinstance(element,
|
|
467
|
+
elif isinstance(element, CONCEPT_TYPES):
|
|
452
468
|
if materialized and element.address in materialized:
|
|
453
469
|
return True
|
|
454
|
-
if element.lineage and isinstance(element.lineage,
|
|
470
|
+
if element.lineage and isinstance(element.lineage, AGGREGATE_TYPES):
|
|
455
471
|
return is_scalar_condition(element.lineage, materialized)
|
|
456
|
-
if element.lineage and isinstance(element.lineage,
|
|
472
|
+
if element.lineage and isinstance(element.lineage, FUNCTION_TYPES):
|
|
457
473
|
return is_scalar_condition(element.lineage, materialized)
|
|
458
474
|
return True
|
|
459
|
-
elif isinstance(element,
|
|
475
|
+
elif isinstance(element, AGGREGATE_TYPES):
|
|
460
476
|
return is_scalar_condition(element.function, materialized)
|
|
461
|
-
elif isinstance(element,
|
|
477
|
+
elif isinstance(element, CONDITIONAL_TYPES):
|
|
462
478
|
return is_scalar_condition(element.left, materialized) and is_scalar_condition(
|
|
463
479
|
element.right, materialized
|
|
464
480
|
)
|
|
465
|
-
elif isinstance(element,
|
|
481
|
+
elif isinstance(element, (BuildCaseWhen,)):
|
|
466
482
|
return is_scalar_condition(
|
|
467
483
|
element.comparison, materialized
|
|
468
484
|
) and is_scalar_condition(element.expr, materialized)
|
|
469
|
-
elif isinstance(element,
|
|
485
|
+
elif isinstance(element, (BuildCaseElse,)):
|
|
470
486
|
return is_scalar_condition(element.expr, materialized)
|
|
471
487
|
elif isinstance(element, MagicConstants):
|
|
472
488
|
return True
|
|
473
489
|
return True
|
|
474
490
|
|
|
475
491
|
|
|
492
|
+
CONDITION_TYPES = (
|
|
493
|
+
BuildSubselectComparison,
|
|
494
|
+
BuildComparison,
|
|
495
|
+
BuildConditional,
|
|
496
|
+
BuildParenthetical,
|
|
497
|
+
)
|
|
498
|
+
|
|
499
|
+
|
|
476
500
|
def decompose_condition(
|
|
477
|
-
conditional:
|
|
478
|
-
) -> list[
|
|
479
|
-
|
|
480
|
-
|
|
501
|
+
conditional: BuildConditional | BuildComparison | BuildParenthetical,
|
|
502
|
+
) -> list[
|
|
503
|
+
BuildSubselectComparison | BuildComparison | BuildConditional | BuildParenthetical
|
|
504
|
+
]:
|
|
505
|
+
chunks: list[
|
|
506
|
+
BuildSubselectComparison
|
|
507
|
+
| BuildComparison
|
|
508
|
+
| BuildConditional
|
|
509
|
+
| BuildParenthetical
|
|
510
|
+
] = []
|
|
511
|
+
if not isinstance(conditional, BuildConditional):
|
|
481
512
|
return [conditional]
|
|
482
513
|
if conditional.operator == BooleanOperator.AND:
|
|
483
514
|
if not (
|
|
484
|
-
isinstance(
|
|
485
|
-
conditional.left,
|
|
486
|
-
(SubselectComparison, Comparison, Conditional, Parenthetical),
|
|
487
|
-
)
|
|
515
|
+
isinstance(conditional.left, CONDITION_TYPES)
|
|
488
516
|
and isinstance(
|
|
489
517
|
conditional.right,
|
|
490
|
-
|
|
518
|
+
CONDITION_TYPES,
|
|
491
519
|
)
|
|
492
520
|
):
|
|
493
521
|
chunks.append(conditional)
|
|
494
522
|
else:
|
|
495
523
|
for val in [conditional.left, conditional.right]:
|
|
496
|
-
if isinstance(val,
|
|
524
|
+
if isinstance(val, BuildConditional):
|
|
497
525
|
chunks.extend(decompose_condition(val))
|
|
498
526
|
else:
|
|
499
527
|
chunks.append(val)
|
|
@@ -503,8 +531,8 @@ def decompose_condition(
|
|
|
503
531
|
|
|
504
532
|
|
|
505
533
|
def find_nullable_concepts(
|
|
506
|
-
source_map: Dict[str, set[
|
|
507
|
-
datasources: List[
|
|
534
|
+
source_map: Dict[str, set[BuildDatasource | QueryDatasource | UnnestJoin]],
|
|
535
|
+
datasources: List[BuildDatasource | QueryDatasource],
|
|
508
536
|
joins: List[BaseJoin | UnnestJoin],
|
|
509
537
|
) -> List[str]:
|
|
510
538
|
"""give a set of datasources and joins, find the concepts
|
|
@@ -514,7 +542,7 @@ def find_nullable_concepts(
|
|
|
514
542
|
datasource_map = {
|
|
515
543
|
x.identifier: x
|
|
516
544
|
for x in datasources
|
|
517
|
-
if isinstance(x, (
|
|
545
|
+
if isinstance(x, (BuildDatasource, QueryDatasource))
|
|
518
546
|
}
|
|
519
547
|
for join in joins:
|
|
520
548
|
is_on_nullable_condition = False
|
|
@@ -567,9 +595,8 @@ def find_nullable_concepts(
|
|
|
567
595
|
def sort_select_output_processed(
|
|
568
596
|
cte: CTE | UnionCTE, query: ProcessedQuery
|
|
569
597
|
) -> CTE | UnionCTE:
|
|
570
|
-
hidden_addresses = [c.address for c in query.hidden_columns]
|
|
571
598
|
output_addresses = [
|
|
572
|
-
c.address for c in query.output_columns if c.address not in
|
|
599
|
+
c.address for c in query.output_columns if c.address not in query.hidden_columns
|
|
573
600
|
]
|
|
574
601
|
|
|
575
602
|
mapping = {x.address: x for x in cte.output_columns}
|
|
@@ -586,9 +613,10 @@ def sort_select_output(
|
|
|
586
613
|
) -> CTE | UnionCTE:
|
|
587
614
|
if isinstance(query, ProcessedQuery):
|
|
588
615
|
return sort_select_output_processed(cte, query)
|
|
589
|
-
hidden_addresses = [c.address for c in query.hidden_components]
|
|
590
616
|
output_addresses = [
|
|
591
|
-
c.address
|
|
617
|
+
c.address
|
|
618
|
+
for c in query.output_components
|
|
619
|
+
if c.address not in query.hidden_components
|
|
592
620
|
]
|
|
593
621
|
|
|
594
622
|
mapping = {x.address: x for x in cte.output_columns}
|
trilogy/core/query_processor.py
CHANGED
|
@@ -7,32 +7,40 @@ from trilogy.core.constants import CONSTANT_DATASET
|
|
|
7
7
|
from trilogy.core.enums import BooleanOperator, SourceType
|
|
8
8
|
from trilogy.core.env_processor import generate_graph
|
|
9
9
|
from trilogy.core.ergonomics import generate_cte_names
|
|
10
|
-
from trilogy.core.models import
|
|
10
|
+
from trilogy.core.models.author import MultiSelectLineage, SelectLineage
|
|
11
|
+
from trilogy.core.models.build import (
|
|
12
|
+
BuildConcept,
|
|
13
|
+
BuildConditional,
|
|
14
|
+
BuildDatasource,
|
|
15
|
+
Factory,
|
|
16
|
+
)
|
|
17
|
+
from trilogy.core.models.environment import Environment
|
|
18
|
+
from trilogy.core.models.execute import (
|
|
11
19
|
CTE,
|
|
12
20
|
BaseJoin,
|
|
13
|
-
Concept,
|
|
14
|
-
ConceptDeclarationStatement,
|
|
15
|
-
Conditional,
|
|
16
|
-
CopyStatement,
|
|
17
21
|
CTEConceptPair,
|
|
18
|
-
Datasource,
|
|
19
|
-
Environment,
|
|
20
22
|
InstantiatedUnnestJoin,
|
|
21
23
|
Join,
|
|
22
|
-
MaterializedDataset,
|
|
23
|
-
MultiSelectStatement,
|
|
24
|
-
PersistStatement,
|
|
25
|
-
ProcessedCopyStatement,
|
|
26
|
-
ProcessedQuery,
|
|
27
|
-
ProcessedQueryPersist,
|
|
28
24
|
QueryDatasource,
|
|
29
|
-
SelectStatement,
|
|
30
25
|
UnionCTE,
|
|
31
26
|
UnnestJoin,
|
|
32
27
|
)
|
|
33
28
|
from trilogy.core.optimization import optimize_ctes
|
|
34
29
|
from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
|
|
35
30
|
from trilogy.core.processing.nodes import History, SelectNode, StrategyNode
|
|
31
|
+
from trilogy.core.statements.author import (
|
|
32
|
+
ConceptDeclarationStatement,
|
|
33
|
+
CopyStatement,
|
|
34
|
+
MultiSelectStatement,
|
|
35
|
+
PersistStatement,
|
|
36
|
+
SelectStatement,
|
|
37
|
+
)
|
|
38
|
+
from trilogy.core.statements.common import MaterializedDataset
|
|
39
|
+
from trilogy.core.statements.execute import (
|
|
40
|
+
ProcessedCopyStatement,
|
|
41
|
+
ProcessedQuery,
|
|
42
|
+
ProcessedQueryPersist,
|
|
43
|
+
)
|
|
36
44
|
from trilogy.hooks.base_hook import BaseHook
|
|
37
45
|
from trilogy.utility import unique
|
|
38
46
|
|
|
@@ -50,14 +58,19 @@ def base_join_to_join(
|
|
|
50
58
|
alias=base_join.alias,
|
|
51
59
|
)
|
|
52
60
|
|
|
53
|
-
def get_datasource_cte(datasource:
|
|
61
|
+
def get_datasource_cte(datasource: BuildDatasource | QueryDatasource) -> CTE:
|
|
62
|
+
eligible = set()
|
|
54
63
|
for cte in ctes:
|
|
55
64
|
if cte.source.identifier == datasource.identifier:
|
|
56
65
|
return cte
|
|
66
|
+
eligible.add(cte.source.identifier)
|
|
57
67
|
for cte in ctes:
|
|
58
68
|
if cte.source.datasources[0].identifier == datasource.identifier:
|
|
59
69
|
return cte
|
|
60
|
-
|
|
70
|
+
eligible.add(cte.source.datasources[0].identifier)
|
|
71
|
+
raise ValueError(
|
|
72
|
+
f"Could not find CTE for datasource {datasource.identifier}; have {eligible}"
|
|
73
|
+
)
|
|
61
74
|
|
|
62
75
|
if base_join.left_datasource is not None:
|
|
63
76
|
left_cte = get_datasource_cte(base_join.left_datasource)
|
|
@@ -114,7 +127,7 @@ def generate_source_map(
|
|
|
114
127
|
and isinstance(list(qdv)[0], UnnestJoin)
|
|
115
128
|
):
|
|
116
129
|
source_map[qdk] = []
|
|
117
|
-
basic = [x for x in qdv if isinstance(x,
|
|
130
|
+
basic = [x for x in qdv if isinstance(x, BuildDatasource)]
|
|
118
131
|
for base in basic:
|
|
119
132
|
source_map[qdk].append(base.safe_identifier)
|
|
120
133
|
|
|
@@ -163,8 +176,8 @@ def generate_source_map(
|
|
|
163
176
|
}, existence_source_map
|
|
164
177
|
|
|
165
178
|
|
|
166
|
-
def datasource_to_query_datasource(datasource:
|
|
167
|
-
sub_select: Dict[str, Set[Union[
|
|
179
|
+
def datasource_to_query_datasource(datasource: BuildDatasource) -> QueryDatasource:
|
|
180
|
+
sub_select: Dict[str, Set[Union[BuildDatasource, QueryDatasource, UnnestJoin]]] = {
|
|
168
181
|
**{c.address: {datasource} for c in datasource.concepts},
|
|
169
182
|
}
|
|
170
183
|
concepts = [c for c in datasource.concepts]
|
|
@@ -206,7 +219,7 @@ def resolve_cte_base_name_and_alias_v2(
|
|
|
206
219
|
raw_joins: List[Join | InstantiatedUnnestJoin],
|
|
207
220
|
) -> Tuple[str | None, str | None]:
|
|
208
221
|
if (
|
|
209
|
-
isinstance(source.datasources[0],
|
|
222
|
+
isinstance(source.datasources[0], BuildDatasource)
|
|
210
223
|
and not source.datasources[0].name == CONSTANT_DATASET
|
|
211
224
|
):
|
|
212
225
|
ds = source.datasources[0]
|
|
@@ -265,6 +278,7 @@ def datasource_to_cte(
|
|
|
265
278
|
for c in query_datasource.output_concepts
|
|
266
279
|
],
|
|
267
280
|
grain=direct_parents[0].grain,
|
|
281
|
+
order_by=query_datasource.ordering,
|
|
268
282
|
)
|
|
269
283
|
return final
|
|
270
284
|
|
|
@@ -333,6 +347,7 @@ def datasource_to_cte(
|
|
|
333
347
|
hidden_concepts=query_datasource.hidden_concepts,
|
|
334
348
|
base_name_override=base_name,
|
|
335
349
|
base_alias_override=base_alias,
|
|
350
|
+
order_by=query_datasource.ordering,
|
|
336
351
|
)
|
|
337
352
|
if cte.grain != query_datasource.grain:
|
|
338
353
|
raise ValueError("Grain was corrupted in CTE generation")
|
|
@@ -351,26 +366,30 @@ def datasource_to_cte(
|
|
|
351
366
|
|
|
352
367
|
def get_query_node(
|
|
353
368
|
environment: Environment,
|
|
354
|
-
statement:
|
|
369
|
+
statement: SelectLineage | MultiSelectLineage,
|
|
355
370
|
history: History | None = None,
|
|
356
371
|
) -> StrategyNode:
|
|
357
|
-
environment = environment.duplicate()
|
|
358
|
-
for k, v in statement.local_concepts.items():
|
|
359
|
-
environment.concepts[k] = v
|
|
360
|
-
graph = generate_graph(environment)
|
|
361
|
-
logger.info(
|
|
362
|
-
f"{LOGGER_PREFIX} getting source datasource for query with filtering {statement.where_clause_category} and grain {statement.grain}"
|
|
363
|
-
)
|
|
364
372
|
if not statement.output_components:
|
|
365
373
|
raise ValueError(f"Statement has no output components {statement}")
|
|
366
374
|
|
|
367
|
-
|
|
375
|
+
history = history or History(base_environment=environment)
|
|
376
|
+
build_statement = Factory(environment=environment).build(statement)
|
|
377
|
+
# build_statement = statement
|
|
378
|
+
build_environment = environment.materialize_for_select(
|
|
379
|
+
build_statement.local_concepts
|
|
380
|
+
)
|
|
381
|
+
graph = generate_graph(build_environment)
|
|
382
|
+
logger.info(
|
|
383
|
+
f"{LOGGER_PREFIX} getting source datasource for outputs {statement.output_components} grain {build_statement.grain}"
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
search_concepts: list[BuildConcept] = build_statement.output_components
|
|
368
387
|
|
|
369
388
|
ods: StrategyNode = source_query_concepts(
|
|
370
|
-
search_concepts,
|
|
371
|
-
environment=
|
|
389
|
+
output_concepts=search_concepts,
|
|
390
|
+
environment=build_environment,
|
|
372
391
|
g=graph,
|
|
373
|
-
conditions=
|
|
392
|
+
conditions=build_statement.where_clause,
|
|
374
393
|
history=history,
|
|
375
394
|
)
|
|
376
395
|
if not ods:
|
|
@@ -378,22 +397,26 @@ def get_query_node(
|
|
|
378
397
|
f"Could not find source query concepts for {[x.address for x in search_concepts]}"
|
|
379
398
|
)
|
|
380
399
|
ds: StrategyNode = ods
|
|
381
|
-
if
|
|
382
|
-
final =
|
|
400
|
+
if build_statement.having_clause:
|
|
401
|
+
final = build_statement.having_clause.conditional
|
|
383
402
|
if ds.conditions:
|
|
384
|
-
final =
|
|
403
|
+
final = BuildConditional(
|
|
385
404
|
left=ds.conditions,
|
|
386
|
-
right=
|
|
405
|
+
right=build_statement.having_clause.conditional,
|
|
387
406
|
operator=BooleanOperator.AND,
|
|
388
407
|
)
|
|
389
408
|
ds = SelectNode(
|
|
390
|
-
output_concepts=
|
|
409
|
+
output_concepts=build_statement.output_components,
|
|
391
410
|
input_concepts=ds.output_concepts,
|
|
392
411
|
parents=[ds],
|
|
393
412
|
environment=ds.environment,
|
|
394
413
|
partial_concepts=ds.partial_concepts,
|
|
395
414
|
conditions=final,
|
|
396
415
|
)
|
|
416
|
+
ds.hidden_concepts = build_statement.hidden_components
|
|
417
|
+
ds.ordering = build_statement.order_by
|
|
418
|
+
# TODO: avoid this
|
|
419
|
+
ds.rebuild_cache()
|
|
397
420
|
return ds
|
|
398
421
|
|
|
399
422
|
|
|
@@ -402,8 +425,7 @@ def get_query_datasources(
|
|
|
402
425
|
statement: SelectStatement | MultiSelectStatement,
|
|
403
426
|
hooks: Optional[List[BaseHook]] = None,
|
|
404
427
|
) -> QueryDatasource:
|
|
405
|
-
|
|
406
|
-
ds = get_query_node(environment, statement)
|
|
428
|
+
ds = get_query_node(environment, statement.as_lineage(environment))
|
|
407
429
|
final_qds = ds.resolve()
|
|
408
430
|
if hooks:
|
|
409
431
|
for hook in hooks:
|
|
@@ -498,22 +520,18 @@ def process_query(
|
|
|
498
520
|
for cte in raw_ctes:
|
|
499
521
|
cte.parent_ctes = [seen[x.name] for x in cte.parent_ctes]
|
|
500
522
|
deduped_ctes: List[CTE | UnionCTE] = list(seen.values())
|
|
501
|
-
|
|
523
|
+
|
|
502
524
|
root_cte.limit = statement.limit
|
|
503
|
-
root_cte.hidden_concepts =
|
|
525
|
+
root_cte.hidden_concepts = statement.hidden_components
|
|
504
526
|
|
|
505
527
|
final_ctes = optimize_ctes(deduped_ctes, root_cte, statement)
|
|
528
|
+
mapping = {x.address: x for x in cte.output_columns}
|
|
506
529
|
return ProcessedQuery(
|
|
507
|
-
order_by=
|
|
508
|
-
grain=statement.grain,
|
|
530
|
+
order_by=root_cte.order_by,
|
|
509
531
|
limit=statement.limit,
|
|
510
|
-
|
|
511
|
-
having_clause=statement.having_clause,
|
|
512
|
-
output_columns=statement.output_components,
|
|
532
|
+
output_columns=[mapping[x.address] for x in statement.output_components],
|
|
513
533
|
ctes=final_ctes,
|
|
514
534
|
base=root_cte,
|
|
515
|
-
|
|
516
|
-
joins=[],
|
|
517
|
-
hidden_columns=[x for x in statement.hidden_components],
|
|
535
|
+
hidden_columns=set([x for x in statement.hidden_components]),
|
|
518
536
|
local_concepts=statement.local_concepts,
|
|
519
537
|
)
|
|
File without changes
|