pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (75)
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1845 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +696 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +180 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +57 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +19 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +65 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +146 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/base_hook.py +6 -4
  65. trilogy/hooks/query_debugger.py +110 -93
  66. trilogy/parser.py +1 -1
  67. trilogy/parsing/common.py +303 -64
  68. trilogy/parsing/parse_engine.py +263 -617
  69. trilogy/parsing/render.py +50 -26
  70. trilogy/scripts/trilogy.py +2 -1
  71. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  72. trilogy/core/models.py +0 -4960
  73. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
@@ -6,44 +6,61 @@ from typing import Any, Dict, List, Set, Tuple
6
6
 
7
7
  import networkx as nx
8
8
 
9
- from trilogy.core.enums import BooleanOperator, FunctionClass, Granularity, Purpose
10
- from trilogy.core.models import (
11
- CTE,
12
- AggregateWrapper,
13
- BaseJoin,
14
- CaseElse,
15
- CaseWhen,
16
- Comparison,
17
- Concept,
18
- ConceptPair,
19
- Conditional,
20
- Datasource,
21
- DataType,
9
+ from trilogy.constants import MagicConstants
10
+ from trilogy.core.enums import (
11
+ BooleanOperator,
22
12
  DatePart,
23
- Environment,
24
- FilterItem,
25
- Function,
13
+ FunctionClass,
14
+ Granularity,
26
15
  JoinType,
16
+ Purpose,
17
+ )
18
+ from trilogy.core.models.build import (
19
+ BuildAggregateWrapper,
20
+ BuildCaseElse,
21
+ BuildCaseWhen,
22
+ BuildComparison,
23
+ BuildConcept,
24
+ BuildConditional,
25
+ BuildDatasource,
26
+ BuildFilterItem,
27
+ BuildFunction,
28
+ BuildParenthetical,
29
+ BuildSubselectComparison,
30
+ BuildWindowItem,
31
+ LooseBuildConceptList,
32
+ )
33
+ from trilogy.core.models.build_environment import BuildEnvironment
34
+ from trilogy.core.models.core import (
35
+ DataType,
27
36
  ListType,
28
37
  ListWrapper,
29
- LooseConceptList,
30
- MagicConstants,
31
38
  MapType,
32
39
  MapWrapper,
33
- MultiSelectStatement,
34
40
  NumericType,
35
- Parenthetical,
36
- ProcessedQuery,
37
- QueryDatasource,
38
- SelectStatement,
39
- SubselectComparison,
40
41
  TupleWrapper,
42
+ )
43
+ from trilogy.core.models.execute import (
44
+ CTE,
45
+ BaseJoin,
46
+ ConceptPair,
47
+ QueryDatasource,
41
48
  UnionCTE,
42
49
  UnnestJoin,
43
- WindowItem,
44
50
  )
51
+ from trilogy.core.statements.author import MultiSelectStatement, SelectStatement
52
+ from trilogy.core.statements.execute import ProcessedQuery
45
53
  from trilogy.utility import unique
46
54
 
55
+ AGGREGATE_TYPES = (BuildAggregateWrapper,)
56
+ SUBSELECT_TYPES = (BuildSubselectComparison,)
57
+ COMPARISON_TYPES = (BuildComparison,)
58
+ FUNCTION_TYPES = (BuildFunction,)
59
+ PARENTHETICAL_TYPES = (BuildParenthetical,)
60
+ CONDITIONAL_TYPES = (BuildConditional,)
61
+ CONCEPT_TYPES = (BuildConcept,)
62
+ WINDOW_TYPES = (BuildWindowItem,)
63
+
47
64
 
48
65
  class NodeType(Enum):
49
66
  CONCEPT = 1
@@ -208,11 +225,10 @@ def resolve_join_order_v2(
208
225
  return output
209
226
 
210
227
 
211
- def concept_to_relevant_joins(concepts: list[Concept]) -> List[Concept]:
212
- addresses = LooseConceptList(concepts=concepts)
213
- sub_props = LooseConceptList(
228
+ def concept_to_relevant_joins(concepts: list[BuildConcept]) -> List[BuildConcept]:
229
+ sub_props = LooseBuildConceptList(
214
230
  concepts=[
215
- x for x in concepts if x.keys and all([key in addresses for key in x.keys])
231
+ x for x in concepts if x.keys and all([key in concepts for key in x.keys])
216
232
  ]
217
233
  )
218
234
  final = [c for c in concepts if c.address not in sub_props]
@@ -233,7 +249,7 @@ def create_log_lambda(prefix: str, depth: int, logger: Logger):
233
249
 
234
250
 
235
251
  def calculate_graph_relevance(
236
- g: nx.DiGraph, subset_nodes: set[str], concepts: set[Concept]
252
+ g: nx.DiGraph, subset_nodes: set[str], concepts: set[BuildConcept]
237
253
  ) -> int:
238
254
  """Calculate the relevance of each node in a graph
239
255
  Relevance is used to prune irrelevant nodes from the graph
@@ -268,10 +284,10 @@ def calculate_graph_relevance(
268
284
 
269
285
  def add_node_join_concept(
270
286
  graph: nx.DiGraph,
271
- concept: Concept,
272
- concept_map: dict[str, Concept],
287
+ concept: BuildConcept,
288
+ concept_map: dict[str, BuildConcept],
273
289
  ds_node: str,
274
- environment: Environment,
290
+ environment: BuildEnvironment,
275
291
  ):
276
292
  name = f"c~{concept.address}"
277
293
  graph.add_node(name, type=NodeType.CONCEPT)
@@ -294,8 +310,8 @@ def add_node_join_concept(
294
310
 
295
311
 
296
312
  def resolve_instantiated_concept(
297
- concept: Concept, datasource: QueryDatasource | Datasource
298
- ) -> Concept:
313
+ concept: BuildConcept, datasource: QueryDatasource | BuildDatasource
314
+ ) -> BuildConcept:
299
315
  if concept.address in datasource.output_concepts:
300
316
  return concept
301
317
  for k in concept.pseudonyms:
@@ -333,14 +349,14 @@ def reduce_concept_pairs(input: list[ConceptPair]) -> list[ConceptPair]:
333
349
 
334
350
 
335
351
  def get_node_joins(
336
- datasources: List[QueryDatasource | Datasource],
337
- environment: Environment,
352
+ datasources: List[QueryDatasource | BuildDatasource],
353
+ environment: BuildEnvironment,
338
354
  # concepts:List[Concept],
339
355
  ) -> List[BaseJoin]:
340
356
  graph = nx.Graph()
341
357
  partials: dict[str, list[str]] = {}
342
- ds_node_map: dict[str, QueryDatasource | Datasource] = {}
343
- concept_map: dict[str, Concept] = {}
358
+ ds_node_map: dict[str, QueryDatasource | BuildDatasource] = {}
359
+ concept_map: dict[str, BuildConcept] = {}
344
360
  for datasource in datasources:
345
361
  ds_node = f"ds~{datasource.identifier}"
346
362
  ds_node_map[ds_node] = datasource
@@ -367,7 +383,7 @@ def get_node_joins(
367
383
  concepts=[] if not j.keys else None,
368
384
  concept_pairs=reduce_concept_pairs(
369
385
  [
370
- ConceptPair(
386
+ ConceptPair.model_construct(
371
387
  left=resolve_instantiated_concept(
372
388
  concept_map[concept], ds_node_map[k]
373
389
  ),
@@ -386,7 +402,7 @@ def get_node_joins(
386
402
 
387
403
 
388
404
  def get_disconnected_components(
389
- concept_map: Dict[str, Set[Concept]]
405
+ concept_map: Dict[str, Set[BuildConcept]]
390
406
  ) -> Tuple[int, List]:
391
407
  """Find if any of the datasources are not linked"""
392
408
  import networkx as nx
@@ -414,18 +430,18 @@ def is_scalar_condition(
414
430
  | date
415
431
  | datetime
416
432
  | list[Any]
417
- | WindowItem
418
- | FilterItem
419
- | Concept
420
- | Comparison
421
- | Conditional
422
- | Parenthetical
423
- | Function
424
- | AggregateWrapper
433
+ | BuildConcept
434
+ | BuildWindowItem
435
+ | BuildFilterItem
436
+ | BuildConditional
437
+ | BuildComparison
438
+ | BuildParenthetical
439
+ | BuildFunction
440
+ | BuildAggregateWrapper
441
+ | BuildCaseWhen
442
+ | BuildCaseElse
425
443
  | MagicConstants
426
444
  | DataType
427
- | CaseWhen
428
- | CaseElse
429
445
  | MapWrapper[Any, Any]
430
446
  | ListType
431
447
  | MapType
@@ -436,64 +452,76 @@ def is_scalar_condition(
436
452
  ),
437
453
  materialized: set[str] | None = None,
438
454
  ) -> bool:
439
- if isinstance(element, Parenthetical):
455
+ if isinstance(element, PARENTHETICAL_TYPES):
440
456
  return is_scalar_condition(element.content, materialized)
441
- elif isinstance(element, SubselectComparison):
457
+ elif isinstance(element, SUBSELECT_TYPES):
442
458
  return True
443
- elif isinstance(element, Comparison):
459
+ elif isinstance(element, COMPARISON_TYPES):
444
460
  return is_scalar_condition(element.left, materialized) and is_scalar_condition(
445
461
  element.right, materialized
446
462
  )
447
- elif isinstance(element, Function):
463
+ elif isinstance(element, FUNCTION_TYPES):
448
464
  if element.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
449
465
  return False
450
466
  return all([is_scalar_condition(x, materialized) for x in element.arguments])
451
- elif isinstance(element, Concept):
467
+ elif isinstance(element, CONCEPT_TYPES):
452
468
  if materialized and element.address in materialized:
453
469
  return True
454
- if element.lineage and isinstance(element.lineage, AggregateWrapper):
470
+ if element.lineage and isinstance(element.lineage, AGGREGATE_TYPES):
455
471
  return is_scalar_condition(element.lineage, materialized)
456
- if element.lineage and isinstance(element.lineage, Function):
472
+ if element.lineage and isinstance(element.lineage, FUNCTION_TYPES):
457
473
  return is_scalar_condition(element.lineage, materialized)
458
474
  return True
459
- elif isinstance(element, AggregateWrapper):
475
+ elif isinstance(element, AGGREGATE_TYPES):
460
476
  return is_scalar_condition(element.function, materialized)
461
- elif isinstance(element, Conditional):
477
+ elif isinstance(element, CONDITIONAL_TYPES):
462
478
  return is_scalar_condition(element.left, materialized) and is_scalar_condition(
463
479
  element.right, materialized
464
480
  )
465
- elif isinstance(element, CaseWhen):
481
+ elif isinstance(element, (BuildCaseWhen,)):
466
482
  return is_scalar_condition(
467
483
  element.comparison, materialized
468
484
  ) and is_scalar_condition(element.expr, materialized)
469
- elif isinstance(element, CaseElse):
485
+ elif isinstance(element, (BuildCaseElse,)):
470
486
  return is_scalar_condition(element.expr, materialized)
471
487
  elif isinstance(element, MagicConstants):
472
488
  return True
473
489
  return True
474
490
 
475
491
 
492
+ CONDITION_TYPES = (
493
+ BuildSubselectComparison,
494
+ BuildComparison,
495
+ BuildConditional,
496
+ BuildParenthetical,
497
+ )
498
+
499
+
476
500
  def decompose_condition(
477
- conditional: Conditional | Comparison | Parenthetical,
478
- ) -> list[SubselectComparison | Comparison | Conditional | Parenthetical]:
479
- chunks: list[SubselectComparison | Comparison | Conditional | Parenthetical] = []
480
- if not isinstance(conditional, Conditional):
501
+ conditional: BuildConditional | BuildComparison | BuildParenthetical,
502
+ ) -> list[
503
+ BuildSubselectComparison | BuildComparison | BuildConditional | BuildParenthetical
504
+ ]:
505
+ chunks: list[
506
+ BuildSubselectComparison
507
+ | BuildComparison
508
+ | BuildConditional
509
+ | BuildParenthetical
510
+ ] = []
511
+ if not isinstance(conditional, BuildConditional):
481
512
  return [conditional]
482
513
  if conditional.operator == BooleanOperator.AND:
483
514
  if not (
484
- isinstance(
485
- conditional.left,
486
- (SubselectComparison, Comparison, Conditional, Parenthetical),
487
- )
515
+ isinstance(conditional.left, CONDITION_TYPES)
488
516
  and isinstance(
489
517
  conditional.right,
490
- (SubselectComparison, Comparison, Conditional, Parenthetical),
518
+ CONDITION_TYPES,
491
519
  )
492
520
  ):
493
521
  chunks.append(conditional)
494
522
  else:
495
523
  for val in [conditional.left, conditional.right]:
496
- if isinstance(val, Conditional):
524
+ if isinstance(val, BuildConditional):
497
525
  chunks.extend(decompose_condition(val))
498
526
  else:
499
527
  chunks.append(val)
@@ -503,8 +531,8 @@ def decompose_condition(
503
531
 
504
532
 
505
533
  def find_nullable_concepts(
506
- source_map: Dict[str, set[Datasource | QueryDatasource | UnnestJoin]],
507
- datasources: List[Datasource | QueryDatasource],
534
+ source_map: Dict[str, set[BuildDatasource | QueryDatasource | UnnestJoin]],
535
+ datasources: List[BuildDatasource | QueryDatasource],
508
536
  joins: List[BaseJoin | UnnestJoin],
509
537
  ) -> List[str]:
510
538
  """give a set of datasources and joins, find the concepts
@@ -514,7 +542,7 @@ def find_nullable_concepts(
514
542
  datasource_map = {
515
543
  x.identifier: x
516
544
  for x in datasources
517
- if isinstance(x, (Datasource, QueryDatasource))
545
+ if isinstance(x, (BuildDatasource, QueryDatasource))
518
546
  }
519
547
  for join in joins:
520
548
  is_on_nullable_condition = False
@@ -7,32 +7,40 @@ from trilogy.core.constants import CONSTANT_DATASET
7
7
  from trilogy.core.enums import BooleanOperator, SourceType
8
8
  from trilogy.core.env_processor import generate_graph
9
9
  from trilogy.core.ergonomics import generate_cte_names
10
- from trilogy.core.models import (
10
+ from trilogy.core.models.author import MultiSelectLineage, SelectLineage
11
+ from trilogy.core.models.build import (
12
+ BuildConcept,
13
+ BuildConditional,
14
+ BuildDatasource,
15
+ Factory,
16
+ )
17
+ from trilogy.core.models.environment import Environment
18
+ from trilogy.core.models.execute import (
11
19
  CTE,
12
20
  BaseJoin,
13
- Concept,
14
- ConceptDeclarationStatement,
15
- Conditional,
16
- CopyStatement,
17
21
  CTEConceptPair,
18
- Datasource,
19
- Environment,
20
22
  InstantiatedUnnestJoin,
21
23
  Join,
22
- MaterializedDataset,
23
- MultiSelectStatement,
24
- PersistStatement,
25
- ProcessedCopyStatement,
26
- ProcessedQuery,
27
- ProcessedQueryPersist,
28
24
  QueryDatasource,
29
- SelectStatement,
30
25
  UnionCTE,
31
26
  UnnestJoin,
32
27
  )
33
28
  from trilogy.core.optimization import optimize_ctes
34
29
  from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
35
30
  from trilogy.core.processing.nodes import History, SelectNode, StrategyNode
31
+ from trilogy.core.statements.author import (
32
+ ConceptDeclarationStatement,
33
+ CopyStatement,
34
+ MultiSelectStatement,
35
+ PersistStatement,
36
+ SelectStatement,
37
+ )
38
+ from trilogy.core.statements.common import MaterializedDataset
39
+ from trilogy.core.statements.execute import (
40
+ ProcessedCopyStatement,
41
+ ProcessedQuery,
42
+ ProcessedQueryPersist,
43
+ )
36
44
  from trilogy.hooks.base_hook import BaseHook
37
45
  from trilogy.utility import unique
38
46
 
@@ -50,14 +58,19 @@ def base_join_to_join(
50
58
  alias=base_join.alias,
51
59
  )
52
60
 
53
- def get_datasource_cte(datasource: Datasource | QueryDatasource) -> CTE:
61
+ def get_datasource_cte(datasource: BuildDatasource | QueryDatasource) -> CTE:
62
+ eligible = set()
54
63
  for cte in ctes:
55
64
  if cte.source.identifier == datasource.identifier:
56
65
  return cte
66
+ eligible.add(cte.source.identifier)
57
67
  for cte in ctes:
58
68
  if cte.source.datasources[0].identifier == datasource.identifier:
59
69
  return cte
60
- raise ValueError(f"Could not find CTE for datasource {datasource.identifier}")
70
+ eligible.add(cte.source.datasources[0].identifier)
71
+ raise ValueError(
72
+ f"Could not find CTE for datasource {datasource.identifier}; have {eligible}"
73
+ )
61
74
 
62
75
  if base_join.left_datasource is not None:
63
76
  left_cte = get_datasource_cte(base_join.left_datasource)
@@ -114,7 +127,7 @@ def generate_source_map(
114
127
  and isinstance(list(qdv)[0], UnnestJoin)
115
128
  ):
116
129
  source_map[qdk] = []
117
- basic = [x for x in qdv if isinstance(x, Datasource)]
130
+ basic = [x for x in qdv if isinstance(x, BuildDatasource)]
118
131
  for base in basic:
119
132
  source_map[qdk].append(base.safe_identifier)
120
133
 
@@ -163,8 +176,8 @@ def generate_source_map(
163
176
  }, existence_source_map
164
177
 
165
178
 
166
- def datasource_to_query_datasource(datasource: Datasource) -> QueryDatasource:
167
- sub_select: Dict[str, Set[Union[Datasource, QueryDatasource, UnnestJoin]]] = {
179
+ def datasource_to_query_datasource(datasource: BuildDatasource) -> QueryDatasource:
180
+ sub_select: Dict[str, Set[Union[BuildDatasource, QueryDatasource, UnnestJoin]]] = {
168
181
  **{c.address: {datasource} for c in datasource.concepts},
169
182
  }
170
183
  concepts = [c for c in datasource.concepts]
@@ -206,7 +219,7 @@ def resolve_cte_base_name_and_alias_v2(
206
219
  raw_joins: List[Join | InstantiatedUnnestJoin],
207
220
  ) -> Tuple[str | None, str | None]:
208
221
  if (
209
- isinstance(source.datasources[0], Datasource)
222
+ isinstance(source.datasources[0], BuildDatasource)
210
223
  and not source.datasources[0].name == CONSTANT_DATASET
211
224
  ):
212
225
  ds = source.datasources[0]
@@ -265,6 +278,7 @@ def datasource_to_cte(
265
278
  for c in query_datasource.output_concepts
266
279
  ],
267
280
  grain=direct_parents[0].grain,
281
+ order_by=query_datasource.ordering,
268
282
  )
269
283
  return final
270
284
 
@@ -333,6 +347,7 @@ def datasource_to_cte(
333
347
  hidden_concepts=query_datasource.hidden_concepts,
334
348
  base_name_override=base_name,
335
349
  base_alias_override=base_alias,
350
+ order_by=query_datasource.ordering,
336
351
  )
337
352
  if cte.grain != query_datasource.grain:
338
353
  raise ValueError("Grain was corrupted in CTE generation")
@@ -351,26 +366,30 @@ def datasource_to_cte(
351
366
 
352
367
  def get_query_node(
353
368
  environment: Environment,
354
- statement: SelectStatement | MultiSelectStatement,
369
+ statement: SelectLineage | MultiSelectLineage,
355
370
  history: History | None = None,
356
371
  ) -> StrategyNode:
357
- environment = environment.duplicate()
358
- for k, v in statement.local_concepts.items():
359
- environment.concepts[k] = v
360
- graph = generate_graph(environment)
361
- logger.info(
362
- f"{LOGGER_PREFIX} getting source datasource for query with filtering {statement.where_clause_category} and grain {statement.grain}"
363
- )
364
372
  if not statement.output_components:
365
373
  raise ValueError(f"Statement has no output components {statement}")
366
374
 
367
- search_concepts: list[Concept] = statement.output_components
375
+ history = history or History(base_environment=environment)
376
+ build_statement = Factory(environment=environment).build(statement)
377
+ # build_statement = statement
378
+ build_environment = environment.materialize_for_select(
379
+ build_statement.local_concepts
380
+ )
381
+ graph = generate_graph(build_environment)
382
+ logger.info(
383
+ f"{LOGGER_PREFIX} getting source datasource for outputs {statement.output_components} grain {build_statement.grain}"
384
+ )
385
+
386
+ search_concepts: list[BuildConcept] = build_statement.output_components
368
387
 
369
388
  ods: StrategyNode = source_query_concepts(
370
- search_concepts,
371
- environment=environment,
389
+ output_concepts=search_concepts,
390
+ environment=build_environment,
372
391
  g=graph,
373
- conditions=(statement.where_clause if statement.where_clause else None),
392
+ conditions=build_statement.where_clause,
374
393
  history=history,
375
394
  )
376
395
  if not ods:
@@ -378,22 +397,26 @@ def get_query_node(
378
397
  f"Could not find source query concepts for {[x.address for x in search_concepts]}"
379
398
  )
380
399
  ds: StrategyNode = ods
381
- if statement.having_clause:
382
- final = statement.having_clause.conditional
400
+ if build_statement.having_clause:
401
+ final = build_statement.having_clause.conditional
383
402
  if ds.conditions:
384
- final = Conditional(
403
+ final = BuildConditional(
385
404
  left=ds.conditions,
386
- right=statement.having_clause.conditional,
405
+ right=build_statement.having_clause.conditional,
387
406
  operator=BooleanOperator.AND,
388
407
  )
389
408
  ds = SelectNode(
390
- output_concepts=statement.output_components,
409
+ output_concepts=build_statement.output_components,
391
410
  input_concepts=ds.output_concepts,
392
411
  parents=[ds],
393
412
  environment=ds.environment,
394
413
  partial_concepts=ds.partial_concepts,
395
414
  conditions=final,
396
415
  )
416
+ ds.hidden_concepts = build_statement.hidden_components
417
+ ds.ordering = build_statement.order_by
418
+ # TODO: avoid this
419
+ ds.rebuild_cache()
397
420
  return ds
398
421
 
399
422
 
@@ -402,8 +425,7 @@ def get_query_datasources(
402
425
  statement: SelectStatement | MultiSelectStatement,
403
426
  hooks: Optional[List[BaseHook]] = None,
404
427
  ) -> QueryDatasource:
405
-
406
- ds = get_query_node(environment, statement)
428
+ ds = get_query_node(environment, statement.as_lineage(environment))
407
429
  final_qds = ds.resolve()
408
430
  if hooks:
409
431
  for hook in hooks:
@@ -498,22 +520,18 @@ def process_query(
498
520
  for cte in raw_ctes:
499
521
  cte.parent_ctes = [seen[x.name] for x in cte.parent_ctes]
500
522
  deduped_ctes: List[CTE | UnionCTE] = list(seen.values())
501
- root_cte.order_by = statement.order_by
523
+
502
524
  root_cte.limit = statement.limit
503
525
  root_cte.hidden_concepts = statement.hidden_components
504
526
 
505
527
  final_ctes = optimize_ctes(deduped_ctes, root_cte, statement)
528
+ mapping = {x.address: x for x in cte.output_columns}
506
529
  return ProcessedQuery(
507
- order_by=statement.order_by,
508
- grain=statement.grain,
530
+ order_by=root_cte.order_by,
509
531
  limit=statement.limit,
510
- where_clause=statement.where_clause,
511
- having_clause=statement.having_clause,
512
- output_columns=statement.output_components,
532
+ output_columns=[mapping[x.address] for x in statement.output_components],
513
533
  ctes=final_ctes,
514
534
  base=root_cte,
515
- # we no longer do any joins at final level, this should always happen in parent CTEs
516
- joins=[],
517
535
  hidden_columns=set([x for x in statement.hidden_components]),
518
536
  local_concepts=statement.local_concepts,
519
537
  )
File without changes