pytrilogy 0.0.2.47__py3-none-any.whl → 0.0.2.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic; see the package registry's advisory page for more details.

Files changed (69):
  1. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/METADATA +1 -1
  2. pytrilogy-0.0.2.49.dist-info/RECORD +85 -0
  3. trilogy/__init__.py +2 -2
  4. trilogy/constants.py +4 -2
  5. trilogy/core/enums.py +7 -1
  6. trilogy/core/env_processor.py +1 -2
  7. trilogy/core/environment_helpers.py +5 -5
  8. trilogy/core/functions.py +11 -10
  9. trilogy/core/internal.py +2 -3
  10. trilogy/core/models.py +449 -393
  11. trilogy/core/optimization.py +37 -21
  12. trilogy/core/optimizations/__init__.py +1 -1
  13. trilogy/core/optimizations/base_optimization.py +6 -6
  14. trilogy/core/optimizations/inline_constant.py +7 -4
  15. trilogy/core/optimizations/inline_datasource.py +14 -5
  16. trilogy/core/optimizations/predicate_pushdown.py +20 -10
  17. trilogy/core/processing/concept_strategies_v3.py +43 -24
  18. trilogy/core/processing/graph_utils.py +2 -3
  19. trilogy/core/processing/node_generators/__init__.py +7 -5
  20. trilogy/core/processing/node_generators/basic_node.py +4 -4
  21. trilogy/core/processing/node_generators/common.py +10 -11
  22. trilogy/core/processing/node_generators/filter_node.py +7 -9
  23. trilogy/core/processing/node_generators/group_node.py +10 -11
  24. trilogy/core/processing/node_generators/group_to_node.py +5 -5
  25. trilogy/core/processing/node_generators/multiselect_node.py +10 -12
  26. trilogy/core/processing/node_generators/node_merge_node.py +7 -9
  27. trilogy/core/processing/node_generators/rowset_node.py +36 -15
  28. trilogy/core/processing/node_generators/select_merge_node.py +11 -10
  29. trilogy/core/processing/node_generators/select_node.py +5 -5
  30. trilogy/core/processing/node_generators/union_node.py +75 -0
  31. trilogy/core/processing/node_generators/unnest_node.py +2 -3
  32. trilogy/core/processing/node_generators/window_node.py +3 -4
  33. trilogy/core/processing/nodes/__init__.py +9 -5
  34. trilogy/core/processing/nodes/base_node.py +45 -13
  35. trilogy/core/processing/nodes/filter_node.py +3 -4
  36. trilogy/core/processing/nodes/group_node.py +17 -13
  37. trilogy/core/processing/nodes/merge_node.py +14 -12
  38. trilogy/core/processing/nodes/select_node_v2.py +13 -9
  39. trilogy/core/processing/nodes/union_node.py +50 -0
  40. trilogy/core/processing/nodes/unnest_node.py +2 -3
  41. trilogy/core/processing/nodes/window_node.py +2 -3
  42. trilogy/core/processing/utility.py +38 -41
  43. trilogy/core/query_processor.py +71 -51
  44. trilogy/dialect/base.py +95 -53
  45. trilogy/dialect/bigquery.py +2 -3
  46. trilogy/dialect/common.py +5 -4
  47. trilogy/dialect/config.py +0 -2
  48. trilogy/dialect/duckdb.py +2 -2
  49. trilogy/dialect/enums.py +5 -5
  50. trilogy/dialect/postgres.py +2 -2
  51. trilogy/dialect/presto.py +3 -4
  52. trilogy/dialect/snowflake.py +2 -2
  53. trilogy/dialect/sql_server.py +3 -4
  54. trilogy/engine.py +2 -1
  55. trilogy/executor.py +43 -30
  56. trilogy/hooks/base_hook.py +5 -4
  57. trilogy/hooks/graph_hook.py +2 -1
  58. trilogy/hooks/query_debugger.py +18 -8
  59. trilogy/parsing/common.py +15 -20
  60. trilogy/parsing/parse_engine.py +125 -88
  61. trilogy/parsing/render.py +32 -35
  62. trilogy/parsing/trilogy.lark +8 -1
  63. trilogy/scripts/trilogy.py +6 -4
  64. trilogy/utility.py +1 -1
  65. pytrilogy-0.0.2.47.dist-info/RECORD +0 -83
  66. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/LICENSE.md +0 -0
  67. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/WHEEL +0 -0
  68. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/entry_points.txt +0 -0
  69. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,50 @@
1
+ from typing import List
2
+
3
+ from trilogy.core.models import (
4
+ Concept,
5
+ QueryDatasource,
6
+ SourceType,
7
+ )
8
+ from trilogy.core.processing.nodes.base_node import StrategyNode
9
+
10
+
11
class UnionNode(StrategyNode):
    """Union nodes represent combining two keyspaces.

    A strategy node whose parents are merged by set union rather than join.
    Adds no state of its own beyond what StrategyNode carries; its
    ``source_type`` tag is what downstream CTE generation keys on to emit a
    UNION instead of a joined query (presumably — confirmed only by the tag
    itself here; verify against datasource_to_cte handling of
    SourceType.UNION).
    """

    # Marks the resolved QueryDatasource as a union for downstream processing.
    source_type = SourceType.UNION

    def __init__(
        self,
        input_concepts: List[Concept],
        output_concepts: List[Concept],
        environment,
        g,
        whole_grain: bool = False,
        parents: List["StrategyNode"] | None = None,
        depth: int = 0,
    ):
        # Pure delegation: all arguments are forwarded unchanged to the
        # StrategyNode base constructor; UnionNode defines no extra fields.
        super().__init__(
            input_concepts=input_concepts,
            output_concepts=output_concepts,
            environment=environment,
            g=g,
            whole_grain=whole_grain,
            parents=parents,
            depth=depth,
        )

    def _resolve(self) -> QueryDatasource:
        """We need to ensure that any filtered values are removed from the output to avoid inappropriate references"""
        # NOTE(review): the docstring describes removing filtered values, but
        # the body currently just returns the base resolution unchanged —
        # either the docstring is aspirational or the filtering happens in
        # super()._resolve(); confirm before relying on it.
        base = super()._resolve()
        return base

    def copy(self) -> "UnionNode":
        # Shallow copy: the concept lists are fresh list objects, but the
        # environment, graph, and parent nodes are shared by reference.
        return UnionNode(
            input_concepts=list(self.input_concepts),
            output_concepts=list(self.output_concepts),
            environment=self.environment,
            g=self.g,
            whole_grain=self.whole_grain,
            parents=self.parents,
            depth=self.depth,
        )
@@ -1,12 +1,11 @@
1
1
  from typing import List
2
2
 
3
-
4
3
  from trilogy.core.models import (
4
+ Concept,
5
+ Function,
5
6
  QueryDatasource,
6
7
  SourceType,
7
- Concept,
8
8
  UnnestJoin,
9
- Function,
10
9
  )
11
10
  from trilogy.core.processing.nodes.base_node import StrategyNode
12
11
 
@@ -1,8 +1,7 @@
1
1
  from typing import List
2
2
 
3
-
4
- from trilogy.core.models import SourceType, Concept
5
- from trilogy.core.processing.nodes.base_node import StrategyNode, QueryDatasource
3
+ from trilogy.core.models import Concept, SourceType
4
+ from trilogy.core.processing.nodes.base_node import QueryDatasource, StrategyNode
6
5
 
7
6
 
8
7
  class WindowNode(StrategyNode):
@@ -1,52 +1,48 @@
1
- from typing import List, Tuple, Dict, Set, Any
1
+ from dataclasses import dataclass
2
+ from enum import Enum
3
+ from logging import Logger
4
+ from typing import Any, Dict, List, Set, Tuple
5
+
2
6
  import networkx as nx
7
+
8
+ from trilogy.core.enums import BooleanOperator, FunctionClass, Granularity, Purpose
3
9
  from trilogy.core.models import (
4
- Datasource,
5
- JoinType,
10
+ CTE,
11
+ AggregateWrapper,
6
12
  BaseJoin,
13
+ CaseElse,
14
+ CaseWhen,
15
+ Comparison,
7
16
  Concept,
8
- QueryDatasource,
9
- LooseConceptList,
10
- Environment,
17
+ ConceptPair,
11
18
  Conditional,
12
- SubselectComparison,
13
- Comparison,
14
- Parenthetical,
15
- Function,
16
- FilterItem,
17
- MagicConstants,
18
- WindowItem,
19
- AggregateWrapper,
19
+ Datasource,
20
20
  DataType,
21
- ConceptPair,
22
- UnnestJoin,
23
- CaseWhen,
24
- CaseElse,
25
- MapWrapper,
26
- ListWrapper,
27
- MapType,
28
21
  DatePart,
29
- NumericType,
22
+ Environment,
23
+ FilterItem,
24
+ Function,
25
+ JoinType,
30
26
  ListType,
31
- TupleWrapper,
32
- CTE,
27
+ ListWrapper,
28
+ LooseConceptList,
29
+ MagicConstants,
30
+ MapType,
31
+ MapWrapper,
33
32
  MultiSelectStatement,
34
- SelectStatement,
33
+ NumericType,
34
+ Parenthetical,
35
35
  ProcessedQuery,
36
+ QueryDatasource,
37
+ SelectStatement,
38
+ SubselectComparison,
39
+ TupleWrapper,
40
+ UnionCTE,
41
+ UnnestJoin,
42
+ WindowItem,
36
43
  )
37
-
38
- from trilogy.core.enums import Purpose, Granularity, BooleanOperator
39
- from enum import Enum
40
44
  from trilogy.utility import unique
41
45
 
42
- from logging import Logger
43
-
44
-
45
- from trilogy.core.enums import FunctionClass
46
-
47
-
48
- from dataclasses import dataclass
49
-
50
46
 
51
47
  class NodeType(Enum):
52
48
  CONCEPT = 1
@@ -218,7 +214,7 @@ def concept_to_relevant_joins(concepts: list[Concept]) -> List[Concept]:
218
214
  x for x in concepts if x.keys and all([key in addresses for key in x.keys])
219
215
  ]
220
216
  )
221
- final = [c for c in concepts if c not in sub_props]
217
+ final = [c for c in concepts if c.address not in sub_props]
222
218
  return unique(final, "address")
223
219
 
224
220
 
@@ -314,7 +310,6 @@ def get_node_joins(
314
310
  environment: Environment,
315
311
  # concepts:List[Concept],
316
312
  ):
317
-
318
313
  graph = nx.Graph()
319
314
  partials: dict[str, list[str]] = {}
320
315
  ds_node_map: dict[str, QueryDatasource | Datasource] = {}
@@ -536,7 +531,9 @@ def find_nullable_concepts(
536
531
  return list(sorted(final_nullable))
537
532
 
538
533
 
539
- def sort_select_output_processed(cte: CTE, query: ProcessedQuery) -> CTE:
534
+ def sort_select_output_processed(
535
+ cte: CTE | UnionCTE, query: ProcessedQuery
536
+ ) -> CTE | UnionCTE:
540
537
  hidden_addresses = [c.address for c in query.hidden_columns]
541
538
  output_addresses = [
542
539
  c.address for c in query.output_columns if c.address not in hidden_addresses
@@ -552,8 +549,8 @@ def sort_select_output_processed(cte: CTE, query: ProcessedQuery) -> CTE:
552
549
 
553
550
 
554
551
  def sort_select_output(
555
- cte: CTE, query: SelectStatement | MultiSelectStatement | ProcessedQuery
556
- ) -> CTE:
552
+ cte: CTE | UnionCTE, query: SelectStatement | MultiSelectStatement | ProcessedQuery
553
+ ) -> CTE | UnionCTE:
557
554
  if isinstance(query, ProcessedQuery):
558
555
  return sort_select_output_processed(cte, query)
559
556
  hidden_addresses = [c.address for c in query.hidden_components]
@@ -1,43 +1,40 @@
1
- from typing import List, Optional, Set, Union, Dict, Tuple
1
+ from collections import defaultdict
2
+ from math import ceil
3
+ from typing import Dict, List, Optional, Set, Tuple, Union
2
4
 
3
- from trilogy.core.env_processor import generate_graph
4
- from trilogy.core.graph_models import ReferenceGraph
5
+ from trilogy.constants import CONFIG, logger
5
6
  from trilogy.core.constants import CONSTANT_DATASET
6
- from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
7
- from trilogy.core.enums import BooleanOperator
8
- from trilogy.constants import CONFIG
9
- from trilogy.core.processing.nodes import SelectNode, StrategyNode, History
7
+ from trilogy.core.enums import BooleanOperator, SourceType
8
+ from trilogy.core.env_processor import generate_graph
9
+ from trilogy.core.ergonomics import generate_cte_names
10
10
  from trilogy.core.models import (
11
+ CTE,
12
+ BaseJoin,
11
13
  Concept,
12
- Environment,
13
- PersistStatement,
14
14
  ConceptDeclarationStatement,
15
- SelectStatement,
16
- MultiSelectStatement,
17
- CTE,
15
+ Conditional,
16
+ CopyStatement,
17
+ CTEConceptPair,
18
+ Datasource,
19
+ Environment,
20
+ InstantiatedUnnestJoin,
18
21
  Join,
19
- UnnestJoin,
20
22
  MaterializedDataset,
23
+ MultiSelectStatement,
24
+ PersistStatement,
25
+ ProcessedCopyStatement,
21
26
  ProcessedQuery,
22
27
  ProcessedQueryPersist,
23
28
  QueryDatasource,
24
- Datasource,
25
- BaseJoin,
26
- InstantiatedUnnestJoin,
27
- Conditional,
28
- ProcessedCopyStatement,
29
- CopyStatement,
30
- CTEConceptPair,
29
+ SelectStatement,
30
+ UnionCTE,
31
+ UnnestJoin,
31
32
  )
32
-
33
- from trilogy.utility import unique
34
-
35
- from trilogy.hooks.base_hook import BaseHook
36
- from trilogy.constants import logger
37
- from trilogy.core.ergonomics import generate_cte_names
38
33
  from trilogy.core.optimization import optimize_ctes
39
- from math import ceil
40
- from collections import defaultdict
34
+ from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
35
+ from trilogy.core.processing.nodes import History, SelectNode, StrategyNode
36
+ from trilogy.hooks.base_hook import BaseHook
37
+ from trilogy.utility import unique
41
38
 
42
39
  LOGGER_PREFIX = "[QUERY BUILD]"
43
40
 
@@ -103,7 +100,7 @@ def base_join_to_join(
103
100
 
104
101
 
105
102
  def generate_source_map(
106
- query_datasource: QueryDatasource, all_new_ctes: List[CTE]
103
+ query_datasource: QueryDatasource, all_new_ctes: List[CTE | UnionCTE]
107
104
  ) -> Tuple[Dict[str, list[str]], Dict[str, list[str]]]:
108
105
  source_map: Dict[str, list[str]] = defaultdict(list)
109
106
  # now populate anything derived in this level
@@ -246,24 +243,44 @@ def resolve_cte_base_name_and_alias_v2(
246
243
  return None, None
247
244
 
248
245
 
249
- def datasource_to_ctes(
246
+ def datasource_to_cte(
250
247
  query_datasource: QueryDatasource, name_map: dict[str, str]
251
- ) -> List[CTE]:
252
- output: List[CTE] = []
253
- parents: list[CTE] = []
248
+ ) -> CTE | UnionCTE:
249
+ parents: list[CTE | UnionCTE] = []
250
+ if query_datasource.source_type == SourceType.UNION:
251
+ direct_parents: list[CTE | UnionCTE] = []
252
+ for child in query_datasource.datasources:
253
+ assert isinstance(child, QueryDatasource)
254
+ child_cte = datasource_to_cte(child, name_map=name_map)
255
+ direct_parents.append(child_cte)
256
+ parents += child_cte.parent_ctes
257
+ human_id = generate_cte_name(query_datasource.identifier, name_map)
258
+ final = UnionCTE(
259
+ name=human_id,
260
+ source=query_datasource,
261
+ parent_ctes=parents,
262
+ internal_ctes=direct_parents,
263
+ output_columns=[
264
+ c.with_grain(query_datasource.grain)
265
+ for c in query_datasource.output_concepts
266
+ ],
267
+ grain=direct_parents[0].grain,
268
+ )
269
+ return final
270
+
254
271
  if len(query_datasource.datasources) > 1 or any(
255
272
  [isinstance(x, QueryDatasource) for x in query_datasource.datasources]
256
273
  ):
257
- all_new_ctes: List[CTE] = []
274
+ all_new_ctes: List[CTE | UnionCTE] = []
258
275
  for datasource in query_datasource.datasources:
259
276
  if isinstance(datasource, QueryDatasource):
260
277
  sub_datasource = datasource
261
278
  else:
262
279
  sub_datasource = datasource_to_query_datasource(datasource)
263
280
 
264
- sub_cte = datasource_to_ctes(sub_datasource, name_map)
265
- parents += sub_cte
266
- all_new_ctes += sub_cte
281
+ sub_cte = datasource_to_cte(sub_datasource, name_map)
282
+ parents.append(sub_cte)
283
+ all_new_ctes.append(sub_cte)
267
284
  source_map, existence_map = generate_source_map(query_datasource, all_new_ctes)
268
285
 
269
286
  else:
@@ -284,7 +301,10 @@ def datasource_to_ctes(
284
301
 
285
302
  human_id = generate_cte_name(query_datasource.identifier, name_map)
286
303
 
287
- final_joins = [base_join_to_join(join, parents) for join in query_datasource.joins]
304
+ final_joins = [
305
+ base_join_to_join(join, [x for x in parents if isinstance(x, CTE)])
306
+ for join in query_datasource.joins
307
+ ]
288
308
 
289
309
  base_name, base_alias = resolve_cte_base_name_and_alias_v2(
290
310
  human_id, query_datasource, source_map, final_joins
@@ -326,17 +346,18 @@ def datasource_to_ctes(
326
346
  f"Missing {x.address} in {cte.source_map}, source map {cte.source.source_map.keys()} "
327
347
  )
328
348
 
329
- output.append(cte)
330
- return output
349
+ return cte
331
350
 
332
351
 
333
352
  def get_query_node(
334
353
  environment: Environment,
335
354
  statement: SelectStatement | MultiSelectStatement,
336
- graph: Optional[ReferenceGraph] = None,
337
355
  history: History | None = None,
338
356
  ) -> StrategyNode:
339
- graph = graph or generate_graph(environment)
357
+ environment = environment.duplicate()
358
+ for k, v in statement.local_concepts.items():
359
+ environment.concepts[k] = v
360
+ graph = generate_graph(environment)
340
361
  logger.info(
341
362
  f"{LOGGER_PREFIX} getting source datasource for query with filtering {statement.where_clause_category} and output {[str(c) for c in statement.output_components]}"
342
363
  )
@@ -380,11 +401,10 @@ def get_query_node(
380
401
  def get_query_datasources(
381
402
  environment: Environment,
382
403
  statement: SelectStatement | MultiSelectStatement,
383
- graph: Optional[ReferenceGraph] = None,
384
404
  hooks: Optional[List[BaseHook]] = None,
385
405
  ) -> QueryDatasource:
386
406
 
387
- ds = get_query_node(environment, statement, graph)
407
+ ds = get_query_node(environment, statement)
388
408
  final_qds = ds.resolve()
389
409
  if hooks:
390
410
  for hook in hooks:
@@ -393,7 +413,7 @@ def get_query_datasources(
393
413
  return final_qds
394
414
 
395
415
 
396
- def flatten_ctes(input: CTE) -> list[CTE]:
416
+ def flatten_ctes(input: CTE | UnionCTE) -> list[CTE | UnionCTE]:
397
417
  output = [input]
398
418
  for cte in input.parent_ctes:
399
419
  output += flatten_ctes(cte)
@@ -456,18 +476,17 @@ def process_query(
456
476
  hooks: List[BaseHook] | None = None,
457
477
  ) -> ProcessedQuery:
458
478
  hooks = hooks or []
459
- statement.refresh_bindings(environment)
460
- graph = generate_graph(environment)
479
+
461
480
  root_datasource = get_query_datasources(
462
- environment=environment, graph=graph, statement=statement, hooks=hooks
481
+ environment=environment, statement=statement, hooks=hooks
463
482
  )
464
483
  for hook in hooks:
465
484
  hook.process_root_datasource(root_datasource)
466
485
  # this should always return 1 - TODO, refactor
467
- root_cte = datasource_to_ctes(root_datasource, environment.cte_name_map)[0]
486
+ root_cte = datasource_to_cte(root_datasource, environment.cte_name_map)
468
487
  for hook in hooks:
469
488
  hook.process_root_cte(root_cte)
470
- raw_ctes: List[CTE] = list(reversed(flatten_ctes(root_cte)))
489
+ raw_ctes: List[CTE | UnionCTE] = list(reversed(flatten_ctes(root_cte)))
471
490
  seen = dict()
472
491
  # we can have duplicate CTEs at this point
473
492
  # so merge them together
@@ -479,7 +498,7 @@ def process_query(
479
498
  seen[cte.name] = seen[cte.name] + cte
480
499
  for cte in raw_ctes:
481
500
  cte.parent_ctes = [seen[x.name] for x in cte.parent_ctes]
482
- deduped_ctes: List[CTE] = list(seen.values())
501
+ deduped_ctes: List[CTE | UnionCTE] = list(seen.values())
483
502
  root_cte.order_by = statement.order_by
484
503
  root_cte.limit = statement.limit
485
504
  root_cte.hidden_concepts = [x for x in statement.hidden_components]
@@ -497,4 +516,5 @@ def process_query(
497
516
  # we no longer do any joins at final level, this should always happen in parent CTEs
498
517
  joins=[],
499
518
  hidden_columns=[x for x in statement.hidden_components],
519
+ local_concepts=statement.local_concepts,
500
520
  )
trilogy/dialect/base.py CHANGED
@@ -1,66 +1,67 @@
1
- from typing import List, Union, Optional, Dict, Any, Sequence, Callable
1
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Union
2
2
 
3
3
  from jinja2 import Template
4
4
 
5
- from trilogy.core.processing.utility import (
6
- is_scalar_condition,
7
- decompose_condition,
8
- sort_select_output,
9
- )
10
- from trilogy.constants import CONFIG, logger, MagicConstants
11
- from trilogy.core.internal import DEFAULT_CONCEPTS
5
+ from trilogy.constants import CONFIG, MagicConstants, logger
12
6
  from trilogy.core.enums import (
7
+ DatePart,
13
8
  FunctionType,
9
+ UnnestMode,
14
10
  WindowType,
15
- DatePart,
16
11
  )
12
+ from trilogy.core.internal import DEFAULT_CONCEPTS
17
13
  from trilogy.core.models import (
18
- ListType,
19
- DataType,
20
- Concept,
21
14
  CTE,
22
- ProcessedQuery,
23
- ProcessedQueryPersist,
24
- ProcessedShowStatement,
15
+ AggregateWrapper,
16
+ CaseElse,
17
+ CaseWhen,
18
+ Comparison,
25
19
  CompiledCTE,
20
+ Concept,
21
+ ConceptDeclarationStatement,
26
22
  Conditional,
27
- Comparison,
28
- SubselectComparison,
29
- OrderItem,
30
- WindowItem,
23
+ CopyStatement,
24
+ Datasource,
25
+ DataType,
26
+ Environment,
31
27
  FilterItem,
32
28
  Function,
33
- AggregateWrapper,
34
- Parenthetical,
35
- CaseWhen,
36
- CaseElse,
37
- SelectStatement,
38
- PersistStatement,
39
- Environment,
40
- RawColumnExpr,
29
+ ImportStatement,
30
+ ListType,
41
31
  ListWrapper,
42
- TupleWrapper,
32
+ MapType,
43
33
  MapWrapper,
44
- ShowStatement,
45
- RowsetItem,
34
+ MergeStatementV2,
46
35
  MultiSelectStatement,
47
- RowsetDerivationStatement,
48
- ConceptDeclarationStatement,
49
- ImportStatement,
50
- RawSQLStatement,
51
- ProcessedRawSQLStatement,
52
36
  NumericType,
53
- MapType,
54
- StructType,
55
- MergeStatementV2,
56
- Datasource,
57
- CopyStatement,
37
+ OrderItem,
38
+ Parenthetical,
39
+ PersistStatement,
58
40
  ProcessedCopyStatement,
41
+ ProcessedQuery,
42
+ ProcessedQueryPersist,
43
+ ProcessedRawSQLStatement,
44
+ ProcessedShowStatement,
45
+ RawColumnExpr,
46
+ RawSQLStatement,
47
+ RowsetDerivationStatement,
48
+ RowsetItem,
49
+ SelectStatement,
50
+ ShowStatement,
51
+ StructType,
52
+ SubselectComparison,
53
+ TupleWrapper,
54
+ UnionCTE,
55
+ WindowItem,
59
56
  )
60
- from trilogy.core.query_processor import process_query, process_persist, process_copy
57
+ from trilogy.core.processing.utility import (
58
+ decompose_condition,
59
+ is_scalar_condition,
60
+ sort_select_output,
61
+ )
62
+ from trilogy.core.query_processor import process_copy, process_persist, process_query
61
63
  from trilogy.dialect.common import render_join, render_unnest
62
64
  from trilogy.hooks.base_hook import BaseHook
63
- from trilogy.core.enums import UnnestMode
64
65
 
65
66
  LOGGER_PREFIX = "[RENDERING]"
66
67
 
@@ -130,7 +131,7 @@ FUNCTION_MAP = {
130
131
  FunctionType.SPLIT: lambda x: f"split({x[0]}, {x[1]})",
131
132
  FunctionType.IS_NULL: lambda x: f"isnull({x[0]})",
132
133
  FunctionType.BOOL: lambda x: f"CASE WHEN {x[0]} THEN TRUE ELSE FALSE END",
133
- # complex
134
+ # Complex
134
135
  FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
135
136
  FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}][1]",
136
137
  FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
@@ -230,7 +231,7 @@ def safe_quote(string: str, quote_char: str):
230
231
  return ".".join([f"{quote_char}{string}{quote_char}" for string in components])
231
232
 
232
233
 
233
- def safe_get_cte_value(coalesce, cte: CTE, c: Concept, quote_char: str):
234
+ def safe_get_cte_value(coalesce, cte: CTE | UnionCTE, c: Concept, quote_char: str):
234
235
  address = c.address
235
236
  raw = cte.source_map.get(address, None)
236
237
 
@@ -255,15 +256,26 @@ class BaseDialect:
255
256
  UNNEST_MODE = UnnestMode.CROSS_APPLY
256
257
 
257
258
  def render_order_item(
258
- self, order_item: OrderItem, cte: CTE, final: bool = False
259
+ self,
260
+ order_item: OrderItem,
261
+ cte: CTE | UnionCTE,
262
+ final: bool = False,
263
+ alias: bool = True,
259
264
  ) -> str:
260
265
  if final:
266
+ if not alias:
267
+ return f"{self.QUOTE_CHARACTER}{order_item.expr.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"
268
+
261
269
  return f"{cte.name}.{self.QUOTE_CHARACTER}{order_item.expr.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"
262
270
 
263
271
  return f"{self.render_concept_sql(order_item.expr, cte=cte, alias=False)} {order_item.order.value}"
264
272
 
265
273
  def render_concept_sql(
266
- self, c: Concept, cte: CTE, alias: bool = True, raise_invalid: bool = False
274
+ self,
275
+ c: Concept,
276
+ cte: CTE | UnionCTE,
277
+ alias: bool = True,
278
+ raise_invalid: bool = False,
267
279
  ) -> str:
268
280
  result = None
269
281
  if c.pseudonyms:
@@ -290,7 +302,7 @@ class BaseDialect:
290
302
  return result
291
303
 
292
304
  def _render_concept_sql(
293
- self, c: Concept, cte: CTE, raise_invalid: bool = False
305
+ self, c: Concept, cte: CTE | UnionCTE, raise_invalid: bool = False
294
306
  ) -> str:
295
307
  # only recurse while it's in sources of the current cte
296
308
  logger.debug(
@@ -348,6 +360,20 @@ class BaseDialect:
348
360
  " target grain"
349
361
  )
350
362
  rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.function.operator](args)}"
363
+ elif (
364
+ isinstance(c.lineage, Function)
365
+ and c.lineage.operator == FunctionType.UNION
366
+ ):
367
+ local_matched = [
368
+ x
369
+ for x in c.lineage.arguments
370
+ if isinstance(x, Concept) and x.address in cte.output_columns
371
+ ]
372
+ if not local_matched:
373
+ raise SyntaxError(
374
+ "Could not find appropriate source element for union"
375
+ )
376
+ rval = self.render_expr(local_matched[0], cte)
351
377
  elif (
352
378
  isinstance(c.lineage, Function)
353
379
  and c.lineage.operator == FunctionType.CONSTANT
@@ -447,13 +473,11 @@ class BaseDialect:
447
473
  FilterItem,
448
474
  # FilterItem
449
475
  ],
450
- cte: Optional[CTE] = None,
451
- cte_map: Optional[Dict[str, CTE]] = None,
476
+ cte: Optional[CTE | UnionCTE] = None,
477
+ cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
452
478
  raise_invalid: bool = False,
453
479
  ) -> str:
454
-
455
480
  if isinstance(e, SubselectComparison):
456
-
457
481
  if isinstance(e.right, Concept):
458
482
  # we won't always have an existnce map
459
483
  # so fall back to the normal map
@@ -559,6 +583,13 @@ class BaseDialect:
559
583
  elif isinstance(e, FilterItem):
560
584
  return f"CASE WHEN {self.render_expr(e.where.conditional,cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} THEN {self.render_expr(e.content, cte, cte_map=cte_map, raise_invalid=raise_invalid)} ELSE NULL END"
561
585
  elif isinstance(e, Concept):
586
+ if (
587
+ isinstance(e.lineage, Function)
588
+ and e.lineage.operator == FunctionType.CONSTANT
589
+ and CONFIG.rendering.parameters is True
590
+ and e.datatype.data_type != DataType.MAP
591
+ ):
592
+ return f":{e.safe_address}"
562
593
  if cte:
563
594
  return self.render_concept_sql(
564
595
  e, cte, alias=False, raise_invalid=raise_invalid
@@ -592,7 +623,18 @@ class BaseDialect:
592
623
  else:
593
624
  raise ValueError(f"Unable to render type {type(e)} {e}")
594
625
 
595
- def render_cte(self, cte: CTE, auto_sort: bool = True) -> CompiledCTE:
626
+ def render_cte(self, cte: CTE | UnionCTE, auto_sort: bool = True) -> CompiledCTE:
627
+ if isinstance(cte, UnionCTE):
628
+ base_statement = f"\n{cte.operator}\n".join(
629
+ [self.render_cte(child).statement for child in cte.internal_ctes]
630
+ )
631
+ if cte.order_by:
632
+ ordering = [
633
+ self.render_order_item(i, cte, final=True, alias=False)
634
+ for i in cte.order_by.items
635
+ ]
636
+ base_statement += "\nORDER BY " + ",".join(ordering)
637
+ return CompiledCTE(name=cte.name, statement=base_statement)
596
638
  if self.UNNEST_MODE in (
597
639
  UnnestMode.CROSS_APPLY,
598
640
  UnnestMode.CROSS_JOIN,
@@ -863,7 +905,7 @@ class BaseDialect:
863
905
  if CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in final:
864
906
  raise ValueError(
865
907
  f"Invalid reference string found in query: {final}, this should never"
866
- " occur. Please report this issue."
908
+ " occur. Please create a GitHub issue to report this."
867
909
  )
868
910
  logger.info(f"{LOGGER_PREFIX} Compiled query: {final}")
869
911
  return final
@@ -1,11 +1,10 @@
1
- from typing import Mapping, Callable, Any
1
+ from typing import Any, Callable, Mapping
2
2
 
3
3
  from jinja2 import Template
4
4
 
5
- from trilogy.core.enums import FunctionType, WindowType, UnnestMode
5
+ from trilogy.core.enums import FunctionType, UnnestMode, WindowType
6
6
  from trilogy.dialect.base import BaseDialect
7
7
 
8
-
9
8
  WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
10
9
 
11
10
  FUNCTION_MAP = {