pytrilogy 0.0.1.104__tar.gz → 0.0.1.106__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pytrilogy might be problematic; see the release details on the package registry for more information.

Files changed (101)
  1. {pytrilogy-0.0.1.104/pytrilogy.egg-info → pytrilogy-0.0.1.106}/PKG-INFO +1 -1
  2. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106/pytrilogy.egg-info}/PKG-INFO +1 -1
  3. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/pytrilogy.egg-info/SOURCES.txt +1 -0
  4. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_select.py +2 -1
  5. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/__init__.py +3 -2
  6. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/constants.py +1 -0
  7. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/models.py +226 -49
  8. pytrilogy-0.0.1.106/trilogy/core/optimization.py +141 -0
  9. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/concept_strategies_v3.py +1 -0
  10. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/common.py +19 -7
  11. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/filter_node.py +37 -10
  12. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/merge_node.py +11 -1
  13. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/nodes/base_node.py +4 -2
  14. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/nodes/group_node.py +5 -2
  15. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/nodes/merge_node.py +13 -8
  16. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/query_processor.py +5 -2
  17. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/base.py +85 -54
  18. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/bigquery.py +6 -4
  19. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/common.py +8 -6
  20. pytrilogy-0.0.1.106/trilogy/dialect/config.py +123 -0
  21. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/duckdb.py +5 -4
  22. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/enums.py +40 -19
  23. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/postgres.py +4 -2
  24. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/presto.py +6 -4
  25. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/snowflake.py +6 -4
  26. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/sql_server.py +4 -1
  27. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/executor.py +18 -5
  28. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/parsing/common.py +30 -0
  29. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/parsing/parse_engine.py +43 -83
  30. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/parsing/render.py +0 -122
  31. pytrilogy-0.0.1.104/trilogy/dialect/config.py +0 -55
  32. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/LICENSE.md +0 -0
  33. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/README.md +0 -0
  34. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/pyproject.toml +0 -0
  35. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/pytrilogy.egg-info/dependency_links.txt +0 -0
  36. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/pytrilogy.egg-info/entry_points.txt +0 -0
  37. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/pytrilogy.egg-info/requires.txt +0 -0
  38. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/pytrilogy.egg-info/top_level.txt +0 -0
  39. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/setup.cfg +0 -0
  40. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/setup.py +0 -0
  41. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_declarations.py +0 -0
  42. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_derived_concepts.py +0 -0
  43. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_discovery_nodes.py +0 -0
  44. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_environment.py +0 -0
  45. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_functions.py +0 -0
  46. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_imports.py +0 -0
  47. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_metadata.py +0 -0
  48. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_models.py +0 -0
  49. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_multi_join_assignments.py +0 -0
  50. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_parsing.py +0 -0
  51. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_partial_handling.py +0 -0
  52. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_query_processing.py +0 -0
  53. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_statements.py +0 -0
  54. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_undefined_concept.py +0 -0
  55. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/tests/test_where_clause.py +0 -0
  56. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/compiler.py +0 -0
  57. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/__init__.py +0 -0
  58. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/constants.py +0 -0
  59. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/enums.py +0 -0
  60. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/env_processor.py +0 -0
  61. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/environment_helpers.py +0 -0
  62. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/ergonomics.py +0 -0
  63. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/exceptions.py +0 -0
  64. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/functions.py +0 -0
  65. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/graph_models.py +0 -0
  66. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/internal.py +0 -0
  67. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/__init__.py +0 -0
  68. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/graph_utils.py +0 -0
  69. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/__init__.py +0 -0
  70. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/basic_node.py +0 -0
  71. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/concept_merge.py +0 -0
  72. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/group_node.py +0 -0
  73. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
  74. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
  75. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/rowset_node.py +0 -0
  76. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/select_node.py +0 -0
  77. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/unnest_node.py +0 -0
  78. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/node_generators/window_node.py +0 -0
  79. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/nodes/__init__.py +0 -0
  80. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/nodes/filter_node.py +0 -0
  81. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/nodes/select_node_v2.py +0 -0
  82. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/nodes/unnest_node.py +0 -0
  83. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/nodes/window_node.py +0 -0
  84. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/core/processing/utility.py +0 -0
  85. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/dialect/__init__.py +0 -0
  86. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/docs/__init__.py +0 -0
  87. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/engine.py +0 -0
  88. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/hooks/__init__.py +0 -0
  89. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/hooks/base_hook.py +0 -0
  90. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/hooks/graph_hook.py +0 -0
  91. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/hooks/query_debugger.py +0 -0
  92. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/metadata/__init__.py +0 -0
  93. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/parser.py +0 -0
  94. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/parsing/__init__.py +0 -0
  95. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/parsing/config.py +0 -0
  96. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/parsing/exceptions.py +0 -0
  97. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/parsing/helpers.py +0 -0
  98. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/py.typed +0 -0
  99. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/scripts/__init__.py +0 -0
  100. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/scripts/trilogy.py +0 -0
  101. {pytrilogy-0.0.1.104 → pytrilogy-0.0.1.106}/trilogy/utility.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.104
3
+ Version: 0.0.1.106
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.104
3
+ Version: 0.0.1.106
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -43,6 +43,7 @@ trilogy/core/functions.py
43
43
  trilogy/core/graph_models.py
44
44
  trilogy/core/internal.py
45
45
  trilogy/core/models.py
46
+ trilogy/core/optimization.py
46
47
  trilogy/core/query_processor.py
47
48
  trilogy/core/processing/__init__.py
48
49
  trilogy/core/processing/concept_strategies_v3.py
@@ -125,4 +125,5 @@ def test_modifiers():
125
125
  generator = BigqueryDialect()
126
126
 
127
127
  text = generator.compile_statement(query)
128
- assert "`b` = 2" in text
128
+ assert "2 = 2" in text
129
+ assert "as `b`" not in text
@@ -2,7 +2,8 @@ from trilogy.core.models import Environment
2
2
  from trilogy.dialect.enums import Dialects
3
3
  from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
+ from trilogy.constants import CONFIG
5
6
 
6
- __version__ = "0.0.1.104"
7
+ __version__ = "0.0.1.106"
7
8
 
8
- __all__ = ["parse", "Executor", "Dialects", "Environment"]
9
+ __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
@@ -23,6 +23,7 @@ NULL_VALUE = MagicConstants.NULL
23
23
  class Config:
24
24
  strict_mode: bool = True
25
25
  human_identifiers: bool = True
26
+ inline_datasources: bool = True
26
27
 
27
28
 
28
29
  CONFIG = Config()
@@ -34,7 +34,11 @@ from pydantic import (
34
34
  from lark.tree import Meta
35
35
  from pathlib import Path
36
36
  from trilogy.constants import logger, DEFAULT_NAMESPACE, ENV_CACHE_NAME, MagicConstants
37
- from trilogy.core.constants import ALL_ROWS_CONCEPT, INTERNAL_NAMESPACE
37
+ from trilogy.core.constants import (
38
+ ALL_ROWS_CONCEPT,
39
+ INTERNAL_NAMESPACE,
40
+ CONSTANT_DATASET,
41
+ )
38
42
  from trilogy.core.enums import (
39
43
  InfiniteFunctionArgs,
40
44
  Purpose,
@@ -113,15 +117,27 @@ NAMESPACED_TYPES = Union[
113
117
 
114
118
 
115
119
  class Namespaced(ABC):
116
- pass
117
120
 
118
121
  def with_namespace(self, namespace: str):
119
122
  raise NotImplementedError
120
123
 
121
124
 
122
- class SelectGrain(ABC):
123
- pass
125
+ class ConceptArgs(ABC):
126
+
127
+ @property
128
+ def concept_arguments(self) -> List["Concept"]:
129
+ raise NotImplementedError
130
+
131
+ @property
132
+ def existence_arguments(self) -> List["Concept"]:
133
+ return []
134
+
135
+ @property
136
+ def row_arguments(self) -> List["Concept"]:
137
+ return self.concept_arguments
138
+
124
139
 
140
+ class SelectGrain(ABC):
125
141
  def with_select_grain(self, grain: Grain):
126
142
  raise NotImplementedError
127
143
 
@@ -1395,16 +1411,11 @@ class MultiSelectStatement(Namespaced, BaseModel):
1395
1411
  return output
1396
1412
 
1397
1413
  def find_source(self, concept: Concept, cte: CTE) -> Concept:
1398
- all = []
1399
1414
  for x in self.align.items:
1400
1415
  if concept.name == x.alias:
1401
1416
  for c in x.concepts:
1402
1417
  if c.address in cte.output_lcl:
1403
- all.append(c)
1404
-
1405
- if len(all) == 1:
1406
- return all[0]
1407
-
1418
+ return c
1408
1419
  raise SyntaxError(
1409
1420
  f"Could not find upstream map for multiselect {str(concept)} on cte ({cte})"
1410
1421
  )
@@ -1427,6 +1438,7 @@ class MultiSelectStatement(Namespaced, BaseModel):
1427
1438
 
1428
1439
  class Address(BaseModel):
1429
1440
  location: str
1441
+ is_query: bool = False
1430
1442
 
1431
1443
 
1432
1444
  class Query(BaseModel):
@@ -1522,10 +1534,27 @@ class Datasource(Namespaced, BaseModel):
1522
1534
  default_factory=lambda: DatasourceMetadata(freshness_concept=None)
1523
1535
  )
1524
1536
 
1537
+ @property
1538
+ def condition(self):
1539
+ return None
1540
+
1525
1541
  @cached_property
1526
1542
  def output_lcl(self) -> LooseConceptList:
1527
1543
  return LooseConceptList(concepts=self.output_concepts)
1528
1544
 
1545
+ @property
1546
+ def can_be_inlined(self) -> bool:
1547
+ if isinstance(self.address, Address) and self.address.is_query:
1548
+ return False
1549
+ for x in self.columns:
1550
+ if not isinstance(x.alias, str):
1551
+ return False
1552
+ return True
1553
+
1554
+ @property
1555
+ def non_partial_concept_addresses(self) -> set[str]:
1556
+ return set([c.address for c in self.full_concepts])
1557
+
1529
1558
  @field_validator("namespace", mode="plain")
1530
1559
  @classmethod
1531
1560
  def namespace_validation(cls, v):
@@ -1739,7 +1768,7 @@ class QueryDatasource(BaseModel):
1739
1768
  input_concepts: List[Concept]
1740
1769
  output_concepts: List[Concept]
1741
1770
  source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
1742
- datasources: Sequence[Union[Datasource, "QueryDatasource"]]
1771
+ datasources: List[Union[Datasource, "QueryDatasource"]]
1743
1772
  grain: Grain
1744
1773
  joins: List[BaseJoin | UnnestJoin]
1745
1774
  limit: Optional[int] = None
@@ -1790,10 +1819,7 @@ class QueryDatasource(BaseModel):
1790
1819
  c.address for c in values["input_concepts"]
1791
1820
  )
1792
1821
  seen = set()
1793
- for k, val in v.items():
1794
- # if val:
1795
- # if len(val) != 1:
1796
- # raise SyntaxError(f"source map {k} has multiple values {len(val)}")
1822
+ for k, _ in v.items():
1797
1823
  seen.add(k)
1798
1824
  for x in expected:
1799
1825
  if x not in seen:
@@ -1915,18 +1941,18 @@ class QueryDatasource(BaseModel):
1915
1941
  )
1916
1942
 
1917
1943
  def get_alias(
1918
- self, concept: Concept, use_raw_name: bool = False, force_alias: bool = False
1944
+ self,
1945
+ concept: Concept,
1946
+ use_raw_name: bool = False,
1947
+ force_alias: bool = False,
1948
+ source: str | None = None,
1919
1949
  ):
1920
- # if we should use the raw datasource name to access
1921
- use_raw_name = (
1922
- True
1923
- if (len(self.datasources) == 1 or use_raw_name) and not force_alias
1924
- # if ((len(self.datasources) == 1 and isinstance(self.datasources[0], Datasource)) or use_raw_name) and not force_alias
1925
- else False
1926
- )
1927
1950
  for x in self.datasources:
1928
1951
  # query datasources should be referenced by their alias, always
1929
1952
  force_alias = isinstance(x, QueryDatasource)
1953
+ use_raw_name = isinstance(x, Datasource) and not force_alias
1954
+ if source and x.identifier != source:
1955
+ continue
1930
1956
  try:
1931
1957
  return x.get_alias(
1932
1958
  concept.with_grain(self.grain),
@@ -1960,8 +1986,7 @@ class Comment(BaseModel):
1960
1986
 
1961
1987
  class CTE(BaseModel):
1962
1988
  name: str
1963
- source: "QueryDatasource" # TODO: make recursive
1964
- # output columns are what are selected/grouped by
1989
+ source: "QueryDatasource"
1965
1990
  output_columns: List[Concept]
1966
1991
  source_map: Dict[str, str | list[str]]
1967
1992
  grain: Grain
@@ -1972,6 +1997,10 @@ class CTE(BaseModel):
1972
1997
  condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
1973
1998
  partial_concepts: List[Concept] = Field(default_factory=list)
1974
1999
  join_derived_concepts: List[Concept] = Field(default_factory=list)
2000
+ order_by: Optional[OrderBy] = None
2001
+ limit: Optional[int] = None
2002
+ requires_nesting: bool = True
2003
+ base_name_override: Optional[str] = None
1975
2004
 
1976
2005
  @computed_field # type: ignore
1977
2006
  @property
@@ -1982,6 +2011,40 @@ class CTE(BaseModel):
1982
2011
  def validate_output_columns(cls, v):
1983
2012
  return unique(v, "address")
1984
2013
 
2014
+ def inline_parent_datasource(self, parent: CTE) -> bool:
2015
+ qds_being_inlined = parent.source
2016
+ ds_being_inlined = qds_being_inlined.datasources[0]
2017
+ if not isinstance(ds_being_inlined, Datasource):
2018
+ return False
2019
+ self.source.datasources = [
2020
+ ds_being_inlined,
2021
+ *[
2022
+ x
2023
+ for x in self.source.datasources
2024
+ if x.identifier != qds_being_inlined.identifier
2025
+ ],
2026
+ ]
2027
+ # need to identify this before updating joins
2028
+ if self.base_name == parent.name:
2029
+ self.base_name_override = ds_being_inlined.safe_location
2030
+
2031
+ for join in self.joins:
2032
+ if isinstance(join, InstantiatedUnnestJoin):
2033
+ continue
2034
+ if join.left_cte.name == parent.name:
2035
+ join.left_cte = ds_being_inlined
2036
+ if join.right_cte.name == parent.name:
2037
+ join.right_cte = ds_being_inlined
2038
+ for k, v in self.source_map.items():
2039
+ if isinstance(v, list):
2040
+ self.source_map[k] = [
2041
+ ds_being_inlined.name if x == parent.name else x for x in v
2042
+ ]
2043
+ elif v == parent.name:
2044
+ self.source_map[k] = ds_being_inlined.name
2045
+ self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2046
+ return True
2047
+
1985
2048
  def __add__(self, other: "CTE"):
1986
2049
  logger.info('Merging two copies of CTE "%s"', self.name)
1987
2050
  if not self.grain == other.grain:
@@ -2024,16 +2087,25 @@ class CTE(BaseModel):
2024
2087
  def relevant_base_ctes(self):
2025
2088
  return self.parent_ctes
2026
2089
 
2090
+ @property
2091
+ def is_root_datasource(self) -> bool:
2092
+ return (
2093
+ len(self.source.datasources) == 1
2094
+ and isinstance(self.source.datasources[0], Datasource)
2095
+ and not self.source.datasources[0].name == CONSTANT_DATASET
2096
+ )
2097
+
2027
2098
  @property
2028
2099
  def base_name(self) -> str:
2100
+ if self.base_name_override:
2101
+ return self.base_name_override
2029
2102
  # if this cte selects from a single datasource, select right from it
2030
2103
  valid_joins: List[Join] = [
2031
2104
  join for join in self.joins if isinstance(join, Join)
2032
2105
  ]
2033
- if len(self.source.datasources) == 1 and isinstance(
2034
- self.source.datasources[0], Datasource
2035
- ):
2106
+ if self.is_root_datasource:
2036
2107
  return self.source.datasources[0].safe_location
2108
+
2037
2109
  # if we have multiple joined CTEs, pick the base
2038
2110
  # as the root
2039
2111
  elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
@@ -2059,11 +2131,10 @@ class CTE(BaseModel):
2059
2131
 
2060
2132
  @property
2061
2133
  def base_alias(self) -> str:
2134
+
2135
+ if self.is_root_datasource:
2136
+ return self.source.datasources[0].identifier
2062
2137
  relevant_joins = [j for j in self.joins if isinstance(j, Join)]
2063
- if len(self.source.datasources) == 1 and isinstance(
2064
- self.source.datasources[0], Datasource
2065
- ):
2066
- return self.source.datasources[0].full_name.replace(".", "_")
2067
2138
  if relevant_joins:
2068
2139
  return relevant_joins[0].left_cte.name
2069
2140
  elif self.relevant_base_ctes:
@@ -2072,12 +2143,16 @@ class CTE(BaseModel):
2072
2143
  return self.parent_ctes[0].name
2073
2144
  return self.name
2074
2145
 
2075
- def get_alias(self, concept: Concept) -> str:
2146
+ def get_alias(self, concept: Concept, source: str | None = None) -> str:
2076
2147
  for cte in self.parent_ctes:
2077
2148
  if concept.address in [x.address for x in cte.output_columns]:
2149
+ if source and source != cte.name:
2150
+ continue
2078
2151
  return concept.safe_address
2079
2152
  try:
2080
- source = self.source.get_alias(concept)
2153
+ source = self.source.get_alias(concept, source=source)
2154
+ if not source:
2155
+ raise ValueError("No source found")
2081
2156
  return source
2082
2157
  except ValueError as e:
2083
2158
  return f"INVALID_ALIAS: {str(e)}"
@@ -2090,6 +2165,11 @@ class CTE(BaseModel):
2090
2165
  and not self.group_to_grain
2091
2166
  ):
2092
2167
  return False
2168
+ if (
2169
+ len(self.source.datasources) == 1
2170
+ and self.source.datasources[0].name == CONSTANT_DATASET
2171
+ ):
2172
+ return False
2093
2173
  return True
2094
2174
 
2095
2175
  @property
@@ -2123,19 +2203,43 @@ class JoinKey(BaseModel):
2123
2203
 
2124
2204
 
2125
2205
  class Join(BaseModel):
2126
- left_cte: CTE
2127
- right_cte: CTE
2206
+ left_cte: CTE | Datasource
2207
+ right_cte: CTE | Datasource
2128
2208
  jointype: JoinType
2129
2209
  joinkeys: List[JoinKey]
2130
2210
 
2211
+ @property
2212
+ def left_name(self) -> str:
2213
+ if isinstance(self.left_cte, Datasource):
2214
+ return self.left_cte.identifier
2215
+ return self.left_cte.name
2216
+
2217
+ @property
2218
+ def right_name(self) -> str:
2219
+ if isinstance(self.right_cte, Datasource):
2220
+ return self.right_cte.identifier
2221
+ return self.right_cte.name
2222
+
2223
+ @property
2224
+ def left_ref(self) -> str:
2225
+ if isinstance(self.left_cte, Datasource):
2226
+ return f"{self.left_cte.safe_location} as {self.left_cte.identifier}"
2227
+ return self.left_cte.name
2228
+
2229
+ @property
2230
+ def right_ref(self) -> str:
2231
+ if isinstance(self.right_cte, Datasource):
2232
+ return f"{self.right_cte.safe_location} as {self.right_cte.identifier}"
2233
+ return self.right_cte.name
2234
+
2131
2235
  @property
2132
2236
  def unique_id(self) -> str:
2133
- return self.left_cte.name + self.right_cte.name + self.jointype.value
2237
+ return self.left_name + self.right_name + self.jointype.value
2134
2238
 
2135
2239
  def __str__(self):
2136
2240
  return (
2137
- f"{self.jointype.value} JOIN {self.left_cte.name} and"
2138
- f" {self.right_cte.name} on {','.join([str(k) for k in self.joinkeys])}"
2241
+ f"{self.jointype.value} JOIN {self.left_name} and"
2242
+ f" {self.right_name} on {','.join([str(k) for k in self.joinkeys])}"
2139
2243
  )
2140
2244
 
2141
2245
 
@@ -2546,7 +2650,7 @@ class LazyEnvironment(Environment):
2546
2650
  return super().__getattribute__(name)
2547
2651
 
2548
2652
 
2549
- class Comparison(Namespaced, SelectGrain, BaseModel):
2653
+ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2550
2654
  left: Union[
2551
2655
  int,
2552
2656
  str,
@@ -2598,7 +2702,7 @@ class Comparison(Namespaced, SelectGrain, BaseModel):
2598
2702
  return f"{str(self.left)} {self.operator.value} {str(self.right)}"
2599
2703
 
2600
2704
  def with_namespace(self, namespace: str):
2601
- return Comparison(
2705
+ return self.__class__(
2602
2706
  left=(
2603
2707
  self.left.with_namespace(namespace)
2604
2708
  if isinstance(self.left, Namespaced)
@@ -2613,7 +2717,7 @@ class Comparison(Namespaced, SelectGrain, BaseModel):
2613
2717
  )
2614
2718
 
2615
2719
  def with_select_grain(self, grain: Grain):
2616
- return Comparison(
2720
+ return self.__class__(
2617
2721
  left=(
2618
2722
  self.left.with_select_grain(grain)
2619
2723
  if isinstance(self.left, SelectGrain)
@@ -2632,7 +2736,9 @@ class Comparison(Namespaced, SelectGrain, BaseModel):
2632
2736
  output: List[Concept] = []
2633
2737
  if isinstance(self.left, (Concept,)):
2634
2738
  output += [self.left]
2635
- if isinstance(self.left, (Conditional, Parenthetical)):
2739
+ if isinstance(
2740
+ self.left, (Comparison, SubselectComparison, Conditional, Parenthetical)
2741
+ ):
2636
2742
  output += self.left.input
2637
2743
  if isinstance(self.left, FilterItem):
2638
2744
  output += self.left.concept_arguments
@@ -2641,7 +2747,9 @@ class Comparison(Namespaced, SelectGrain, BaseModel):
2641
2747
 
2642
2748
  if isinstance(self.right, (Concept,)):
2643
2749
  output += [self.right]
2644
- if isinstance(self.right, (Conditional, Parenthetical)):
2750
+ if isinstance(
2751
+ self.right, (Comparison, SubselectComparison, Conditional, Parenthetical)
2752
+ ):
2645
2753
  output += self.right.input
2646
2754
  if isinstance(self.right, FilterItem):
2647
2755
  output += self.right.concept_arguments
@@ -2658,8 +2766,31 @@ class Comparison(Namespaced, SelectGrain, BaseModel):
2658
2766
  return output
2659
2767
 
2660
2768
 
2769
+ class SubselectComparison(Comparison):
2770
+
2771
+ @property
2772
+ def row_arguments(self) -> List[Concept]:
2773
+ return get_concept_arguments(self.left)
2774
+
2775
+ @property
2776
+ def existence_arguments(self) -> List[Concept]:
2777
+ return get_concept_arguments(self.right)
2778
+
2779
+ def with_select_grain(self, grain: Grain):
2780
+ # there's no need to pass the select grain through to a subselect comparison
2781
+ return self.__class__(
2782
+ left=(
2783
+ self.left.with_select_grain(grain)
2784
+ if isinstance(self.left, SelectGrain)
2785
+ else self.left
2786
+ ),
2787
+ right=self.right,
2788
+ operator=self.operator,
2789
+ )
2790
+
2791
+
2661
2792
  class CaseWhen(Namespaced, SelectGrain, BaseModel):
2662
- comparison: Conditional | Comparison
2793
+ comparison: Conditional | SubselectComparison | Comparison
2663
2794
  expr: "Expr"
2664
2795
 
2665
2796
  @property
@@ -2726,7 +2857,7 @@ class CaseElse(Namespaced, SelectGrain, BaseModel):
2726
2857
  )
2727
2858
 
2728
2859
 
2729
- class Conditional(Namespaced, SelectGrain, BaseModel):
2860
+ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2730
2861
  left: Union[
2731
2862
  int,
2732
2863
  str,
@@ -2821,6 +2952,32 @@ class Conditional(Namespaced, SelectGrain, BaseModel):
2821
2952
  output += get_concept_arguments(self.right)
2822
2953
  return output
2823
2954
 
2955
+ @property
2956
+ def row_arguments(self) -> List[Concept]:
2957
+ output = []
2958
+ if isinstance(self.left, ConceptArgs):
2959
+ output += self.left.row_arguments
2960
+ else:
2961
+ output += get_concept_arguments(self.left)
2962
+ if isinstance(self.right, ConceptArgs):
2963
+ output += self.right.row_arguments
2964
+ else:
2965
+ output += get_concept_arguments(self.right)
2966
+ return output
2967
+
2968
+ @property
2969
+ def existence_arguments(self) -> List[Concept]:
2970
+ output = []
2971
+ if isinstance(self.left, ConceptArgs):
2972
+ output += self.left.existence_arguments
2973
+ else:
2974
+ output += get_concept_arguments(self.left)
2975
+ if isinstance(self.right, ConceptArgs):
2976
+ output += self.right.existence_arguments
2977
+ else:
2978
+ output += get_concept_arguments(self.right)
2979
+ return output
2980
+
2824
2981
 
2825
2982
  class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
2826
2983
  function: Function
@@ -2864,8 +3021,8 @@ class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
2864
3021
  return AggregateWrapper(function=self.function.with_select_grain(grain), by=by)
2865
3022
 
2866
3023
 
2867
- class WhereClause(Namespaced, SelectGrain, BaseModel):
2868
- conditional: Union[Comparison, Conditional, "Parenthetical"]
3024
+ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3025
+ conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]
2869
3026
 
2870
3027
  @property
2871
3028
  def input(self) -> List[Concept]:
@@ -2875,6 +3032,14 @@ class WhereClause(Namespaced, SelectGrain, BaseModel):
2875
3032
  def concept_arguments(self) -> List[Concept]:
2876
3033
  return self.conditional.concept_arguments
2877
3034
 
3035
+ @property
3036
+ def row_arguments(self) -> List[Concept]:
3037
+ return self.conditional.row_arguments
3038
+
3039
+ @property
3040
+ def existence_arguments(self) -> List[Concept]:
3041
+ return self.conditional.existence_arguments
3042
+
2878
3043
  def with_namespace(self, namespace: str) -> WhereClause:
2879
3044
  return WhereClause(conditional=self.conditional.with_namespace(namespace))
2880
3045
 
@@ -3062,7 +3227,7 @@ class RowsetItem(Namespaced, BaseModel):
3062
3227
  return [self.content]
3063
3228
 
3064
3229
 
3065
- class Parenthetical(Namespaced, SelectGrain, BaseModel):
3230
+ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3066
3231
  content: "Expr"
3067
3232
 
3068
3233
  def __str__(self):
@@ -3106,6 +3271,18 @@ class Parenthetical(Namespaced, SelectGrain, BaseModel):
3106
3271
  base.append(x)
3107
3272
  return base
3108
3273
 
3274
+ @property
3275
+ def row_arguments(self) -> List[Concept]:
3276
+ if isinstance(self.content, ConceptArgs):
3277
+ return self.content.row_arguments
3278
+ return self.concept_arguments
3279
+
3280
+ @property
3281
+ def existence_arguments(self) -> List[Concept]:
3282
+ if isinstance(self.content, ConceptArgs):
3283
+ return self.content.existence_arguments
3284
+ return self.concept_arguments
3285
+
3109
3286
  @property
3110
3287
  def input(self):
3111
3288
  base = []
@@ -0,0 +1,141 @@
1
+ from trilogy.core.models import (
2
+ CTE,
3
+ SelectStatement,
4
+ PersistStatement,
5
+ Datasource,
6
+ MultiSelectStatement,
7
+ )
8
+ from trilogy.core.enums import PurposeLineage
9
+ from trilogy.constants import logger
10
+ from abc import ABC
11
+
12
+
13
class OptimizationRule(ABC):
    """Base class for a single optimization pass over one CTE.

    Subclasses override :meth:`optimize`; the implementation here only
    raises ``NotImplementedError``.
    """

    def optimize(self, cte: CTE) -> bool:
        """Apply this rule to ``cte`` and return a bool result.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def log(self, message: str):
        """Log ``message`` at INFO level, prefixed with the concrete rule name."""
        # %-style arguments defer string formatting to the logging framework,
        # so nothing is built when INFO is disabled; the emitted text is the
        # same "[Optimization][<RuleName>] <message>" as before.
        logger.info("[Optimization][%s] %s", self.__class__.__name__, message)
20
+
21
+
22
class InlineDatasource(OptimizationRule):
    """Rule that inlines eligible single-datasource parent CTEs into a child CTE."""

    def optimize(self, cte: CTE) -> bool:
        """Inline every eligible parent of ``cte`` directly into it.

        A parent is eligible when all of the following hold:

        * ``parent.is_root_datasource`` is true,
        * the parent has no parent CTEs of its own,
        * its single source is a ``Datasource`` whose ``can_be_inlined`` is true,
        * every output column of the parent is an output concept of that
          datasource.

        Returns:
            True when at least one parent was actually inlined (that is,
            ``cte.inline_parent_datasource`` reported success), else False.
        """
        if not cte.parent_ctes:
            return False

        self.log(
            f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
        )
        to_inline: list[CTE] = []
        for parent_cte in cte.parent_ctes:
            if not parent_cte.is_root_datasource:
                self.log(f"parent {parent_cte.name} is not root")
                continue
            if parent_cte.parent_ctes:
                self.log(f"parent {parent_cte.name} has parents")
                continue
            raw_root = parent_cte.source.datasources[0]
            if not isinstance(raw_root, Datasource):
                self.log(f"parent {parent_cte.name} is not datasource")
                continue
            root: Datasource = raw_root
            if not root.can_be_inlined:
                self.log(f"parent {parent_cte.name} datasource is not inlineable")
                continue
            root_outputs = {x.address for x in root.output_concepts}
            cte_outputs = {x.address for x in parent_cte.output_columns}
            if not cte_outputs.issubset(root_outputs):
                self.log(f"Not all {parent_cte.name} outputs are found on datasource")
                continue

            to_inline.append(parent_cte)

        # Candidates are collected first and applied afterwards because
        # inline_parent_datasource rebinds cte.parent_ctes.
        optimized = False
        for replaceable in to_inline:
            self.log(f"Inlining parent {replaceable.name}")
            # Previously the result was discarded and this method always
            # returned False; report real work so callers can tell that the
            # CTE was modified.
            if cte.inline_parent_datasource(replaceable):
                optimized = True

        return optimized


# Rules applied, in order, by optimize_ctes.
REGISTERED_RULES: list[OptimizationRule] = [InlineDatasource()]
64
+
65
+
66
def filter_irrelevant_ctes(input: list[CTE], root_cte: CTE):
    """Return the members of ``input`` that ``root_cte`` depends on.

    Walks ``parent_ctes`` starting at ``root_cte`` (inclusive), collects the
    names reached, and returns the elements of ``input`` whose ``name`` is in
    that set, in their original order.

    Each CTE object is expanded at most once, so parents shared by several
    children are not re-traversed and a cyclic parent chain cannot loop
    forever.
    """
    relevant_ctes: set[str] = set()
    visited: set[int] = set()
    pending = [root_cte]
    while pending:
        current = pending.pop()
        # Track visits by object identity so the set of names collected is
        # exactly what an exhaustive walk would produce.
        if id(current) in visited:
            continue
        visited.add(id(current))
        relevant_ctes.add(current.name)
        pending.extend(current.parent_ctes)
    return [cte for cte in input if cte.name in relevant_ctes]
76
+
77
+
78
def is_direct_return_eligible(
    cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
) -> bool:
    """Decide whether the statement's final select can be served from ``cte``.

    Returns False when:

    * ``select`` is a ``PersistStatement`` or ``MultiSelectStatement``;
    * the statement has both a where clause with concept arguments and a limit;
    * any concept derived in ``cte`` (an output of ``cte.source`` that is not
      one of its inputs) is a WINDOW derivation;
    * any such derived concept is an AGGREGATE that the where clause references.

    Otherwise logs the derived concepts at INFO level and returns True.
    """
    if isinstance(select, (PersistStatement, MultiSelectStatement)):
        return False
    derived_concepts = [
        c for c in cte.source.output_concepts if c not in cte.source.input_concepts
    ]
    conditions = (
        {x.address for x in select.where_clause.concept_arguments}
        if select.where_clause
        else set()
    )
    if conditions and select.limit:
        return False
    for x in derived_concepts:
        if x.derivation == PurposeLineage.WINDOW:
            return False
        if x.derivation == PurposeLineage.AGGREGATE and x.address in conditions:
            return False
    logger.info(
        f"Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
    )
    return True
104
+
105
+
106
def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):
    """Reorder ``cte.output_columns`` in place to follow the query's output order.

    The new column list contains, in ``query.output_components`` order, the
    CTE column for every output component whose address is not among
    ``query.hidden_components``. CTE columns not referenced that way are
    dropped.

    Raises:
        KeyError: if a visible output component has no column with the same
            address in ``cte.output_columns``.
    """
    # A set keeps the hidden-address check O(1) per output component.
    hidden_addresses = {c.address for c in query.hidden_components}
    mapping = {x.address: x for x in cte.output_columns}
    cte.output_columns = [
        mapping[c.address]
        for c in query.output_components
        if c.address not in hidden_addresses
    ]
118
+
119
+
120
def optimize_ctes(
    input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
):
    """Run the registered optimization rules over ``input`` and prune the result.

    Every rule in ``REGISTERED_RULES`` is applied to every CTE, and the whole
    sweep repeats until one full pass reports no change. If
    ``is_direct_return_eligible(root_cte, select)`` holds, the statement's
    order-by, limit and where condition are then pushed onto ``root_cte``,
    it is marked as not requiring nesting, and its output columns are sorted
    to match the statement.

    Returns:
        The CTEs from ``input`` still reachable from ``root_cte``.
    """
    complete = False

    while not complete:
        actions_taken = False
        for rule in REGISTERED_RULES:
            for cte in input:
                # Accumulate instead of overwrite: a change reported for any
                # CTE must trigger another pass, not only one reported for the
                # last CTE of the last rule.
                if rule.optimize(cte):
                    actions_taken = True
        complete = not actions_taken

    if is_direct_return_eligible(root_cte, select):
        root_cte.order_by = select.order_by
        root_cte.limit = select.limit
        root_cte.condition = (
            select.where_clause.conditional if select.where_clause else None
        )
        root_cte.requires_nesting = False
        # Sort the root CTE itself. The loop variable ``cte`` used here before
        # was simply whichever element of ``input`` was visited last, and was
        # undefined when ``input`` was empty.
        sort_select_output(root_cte, select)

    return filter_irrelevant_ctes(input, root_cte)