pytrilogy 0.0.1.110__tar.gz → 0.0.1.111__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (99)
  1. {pytrilogy-0.0.1.110/pytrilogy.egg-info → pytrilogy-0.0.1.111}/PKG-INFO +1 -1
  2. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111/pytrilogy.egg-info}/PKG-INFO +1 -1
  3. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_models.py +2 -2
  4. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_parsing.py +35 -0
  5. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/__init__.py +1 -1
  6. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/constants.py +1 -1
  7. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/models.py +85 -67
  8. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/optimization.py +23 -8
  9. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/concept_strategies_v3.py +44 -19
  10. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/basic_node.py +2 -0
  11. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/common.py +3 -1
  12. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
  13. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/filter_node.py +36 -6
  14. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/node_merge_node.py +34 -23
  15. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/rowset_node.py +30 -6
  16. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/select_node.py +23 -9
  17. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/unnest_node.py +24 -3
  18. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/window_node.py +4 -2
  19. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/__init__.py +7 -6
  20. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/base_node.py +40 -6
  21. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/filter_node.py +15 -1
  22. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/group_node.py +20 -1
  23. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/merge_node.py +36 -7
  24. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/select_node_v2.py +34 -39
  25. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/unnest_node.py +12 -0
  26. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/window_node.py +11 -0
  27. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/utility.py +0 -14
  28. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/query_processor.py +125 -29
  29. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/base.py +45 -40
  30. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/executor.py +31 -3
  31. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/parse_engine.py +49 -17
  32. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/LICENSE.md +0 -0
  33. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/README.md +0 -0
  34. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pyproject.toml +0 -0
  35. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/SOURCES.txt +0 -0
  36. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/dependency_links.txt +0 -0
  37. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/entry_points.txt +0 -0
  38. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/requires.txt +0 -0
  39. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/top_level.txt +0 -0
  40. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/setup.cfg +0 -0
  41. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/setup.py +0 -0
  42. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_declarations.py +0 -0
  43. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_derived_concepts.py +0 -0
  44. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_discovery_nodes.py +0 -0
  45. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_environment.py +0 -0
  46. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_functions.py +0 -0
  47. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_imports.py +0 -0
  48. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_metadata.py +0 -0
  49. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_multi_join_assignments.py +0 -0
  50. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_partial_handling.py +0 -0
  51. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_query_processing.py +0 -0
  52. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_select.py +0 -0
  53. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_statements.py +0 -0
  54. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_undefined_concept.py +0 -0
  55. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_where_clause.py +0 -0
  56. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/compiler.py +0 -0
  57. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/__init__.py +0 -0
  58. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/constants.py +0 -0
  59. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/enums.py +0 -0
  60. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/env_processor.py +0 -0
  61. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/environment_helpers.py +0 -0
  62. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/ergonomics.py +0 -0
  63. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/exceptions.py +0 -0
  64. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/functions.py +0 -0
  65. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/graph_models.py +0 -0
  66. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/internal.py +0 -0
  67. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/__init__.py +0 -0
  68. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/graph_utils.py +0 -0
  69. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/__init__.py +0 -0
  70. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/group_node.py +0 -0
  71. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
  72. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
  73. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/__init__.py +0 -0
  74. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/bigquery.py +0 -0
  75. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/common.py +0 -0
  76. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/config.py +0 -0
  77. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/duckdb.py +0 -0
  78. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/enums.py +0 -0
  79. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/postgres.py +0 -0
  80. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/presto.py +0 -0
  81. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/snowflake.py +0 -0
  82. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/sql_server.py +0 -0
  83. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/engine.py +0 -0
  84. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/hooks/__init__.py +0 -0
  85. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/hooks/base_hook.py +0 -0
  86. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/hooks/graph_hook.py +0 -0
  87. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/hooks/query_debugger.py +0 -0
  88. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/metadata/__init__.py +0 -0
  89. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parser.py +0 -0
  90. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/__init__.py +0 -0
  91. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/common.py +0 -0
  92. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/config.py +0 -0
  93. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/exceptions.py +0 -0
  94. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/helpers.py +0 -0
  95. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/render.py +0 -0
  96. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/py.typed +0 -0
  97. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/scripts/__init__.py +0 -0
  98. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/scripts/trilogy.py +0 -0
  99. {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/utility.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.110
3
+ Version: 0.0.1.111
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.110
3
+ Version: 0.0.1.111
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -34,7 +34,7 @@ def test_cte_merge(test_environment, test_environment_graph):
34
34
  joins=[],
35
35
  source_map={outputs[0].address: {datasource}},
36
36
  ),
37
- source_map={c.address: datasource.identifier for c in outputs},
37
+ source_map={c.address: [datasource.identifier] for c in outputs},
38
38
  )
39
39
  b = CTE(
40
40
  name="testb",
@@ -48,7 +48,7 @@ def test_cte_merge(test_environment, test_environment_graph):
48
48
  joins=[],
49
49
  source_map=output_map,
50
50
  ),
51
- source_map={c.address: datasource.identifier for c in outputs},
51
+ source_map={c.address: [datasource.identifier] for c in outputs},
52
52
  )
53
53
 
54
54
  merged = a + b
@@ -30,6 +30,19 @@ def test_in():
30
30
  rendered = BaseDialect().render_expr(right)
31
31
  assert rendered.strip() == "( 1,2,3 )".strip()
32
32
 
33
+ _, parsed = parse_text(
34
+ "const order_id <- 3; SELECT order_id WHERE order_id IN (1);"
35
+ )
36
+ query = parsed[-1]
37
+ right = query.where_clause.conditional.right
38
+ assert isinstance(
39
+ right,
40
+ Parenthetical,
41
+ ), type(right)
42
+ assert right.content == 1
43
+ rendered = BaseDialect().render_expr(right)
44
+ assert rendered.strip() == "( 1 )".strip()
45
+
33
46
 
34
47
  def test_not_in():
35
48
  _, parsed = parse_text(
@@ -160,6 +173,28 @@ select
160
173
  assert env.concepts[name].keys == (env.concepts["id"],)
161
174
 
162
175
 
176
+ def test_purpose_and_derivation():
177
+ env, parsed = parse_text(
178
+ """key id int;
179
+ key other_id int;
180
+ property <id, other_id>.join_id <- id*10+other_id;
181
+
182
+
183
+ select
184
+ join_id
185
+ ;
186
+ """
187
+ )
188
+
189
+ for name in ["join_id"]:
190
+ assert name in env.concepts
191
+ assert env.concepts[name].purpose == Purpose.PROPERTY
192
+ assert env.concepts[name].keys == (
193
+ env.concepts["id"],
194
+ env.concepts["other_id"],
195
+ )
196
+
197
+
163
198
  def test_output_purpose():
164
199
 
165
200
  env, parsed = parse_text(
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.1.110"
7
+ __version__ = "0.0.1.111"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
@@ -30,7 +30,7 @@ class Optimizations:
30
30
  class Config:
31
31
  strict_mode: bool = True
32
32
  human_identifiers: bool = True
33
- inline_datasources: bool = True
33
+ validate_missing: bool = True
34
34
  optimizations: Optimizations = field(default_factory=Optimizations)
35
35
 
36
36
 
@@ -33,7 +33,13 @@ from pydantic import (
33
33
  )
34
34
  from lark.tree import Meta
35
35
  from pathlib import Path
36
- from trilogy.constants import logger, DEFAULT_NAMESPACE, ENV_CACHE_NAME, MagicConstants
36
+ from trilogy.constants import (
37
+ logger,
38
+ DEFAULT_NAMESPACE,
39
+ ENV_CACHE_NAME,
40
+ MagicConstants,
41
+ CONFIG,
42
+ )
37
43
  from trilogy.core.constants import (
38
44
  ALL_ROWS_CONCEPT,
39
45
  INTERNAL_NAMESPACE,
@@ -61,7 +67,6 @@ from trilogy.core.enums import (
61
67
  from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
62
68
  from trilogy.utility import unique
63
69
  from collections import UserList
64
- from trilogy.utility import string_to_hash
65
70
  from functools import cached_property
66
71
  from abc import ABC
67
72
 
@@ -129,7 +134,7 @@ class ConceptArgs(ABC):
129
134
  raise NotImplementedError
130
135
 
131
136
  @property
132
- def existence_arguments(self) -> List["Concept"]:
137
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
133
138
  return []
134
139
 
135
140
  @property
@@ -281,9 +286,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
281
286
  MultiSelectStatement | MergeStatement,
282
287
  ]
283
288
  ] = None
284
- # lineage: Annotated[Optional[
285
- # Union[Function, WindowItem, FilterItem, AggregateWrapper]
286
- # ], WrapValidator(lineage_validator)] = None
287
289
  namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
288
290
  keys: Optional[Tuple["Concept", ...]] = None
289
291
  grain: "Grain" = Field(default=None, validate_default=True)
@@ -621,6 +623,12 @@ class Grain(BaseModel):
621
623
  if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
622
624
  if all([c in v2 for c in sub.keys]):
623
625
  continue
626
+ elif sub.derivation == PurposeLineage.MERGE and isinstance(
627
+ sub.lineage, MergeStatement
628
+ ):
629
+ parents = sub.lineage.concepts
630
+ if any([p in v2 for p in parents]):
631
+ continue
624
632
  final.append(sub)
625
633
  v2 = sorted(final, key=lambda x: x.name)
626
634
  return v2
@@ -966,23 +974,6 @@ class ConceptTransform(Namespaced, BaseModel):
966
974
  modifiers=self.modifiers,
967
975
  )
968
976
 
969
- def with_filter(self, where: "WhereClause") -> "ConceptTransform":
970
- id_hash = string_to_hash(str(where))
971
- new_parent_concept = Concept(
972
- name=f"_anon_concept_transform_filter_input_{id_hash}",
973
- datatype=self.output.datatype,
974
- purpose=self.output.purpose,
975
- lineage=self.output.lineage,
976
- namespace=DEFAULT_NAMESPACE,
977
- grain=self.output.grain,
978
- keys=self.output.keys,
979
- )
980
- new_parent = FilterItem(content=new_parent_concept, where=where)
981
- self.output.lineage = new_parent
982
- return ConceptTransform(
983
- function=new_parent, output=self.output, modifiers=self.modifiers
984
- )
985
-
986
977
 
987
978
  class Window(BaseModel):
988
979
  count: int
@@ -1611,13 +1602,15 @@ class Datasource(Namespaced, BaseModel):
1611
1602
  def __add__(self, other):
1612
1603
  if not other == self:
1613
1604
  raise ValueError(
1614
- "Attempted to add two datasources that are not identical, this should"
1615
- " never happen"
1605
+ "Attempted to add two datasources that are not identical, this is not a valid operation"
1616
1606
  )
1617
1607
  return self
1618
1608
 
1609
+ def __repr__(self):
1610
+ return f"Datasource<{self.namespace}.{self.identifier}@<{self.grain}>"
1611
+
1619
1612
  def __str__(self):
1620
- return f"{self.namespace}.{self.identifier}@<{self.grain}>"
1613
+ return self.__repr__()
1621
1614
 
1622
1615
  def __hash__(self):
1623
1616
  return (self.namespace + self.identifier).__hash__()
@@ -1786,6 +1779,7 @@ class QueryDatasource(BaseModel):
1786
1779
  input_concepts: List[Concept]
1787
1780
  output_concepts: List[Concept]
1788
1781
  source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
1782
+
1789
1783
  datasources: List[Union[Datasource, "QueryDatasource"]]
1790
1784
  grain: Grain
1791
1785
  joins: List[BaseJoin | UnnestJoin]
@@ -1799,6 +1793,12 @@ class QueryDatasource(BaseModel):
1799
1793
  join_derived_concepts: List[Concept] = Field(default_factory=list)
1800
1794
  hidden_concepts: List[Concept] = Field(default_factory=list)
1801
1795
  force_group: bool | None = None
1796
+ existence_source_map: Dict[str, Set[Union[Datasource, "QueryDatasource"]]] = Field(
1797
+ default_factory=dict
1798
+ )
1799
+
1800
+ def __repr__(self):
1801
+ return f"{self.identifier}@<{self.grain}>"
1802
1802
 
1803
1803
  @property
1804
1804
  def non_partial_concept_addresses(self) -> List[str]:
@@ -1841,14 +1841,14 @@ class QueryDatasource(BaseModel):
1841
1841
  for k, _ in v.items():
1842
1842
  seen.add(k)
1843
1843
  for x in expected:
1844
- if x not in seen:
1844
+ if x not in seen and CONFIG.validate_missing:
1845
1845
  raise SyntaxError(
1846
1846
  f"source map missing {x} on (expected {expected}, have {seen})"
1847
1847
  )
1848
1848
  return v
1849
1849
 
1850
1850
  def __str__(self):
1851
- return f"{self.identifier}@<{self.grain}>"
1851
+ return self.__repr__()
1852
1852
 
1853
1853
  def __hash__(self):
1854
1854
  return (self.identifier).__hash__()
@@ -2010,10 +2010,11 @@ class CTE(BaseModel):
2010
2010
  name: str
2011
2011
  source: "QueryDatasource"
2012
2012
  output_columns: List[Concept]
2013
- source_map: Dict[str, str | list[str]]
2013
+ source_map: Dict[str, list[str]]
2014
2014
  grain: Grain
2015
2015
  base: bool = False
2016
2016
  group_to_grain: bool = False
2017
+ existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
2017
2018
  parent_ctes: List["CTE"] = Field(default_factory=list)
2018
2019
  joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
2019
2020
  condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
@@ -2024,6 +2025,7 @@ class CTE(BaseModel):
2024
2025
  limit: Optional[int] = None
2025
2026
  requires_nesting: bool = True
2026
2027
  base_name_override: Optional[str] = None
2028
+ base_alias_override: Optional[str] = None
2027
2029
 
2028
2030
  @computed_field # type: ignore
2029
2031
  @property
@@ -2034,7 +2036,7 @@ class CTE(BaseModel):
2034
2036
  def validate_output_columns(cls, v):
2035
2037
  return unique(v, "address")
2036
2038
 
2037
- def inline_parent_datasource(self, parent: CTE) -> bool:
2039
+ def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
2038
2040
  qds_being_inlined = parent.source
2039
2041
  ds_being_inlined = qds_being_inlined.datasources[0]
2040
2042
  if not isinstance(ds_being_inlined, Datasource):
@@ -2050,6 +2052,7 @@ class CTE(BaseModel):
2050
2052
  # need to identify this before updating joins
2051
2053
  if self.base_name == parent.name:
2052
2054
  self.base_name_override = ds_being_inlined.safe_location
2055
+ self.base_alias_override = ds_being_inlined.identifier
2053
2056
 
2054
2057
  for join in self.joins:
2055
2058
  if isinstance(join, InstantiatedUnnestJoin):
@@ -2066,6 +2069,8 @@ class CTE(BaseModel):
2066
2069
  elif v == parent.name:
2067
2070
  self.source_map[k] = ds_being_inlined.name
2068
2071
  self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2072
+ if force_group:
2073
+ self.group_to_grain = True
2069
2074
  return True
2070
2075
 
2071
2076
  def __add__(self, other: "CTE"):
@@ -2126,9 +2131,6 @@ class CTE(BaseModel):
2126
2131
  if self.base_name_override:
2127
2132
  return self.base_name_override
2128
2133
  # if this cte selects from a single datasource, select right from it
2129
- valid_joins: List[Join] = [
2130
- join for join in self.joins if isinstance(join, Join)
2131
- ]
2132
2134
  if self.is_root_datasource:
2133
2135
  return self.source.datasources[0].safe_location
2134
2136
 
@@ -2136,33 +2138,16 @@ class CTE(BaseModel):
2136
2138
  # as the root
2137
2139
  elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
2138
2140
  return self.parent_ctes[0].name
2139
- elif valid_joins and len(valid_joins) > 0:
2140
- candidates = [x.left_cte.name for x in valid_joins]
2141
- disallowed = [x.right_cte.name for x in valid_joins]
2142
- try:
2143
- return [y for y in candidates if y not in disallowed][0]
2144
- except IndexError:
2145
- raise SyntaxError(
2146
- f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in self.parent_ctes]}"
2147
- )
2148
2141
  elif self.relevant_base_ctes:
2149
2142
  return self.relevant_base_ctes[0].name
2150
- elif self.parent_ctes:
2151
- raise SyntaxError(
2152
- f"{self.name} has no relevant base CTEs, {self.source_map},"
2153
- f" {[x.name for x in self.parent_ctes]}, outputs"
2154
- f" {[x.address for x in self.output_columns]}"
2155
- )
2156
2143
  return self.source.name
2157
2144
 
2158
2145
  @property
2159
2146
  def base_alias(self) -> str:
2160
-
2147
+ if self.base_alias_override:
2148
+ return self.base_alias_override
2161
2149
  if self.is_root_datasource:
2162
2150
  return self.source.datasources[0].identifier
2163
- relevant_joins = [j for j in self.joins if isinstance(j, Join)]
2164
- if relevant_joins:
2165
- return relevant_joins[0].left_cte.name
2166
2151
  elif self.relevant_base_ctes:
2167
2152
  return self.relevant_base_ctes[0].name
2168
2153
  elif self.parent_ctes:
@@ -2492,9 +2477,17 @@ class Environment(BaseModel):
2492
2477
  for datasource in self.datasources.values():
2493
2478
  for concept in datasource.output_concepts:
2494
2479
  concrete_addresses.add(concept.address)
2480
+ current_mat = [x.address for x in self.materialized_concepts]
2495
2481
  self.materialized_concepts = [
2496
2482
  c for c in self.concepts.values() if c.address in concrete_addresses
2497
2483
  ]
2484
+ new = [
2485
+ x.address
2486
+ for x in self.materialized_concepts
2487
+ if x.address not in current_mat
2488
+ ]
2489
+ if new:
2490
+ logger.info(f"Environment added new materialized concepts {new}")
2498
2491
  for concept in self.concepts.values():
2499
2492
  if concept.derivation == PurposeLineage.MERGE:
2500
2493
  ms = concept.lineage
@@ -2653,6 +2646,17 @@ class Environment(BaseModel):
2653
2646
  self.gen_concept_list_caches()
2654
2647
  return datasource
2655
2648
 
2649
+ def delete_datasource(
2650
+ self,
2651
+ address: str,
2652
+ meta: Meta | None = None,
2653
+ ) -> bool:
2654
+ if address in self.datasources:
2655
+ del self.datasources[address]
2656
+ self.gen_concept_list_caches()
2657
+ return True
2658
+ return False
2659
+
2656
2660
 
2657
2661
  class LazyEnvironment(Environment):
2658
2662
  """Variant of environment to defer parsing of a path"""
@@ -2759,11 +2763,8 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2759
2763
  if isinstance(self.left, SelectGrain)
2760
2764
  else self.left
2761
2765
  ),
2762
- right=(
2763
- self.right.with_select_grain(grain)
2764
- if isinstance(self.right, SelectGrain)
2765
- else self.right
2766
- ),
2766
+ # the right side does NOT need to inherit select grain
2767
+ right=self.right,
2767
2768
  operator=self.operator,
2768
2769
  )
2769
2770
 
@@ -2809,8 +2810,8 @@ class SubselectComparison(Comparison):
2809
2810
  return get_concept_arguments(self.left)
2810
2811
 
2811
2812
  @property
2812
- def existence_arguments(self) -> List[Concept]:
2813
- return get_concept_arguments(self.right)
2813
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
2814
+ return [tuple(get_concept_arguments(self.right))]
2814
2815
 
2815
2816
  def with_select_grain(self, grain: Grain):
2816
2817
  # there's no need to pass the select grain through to a subselect comparison
@@ -3002,18 +3003,26 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3002
3003
  return output
3003
3004
 
3004
3005
  @property
3005
- def existence_arguments(self) -> List[Concept]:
3006
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3006
3007
  output = []
3007
3008
  if isinstance(self.left, ConceptArgs):
3008
3009
  output += self.left.existence_arguments
3009
- else:
3010
- output += get_concept_arguments(self.left)
3011
3010
  if isinstance(self.right, ConceptArgs):
3012
3011
  output += self.right.existence_arguments
3013
- else:
3014
- output += get_concept_arguments(self.right)
3015
3012
  return output
3016
3013
 
3014
+ def decompose(self):
3015
+ chunks = []
3016
+ if self.operator == BooleanOperator.AND:
3017
+ for val in [self.left, self.right]:
3018
+ if isinstance(val, Conditional):
3019
+ chunks.extend(val.decompose())
3020
+ else:
3021
+ chunks.append(val)
3022
+ else:
3023
+ chunks.append(self)
3024
+ return chunks
3025
+
3017
3026
 
3018
3027
  class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3019
3028
  function: Function
@@ -3073,7 +3082,7 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3073
3082
  return self.conditional.row_arguments
3074
3083
 
3075
3084
  @property
3076
- def existence_arguments(self) -> List[Concept]:
3085
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3077
3086
  return self.conditional.existence_arguments
3078
3087
 
3079
3088
  def with_namespace(self, namespace: str) -> WhereClause:
@@ -3314,10 +3323,10 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3314
3323
  return self.concept_arguments
3315
3324
 
3316
3325
  @property
3317
- def existence_arguments(self) -> List[Concept]:
3326
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3318
3327
  if isinstance(self.content, ConceptArgs):
3319
3328
  return self.content.existence_arguments
3320
- return self.concept_arguments
3329
+ return []
3321
3330
 
3322
3331
  @property
3323
3332
  def input(self):
@@ -3386,6 +3395,12 @@ Function.model_rebuild()
3386
3395
  Grain.model_rebuild()
3387
3396
 
3388
3397
 
3398
+ def list_to_wrapper(args):
3399
+ types = [arg_to_datatype(arg) for arg in args]
3400
+ assert len(set(types)) == 1
3401
+ return ListWrapper(args, type=types[0])
3402
+
3403
+
3389
3404
  def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
3390
3405
  if isinstance(arg, Function):
3391
3406
  return arg.output_datatype
@@ -3409,5 +3424,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
3409
3424
  if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
3410
3425
  return DataType.INTEGER
3411
3426
  return arg_to_datatype(arg.content)
3427
+ elif isinstance(arg, list):
3428
+ wrapper = list_to_wrapper(arg)
3429
+ return ListType(type=wrapper.type)
3412
3430
  else:
3413
3431
  raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")
@@ -38,6 +38,7 @@ class InlineDatasource(OptimizationRule):
38
38
  f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
39
39
  )
40
40
  to_inline: list[CTE] = []
41
+ force_group = False
41
42
  for parent_cte in cte.parent_ctes:
42
43
  if not parent_cte.is_root_datasource:
43
44
  self.log(f"parent {parent_cte.name} is not root")
@@ -55,15 +56,18 @@ class InlineDatasource(OptimizationRule):
55
56
  continue
56
57
  root_outputs = {x.address for x in root.output_concepts}
57
58
  cte_outputs = {x.address for x in parent_cte.output_columns}
59
+ grain_components = {x.address for x in root.grain.components}
58
60
  if not cte_outputs.issubset(root_outputs):
59
61
  self.log(f"Not all {parent_cte.name} outputs are found on datasource")
60
62
  continue
61
-
63
+ if not grain_components.issubset(cte_outputs):
64
+ self.log("Not all datasource components in cte outputs, forcing group")
65
+ force_group = True
62
66
  to_inline.append(parent_cte)
63
67
 
64
68
  for replaceable in to_inline:
65
69
  self.log(f"Inlining parent {replaceable.name}")
66
- cte.inline_parent_datasource(replaceable)
70
+ cte.inline_parent_datasource(replaceable, force_group=force_group)
67
71
 
68
72
  return optimized
69
73
 
@@ -107,14 +111,14 @@ class PredicatePushdown(OptimizationRule):
107
111
  f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
108
112
  )
109
113
  if isinstance(cte.condition, Conditional):
110
- candidates = decompose_condition(cte.condition)
114
+ candidates = cte.condition.decompose()
111
115
  else:
112
116
  candidates = [cte.condition]
113
117
  logger.info(f"Have {len(candidates)} candidates to try to push down")
114
118
  for candidate in candidates:
115
119
  conditions = {x.address for x in candidate.concept_arguments}
116
120
  for parent_cte in cte.parent_ctes:
117
- materialized = {k for k, v in parent_cte.source_map.items() if v != ""}
121
+ materialized = {k for k, v in parent_cte.source_map.items() if v != []}
118
122
  if conditions.issubset(materialized):
119
123
  if all(
120
124
  [
@@ -200,6 +204,8 @@ def is_direct_return_eligible(
200
204
  for x in derived_concepts:
201
205
  if x.derivation == PurposeLineage.WINDOW:
202
206
  return False
207
+ if x.derivation == PurposeLineage.UNNEST:
208
+ return False
203
209
  if x.derivation == PurposeLineage.AGGREGATE:
204
210
  if x.address in conditions:
205
211
  return False
@@ -236,12 +242,21 @@ def optimize_ctes(
236
242
  actions_taken = rule.optimize(cte, inverse_map)
237
243
  complete = not actions_taken
238
244
 
239
- if is_direct_return_eligible(root_cte, select):
245
+ if CONFIG.optimizations.direct_return and is_direct_return_eligible(
246
+ root_cte, select
247
+ ):
240
248
  root_cte.order_by = select.order_by
241
249
  root_cte.limit = select.limit
242
- root_cte.condition = (
243
- select.where_clause.conditional if select.where_clause else None
244
- )
250
+ if select.where_clause:
251
+
252
+ if root_cte.condition:
253
+ root_cte.condition = Conditional(
254
+ left=root_cte.condition,
255
+ operator=BooleanOperator.AND,
256
+ right=select.where_clause.conditional,
257
+ )
258
+ else:
259
+ root_cte.condition = select.where_clause.conditional
245
260
  root_cte.requires_nesting = False
246
261
  sort_select_output(root_cte, select)
247
262