pytrilogy 0.0.1.109__tar.gz → 0.0.1.111__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (100) hide show
  1. {pytrilogy-0.0.1.109/pytrilogy.egg-info → pytrilogy-0.0.1.111}/PKG-INFO +1 -1
  2. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111/pytrilogy.egg-info}/PKG-INFO +1 -1
  3. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_models.py +2 -2
  4. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_parsing.py +35 -0
  5. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/__init__.py +1 -1
  6. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/constants.py +11 -3
  7. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/enums.py +1 -0
  8. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/models.py +94 -67
  9. pytrilogy-0.0.1.111/trilogy/core/optimization.py +263 -0
  10. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/concept_strategies_v3.py +44 -19
  11. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/basic_node.py +2 -0
  12. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/common.py +3 -1
  13. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
  14. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/filter_node.py +36 -6
  15. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/node_merge_node.py +34 -23
  16. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/rowset_node.py +37 -8
  17. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/select_node.py +23 -9
  18. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/unnest_node.py +24 -3
  19. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/window_node.py +4 -2
  20. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/__init__.py +7 -6
  21. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/base_node.py +40 -6
  22. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/filter_node.py +15 -1
  23. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/group_node.py +20 -1
  24. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/merge_node.py +37 -10
  25. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/select_node_v2.py +34 -39
  26. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/unnest_node.py +12 -0
  27. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/window_node.py +11 -0
  28. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/utility.py +0 -14
  29. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/query_processor.py +125 -29
  30. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/base.py +45 -40
  31. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/executor.py +31 -3
  32. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/parse_engine.py +49 -17
  33. pytrilogy-0.0.1.109/trilogy/core/optimization.py +0 -141
  34. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/LICENSE.md +0 -0
  35. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/README.md +0 -0
  36. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pyproject.toml +0 -0
  37. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/SOURCES.txt +0 -0
  38. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/dependency_links.txt +0 -0
  39. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/entry_points.txt +0 -0
  40. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/requires.txt +0 -0
  41. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/top_level.txt +0 -0
  42. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/setup.cfg +0 -0
  43. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/setup.py +0 -0
  44. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_declarations.py +0 -0
  45. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_derived_concepts.py +0 -0
  46. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_discovery_nodes.py +0 -0
  47. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_environment.py +0 -0
  48. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_functions.py +0 -0
  49. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_imports.py +0 -0
  50. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_metadata.py +0 -0
  51. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_multi_join_assignments.py +0 -0
  52. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_partial_handling.py +0 -0
  53. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_query_processing.py +0 -0
  54. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_select.py +0 -0
  55. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_statements.py +0 -0
  56. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_undefined_concept.py +0 -0
  57. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_where_clause.py +0 -0
  58. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/compiler.py +0 -0
  59. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/__init__.py +0 -0
  60. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/constants.py +0 -0
  61. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/env_processor.py +0 -0
  62. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/environment_helpers.py +0 -0
  63. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/ergonomics.py +0 -0
  64. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/exceptions.py +0 -0
  65. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/functions.py +0 -0
  66. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/graph_models.py +0 -0
  67. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/internal.py +0 -0
  68. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/__init__.py +0 -0
  69. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/graph_utils.py +0 -0
  70. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/__init__.py +0 -0
  71. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/group_node.py +0 -0
  72. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
  73. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
  74. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/__init__.py +0 -0
  75. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/bigquery.py +0 -0
  76. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/common.py +0 -0
  77. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/config.py +0 -0
  78. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/duckdb.py +0 -0
  79. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/enums.py +0 -0
  80. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/postgres.py +0 -0
  81. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/presto.py +0 -0
  82. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/snowflake.py +0 -0
  83. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/sql_server.py +0 -0
  84. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/engine.py +0 -0
  85. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/hooks/__init__.py +0 -0
  86. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/hooks/base_hook.py +0 -0
  87. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/hooks/graph_hook.py +0 -0
  88. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/hooks/query_debugger.py +0 -0
  89. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/metadata/__init__.py +0 -0
  90. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parser.py +0 -0
  91. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/__init__.py +0 -0
  92. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/common.py +0 -0
  93. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/config.py +0 -0
  94. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/exceptions.py +0 -0
  95. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/helpers.py +0 -0
  96. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/render.py +0 -0
  97. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/py.typed +0 -0
  98. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/scripts/__init__.py +0 -0
  99. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/scripts/trilogy.py +0 -0
  100. {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/utility.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.109
3
+ Version: 0.0.1.111
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.109
3
+ Version: 0.0.1.111
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -34,7 +34,7 @@ def test_cte_merge(test_environment, test_environment_graph):
34
34
  joins=[],
35
35
  source_map={outputs[0].address: {datasource}},
36
36
  ),
37
- source_map={c.address: datasource.identifier for c in outputs},
37
+ source_map={c.address: [datasource.identifier] for c in outputs},
38
38
  )
39
39
  b = CTE(
40
40
  name="testb",
@@ -48,7 +48,7 @@ def test_cte_merge(test_environment, test_environment_graph):
48
48
  joins=[],
49
49
  source_map=output_map,
50
50
  ),
51
- source_map={c.address: datasource.identifier for c in outputs},
51
+ source_map={c.address: [datasource.identifier] for c in outputs},
52
52
  )
53
53
 
54
54
  merged = a + b
@@ -30,6 +30,19 @@ def test_in():
30
30
  rendered = BaseDialect().render_expr(right)
31
31
  assert rendered.strip() == "( 1,2,3 )".strip()
32
32
 
33
+ _, parsed = parse_text(
34
+ "const order_id <- 3; SELECT order_id WHERE order_id IN (1);"
35
+ )
36
+ query = parsed[-1]
37
+ right = query.where_clause.conditional.right
38
+ assert isinstance(
39
+ right,
40
+ Parenthetical,
41
+ ), type(right)
42
+ assert right.content == 1
43
+ rendered = BaseDialect().render_expr(right)
44
+ assert rendered.strip() == "( 1 )".strip()
45
+
33
46
 
34
47
  def test_not_in():
35
48
  _, parsed = parse_text(
@@ -160,6 +173,28 @@ select
160
173
  assert env.concepts[name].keys == (env.concepts["id"],)
161
174
 
162
175
 
176
+ def test_purpose_and_derivation():
177
+ env, parsed = parse_text(
178
+ """key id int;
179
+ key other_id int;
180
+ property <id, other_id>.join_id <- id*10+other_id;
181
+
182
+
183
+ select
184
+ join_id
185
+ ;
186
+ """
187
+ )
188
+
189
+ for name in ["join_id"]:
190
+ assert name in env.concepts
191
+ assert env.concepts[name].purpose == Purpose.PROPERTY
192
+ assert env.concepts[name].keys == (
193
+ env.concepts["id"],
194
+ env.concepts["other_id"],
195
+ )
196
+
197
+
163
198
  def test_output_purpose():
164
199
 
165
200
  env, parsed = parse_text(
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.1.109"
7
+ __version__ = "0.0.1.111"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
@@ -1,8 +1,8 @@
1
1
  from logging import getLogger
2
- from dataclasses import dataclass
2
+ from dataclasses import dataclass, field
3
3
  from enum import Enum
4
4
 
5
- logger = getLogger("preql")
5
+ logger = getLogger("trilogy")
6
6
 
7
7
  DEFAULT_NAMESPACE = "local"
8
8
 
@@ -18,12 +18,20 @@ class MagicConstants(Enum):
18
18
  NULL_VALUE = MagicConstants.NULL
19
19
 
20
20
 
21
+ @dataclass
22
+ class Optimizations:
23
+ predicate_pushdown: bool = True
24
+ datasource_inlining: bool = True
25
+ direct_return: bool = True
26
+
27
+
21
28
  # TODO: support loading from environments
22
29
  @dataclass
23
30
  class Config:
24
31
  strict_mode: bool = True
25
32
  human_identifiers: bool = True
26
- inline_datasources: bool = True
33
+ validate_missing: bool = True
34
+ optimizations: Optimizations = field(default_factory=Optimizations)
27
35
 
28
36
 
29
37
  CONFIG = Config()
@@ -263,6 +263,7 @@ class SourceType(Enum):
263
263
  WINDOW = "window"
264
264
  UNNEST = "unnest"
265
265
  CONSTANT = "constant"
266
+ ROWSET = "rowset"
266
267
 
267
268
 
268
269
  class ShowCategory(Enum):
@@ -33,7 +33,13 @@ from pydantic import (
33
33
  )
34
34
  from lark.tree import Meta
35
35
  from pathlib import Path
36
- from trilogy.constants import logger, DEFAULT_NAMESPACE, ENV_CACHE_NAME, MagicConstants
36
+ from trilogy.constants import (
37
+ logger,
38
+ DEFAULT_NAMESPACE,
39
+ ENV_CACHE_NAME,
40
+ MagicConstants,
41
+ CONFIG,
42
+ )
37
43
  from trilogy.core.constants import (
38
44
  ALL_ROWS_CONCEPT,
39
45
  INTERNAL_NAMESPACE,
@@ -61,7 +67,6 @@ from trilogy.core.enums import (
61
67
  from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
62
68
  from trilogy.utility import unique
63
69
  from collections import UserList
64
- from trilogy.utility import string_to_hash
65
70
  from functools import cached_property
66
71
  from abc import ABC
67
72
 
@@ -129,7 +134,7 @@ class ConceptArgs(ABC):
129
134
  raise NotImplementedError
130
135
 
131
136
  @property
132
- def existence_arguments(self) -> List["Concept"]:
137
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
133
138
  return []
134
139
 
135
140
  @property
@@ -281,9 +286,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
281
286
  MultiSelectStatement | MergeStatement,
282
287
  ]
283
288
  ] = None
284
- # lineage: Annotated[Optional[
285
- # Union[Function, WindowItem, FilterItem, AggregateWrapper]
286
- # ], WrapValidator(lineage_validator)] = None
287
289
  namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
288
290
  keys: Optional[Tuple["Concept", ...]] = None
289
291
  grain: "Grain" = Field(default=None, validate_default=True)
@@ -621,6 +623,12 @@ class Grain(BaseModel):
621
623
  if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
622
624
  if all([c in v2 for c in sub.keys]):
623
625
  continue
626
+ elif sub.derivation == PurposeLineage.MERGE and isinstance(
627
+ sub.lineage, MergeStatement
628
+ ):
629
+ parents = sub.lineage.concepts
630
+ if any([p in v2 for p in parents]):
631
+ continue
624
632
  final.append(sub)
625
633
  v2 = sorted(final, key=lambda x: x.name)
626
634
  return v2
@@ -966,23 +974,6 @@ class ConceptTransform(Namespaced, BaseModel):
966
974
  modifiers=self.modifiers,
967
975
  )
968
976
 
969
- def with_filter(self, where: "WhereClause") -> "ConceptTransform":
970
- id_hash = string_to_hash(str(where))
971
- new_parent_concept = Concept(
972
- name=f"_anon_concept_transform_filter_input_{id_hash}",
973
- datatype=self.output.datatype,
974
- purpose=self.output.purpose,
975
- lineage=self.output.lineage,
976
- namespace=DEFAULT_NAMESPACE,
977
- grain=self.output.grain,
978
- keys=self.output.keys,
979
- )
980
- new_parent = FilterItem(content=new_parent_concept, where=where)
981
- self.output.lineage = new_parent
982
- return ConceptTransform(
983
- function=new_parent, output=self.output, modifiers=self.modifiers
984
- )
985
-
986
977
 
987
978
  class Window(BaseModel):
988
979
  count: int
@@ -1611,13 +1602,15 @@ class Datasource(Namespaced, BaseModel):
1611
1602
  def __add__(self, other):
1612
1603
  if not other == self:
1613
1604
  raise ValueError(
1614
- "Attempted to add two datasources that are not identical, this should"
1615
- " never happen"
1605
+ "Attempted to add two datasources that are not identical, this is not a valid operation"
1616
1606
  )
1617
1607
  return self
1618
1608
 
1609
+ def __repr__(self):
1610
+ return f"Datasource<{self.namespace}.{self.identifier}@<{self.grain}>"
1611
+
1619
1612
  def __str__(self):
1620
- return f"{self.namespace}.{self.identifier}@<{self.grain}>"
1613
+ return self.__repr__()
1621
1614
 
1622
1615
  def __hash__(self):
1623
1616
  return (self.namespace + self.identifier).__hash__()
@@ -1786,6 +1779,7 @@ class QueryDatasource(BaseModel):
1786
1779
  input_concepts: List[Concept]
1787
1780
  output_concepts: List[Concept]
1788
1781
  source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
1782
+
1789
1783
  datasources: List[Union[Datasource, "QueryDatasource"]]
1790
1784
  grain: Grain
1791
1785
  joins: List[BaseJoin | UnnestJoin]
@@ -1799,6 +1793,12 @@ class QueryDatasource(BaseModel):
1799
1793
  join_derived_concepts: List[Concept] = Field(default_factory=list)
1800
1794
  hidden_concepts: List[Concept] = Field(default_factory=list)
1801
1795
  force_group: bool | None = None
1796
+ existence_source_map: Dict[str, Set[Union[Datasource, "QueryDatasource"]]] = Field(
1797
+ default_factory=dict
1798
+ )
1799
+
1800
+ def __repr__(self):
1801
+ return f"{self.identifier}@<{self.grain}>"
1802
1802
 
1803
1803
  @property
1804
1804
  def non_partial_concept_addresses(self) -> List[str]:
@@ -1841,14 +1841,14 @@ class QueryDatasource(BaseModel):
1841
1841
  for k, _ in v.items():
1842
1842
  seen.add(k)
1843
1843
  for x in expected:
1844
- if x not in seen:
1844
+ if x not in seen and CONFIG.validate_missing:
1845
1845
  raise SyntaxError(
1846
1846
  f"source map missing {x} on (expected {expected}, have {seen})"
1847
1847
  )
1848
1848
  return v
1849
1849
 
1850
1850
  def __str__(self):
1851
- return f"{self.identifier}@<{self.grain}>"
1851
+ return self.__repr__()
1852
1852
 
1853
1853
  def __hash__(self):
1854
1854
  return (self.identifier).__hash__()
@@ -1941,6 +1941,9 @@ class QueryDatasource(BaseModel):
1941
1941
  ),
1942
1942
  join_derived_concepts=self.join_derived_concepts,
1943
1943
  force_group=self.force_group,
1944
+ hidden_concepts=unique(
1945
+ self.hidden_concepts + other.hidden_concepts, "address"
1946
+ ),
1944
1947
  )
1945
1948
 
1946
1949
  return qds
@@ -2007,10 +2010,11 @@ class CTE(BaseModel):
2007
2010
  name: str
2008
2011
  source: "QueryDatasource"
2009
2012
  output_columns: List[Concept]
2010
- source_map: Dict[str, str | list[str]]
2013
+ source_map: Dict[str, list[str]]
2011
2014
  grain: Grain
2012
2015
  base: bool = False
2013
2016
  group_to_grain: bool = False
2017
+ existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
2014
2018
  parent_ctes: List["CTE"] = Field(default_factory=list)
2015
2019
  joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
2016
2020
  condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
@@ -2021,6 +2025,7 @@ class CTE(BaseModel):
2021
2025
  limit: Optional[int] = None
2022
2026
  requires_nesting: bool = True
2023
2027
  base_name_override: Optional[str] = None
2028
+ base_alias_override: Optional[str] = None
2024
2029
 
2025
2030
  @computed_field # type: ignore
2026
2031
  @property
@@ -2031,7 +2036,7 @@ class CTE(BaseModel):
2031
2036
  def validate_output_columns(cls, v):
2032
2037
  return unique(v, "address")
2033
2038
 
2034
- def inline_parent_datasource(self, parent: CTE) -> bool:
2039
+ def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
2035
2040
  qds_being_inlined = parent.source
2036
2041
  ds_being_inlined = qds_being_inlined.datasources[0]
2037
2042
  if not isinstance(ds_being_inlined, Datasource):
@@ -2047,6 +2052,7 @@ class CTE(BaseModel):
2047
2052
  # need to identify this before updating joins
2048
2053
  if self.base_name == parent.name:
2049
2054
  self.base_name_override = ds_being_inlined.safe_location
2055
+ self.base_alias_override = ds_being_inlined.identifier
2050
2056
 
2051
2057
  for join in self.joins:
2052
2058
  if isinstance(join, InstantiatedUnnestJoin):
@@ -2063,6 +2069,8 @@ class CTE(BaseModel):
2063
2069
  elif v == parent.name:
2064
2070
  self.source_map[k] = ds_being_inlined.name
2065
2071
  self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2072
+ if force_group:
2073
+ self.group_to_grain = True
2066
2074
  return True
2067
2075
 
2068
2076
  def __add__(self, other: "CTE"):
@@ -2101,6 +2109,9 @@ class CTE(BaseModel):
2101
2109
  self.source.output_concepts = unique(
2102
2110
  self.source.output_concepts + other.source.output_concepts, "address"
2103
2111
  )
2112
+ self.hidden_concepts = unique(
2113
+ self.hidden_concepts + other.hidden_concepts, "address"
2114
+ )
2104
2115
  return self
2105
2116
 
2106
2117
  @property
@@ -2120,9 +2131,6 @@ class CTE(BaseModel):
2120
2131
  if self.base_name_override:
2121
2132
  return self.base_name_override
2122
2133
  # if this cte selects from a single datasource, select right from it
2123
- valid_joins: List[Join] = [
2124
- join for join in self.joins if isinstance(join, Join)
2125
- ]
2126
2134
  if self.is_root_datasource:
2127
2135
  return self.source.datasources[0].safe_location
2128
2136
 
@@ -2130,33 +2138,16 @@ class CTE(BaseModel):
2130
2138
  # as the root
2131
2139
  elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
2132
2140
  return self.parent_ctes[0].name
2133
- elif valid_joins and len(valid_joins) > 0:
2134
- candidates = [x.left_cte.name for x in valid_joins]
2135
- disallowed = [x.right_cte.name for x in valid_joins]
2136
- try:
2137
- return [y for y in candidates if y not in disallowed][0]
2138
- except IndexError:
2139
- raise SyntaxError(
2140
- f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in self.parent_ctes]}"
2141
- )
2142
2141
  elif self.relevant_base_ctes:
2143
2142
  return self.relevant_base_ctes[0].name
2144
- elif self.parent_ctes:
2145
- raise SyntaxError(
2146
- f"{self.name} has no relevant base CTEs, {self.source_map},"
2147
- f" {[x.name for x in self.parent_ctes]}, outputs"
2148
- f" {[x.address for x in self.output_columns]}"
2149
- )
2150
2143
  return self.source.name
2151
2144
 
2152
2145
  @property
2153
2146
  def base_alias(self) -> str:
2154
-
2147
+ if self.base_alias_override:
2148
+ return self.base_alias_override
2155
2149
  if self.is_root_datasource:
2156
2150
  return self.source.datasources[0].identifier
2157
- relevant_joins = [j for j in self.joins if isinstance(j, Join)]
2158
- if relevant_joins:
2159
- return relevant_joins[0].left_cte.name
2160
2151
  elif self.relevant_base_ctes:
2161
2152
  return self.relevant_base_ctes[0].name
2162
2153
  elif self.parent_ctes:
@@ -2486,9 +2477,17 @@ class Environment(BaseModel):
2486
2477
  for datasource in self.datasources.values():
2487
2478
  for concept in datasource.output_concepts:
2488
2479
  concrete_addresses.add(concept.address)
2480
+ current_mat = [x.address for x in self.materialized_concepts]
2489
2481
  self.materialized_concepts = [
2490
2482
  c for c in self.concepts.values() if c.address in concrete_addresses
2491
2483
  ]
2484
+ new = [
2485
+ x.address
2486
+ for x in self.materialized_concepts
2487
+ if x.address not in current_mat
2488
+ ]
2489
+ if new:
2490
+ logger.info(f"Environment added new materialized concepts {new}")
2492
2491
  for concept in self.concepts.values():
2493
2492
  if concept.derivation == PurposeLineage.MERGE:
2494
2493
  ms = concept.lineage
@@ -2647,6 +2646,17 @@ class Environment(BaseModel):
2647
2646
  self.gen_concept_list_caches()
2648
2647
  return datasource
2649
2648
 
2649
+ def delete_datasource(
2650
+ self,
2651
+ address: str,
2652
+ meta: Meta | None = None,
2653
+ ) -> bool:
2654
+ if address in self.datasources:
2655
+ del self.datasources[address]
2656
+ self.gen_concept_list_caches()
2657
+ return True
2658
+ return False
2659
+
2650
2660
 
2651
2661
  class LazyEnvironment(Environment):
2652
2662
  """Variant of environment to defer parsing of a path"""
@@ -2728,6 +2738,9 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2728
2738
  def __repr__(self):
2729
2739
  return f"{str(self.left)} {self.operator.value} {str(self.right)}"
2730
2740
 
2741
+ def __str__(self):
2742
+ return self.__repr__()
2743
+
2731
2744
  def with_namespace(self, namespace: str):
2732
2745
  return self.__class__(
2733
2746
  left=(
@@ -2750,11 +2763,8 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2750
2763
  if isinstance(self.left, SelectGrain)
2751
2764
  else self.left
2752
2765
  ),
2753
- right=(
2754
- self.right.with_select_grain(grain)
2755
- if isinstance(self.right, SelectGrain)
2756
- else self.right
2757
- ),
2766
+ # the right side does NOT need to inherit select grain
2767
+ right=self.right,
2758
2768
  operator=self.operator,
2759
2769
  )
2760
2770
 
@@ -2800,8 +2810,8 @@ class SubselectComparison(Comparison):
2800
2810
  return get_concept_arguments(self.left)
2801
2811
 
2802
2812
  @property
2803
- def existence_arguments(self) -> List[Concept]:
2804
- return get_concept_arguments(self.right)
2813
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
2814
+ return [tuple(get_concept_arguments(self.right))]
2805
2815
 
2806
2816
  def with_select_grain(self, grain: Grain):
2807
2817
  # there's no need to pass the select grain through to a subselect comparison
@@ -2993,18 +3003,26 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2993
3003
  return output
2994
3004
 
2995
3005
  @property
2996
- def existence_arguments(self) -> List[Concept]:
3006
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
2997
3007
  output = []
2998
3008
  if isinstance(self.left, ConceptArgs):
2999
3009
  output += self.left.existence_arguments
3000
- else:
3001
- output += get_concept_arguments(self.left)
3002
3010
  if isinstance(self.right, ConceptArgs):
3003
3011
  output += self.right.existence_arguments
3004
- else:
3005
- output += get_concept_arguments(self.right)
3006
3012
  return output
3007
3013
 
3014
+ def decompose(self):
3015
+ chunks = []
3016
+ if self.operator == BooleanOperator.AND:
3017
+ for val in [self.left, self.right]:
3018
+ if isinstance(val, Conditional):
3019
+ chunks.extend(val.decompose())
3020
+ else:
3021
+ chunks.append(val)
3022
+ else:
3023
+ chunks.append(self)
3024
+ return chunks
3025
+
3008
3026
 
3009
3027
  class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3010
3028
  function: Function
@@ -3064,7 +3082,7 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3064
3082
  return self.conditional.row_arguments
3065
3083
 
3066
3084
  @property
3067
- def existence_arguments(self) -> List[Concept]:
3085
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3068
3086
  return self.conditional.existence_arguments
3069
3087
 
3070
3088
  def with_namespace(self, namespace: str) -> WhereClause:
@@ -3305,10 +3323,10 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3305
3323
  return self.concept_arguments
3306
3324
 
3307
3325
  @property
3308
- def existence_arguments(self) -> List[Concept]:
3326
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3309
3327
  if isinstance(self.content, ConceptArgs):
3310
3328
  return self.content.existence_arguments
3311
- return self.concept_arguments
3329
+ return []
3312
3330
 
3313
3331
  @property
3314
3332
  def input(self):
@@ -3377,6 +3395,12 @@ Function.model_rebuild()
3377
3395
  Grain.model_rebuild()
3378
3396
 
3379
3397
 
3398
+ def list_to_wrapper(args):
3399
+ types = [arg_to_datatype(arg) for arg in args]
3400
+ assert len(set(types)) == 1
3401
+ return ListWrapper(args, type=types[0])
3402
+
3403
+
3380
3404
  def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
3381
3405
  if isinstance(arg, Function):
3382
3406
  return arg.output_datatype
@@ -3400,5 +3424,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
3400
3424
  if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
3401
3425
  return DataType.INTEGER
3402
3426
  return arg_to_datatype(arg.content)
3427
+ elif isinstance(arg, list):
3428
+ wrapper = list_to_wrapper(arg)
3429
+ return ListType(type=wrapper.type)
3403
3430
  else:
3404
3431
  raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")