pytrilogy 0.0.1.110__py3-none-any.whl → 0.0.1.111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (33) hide show
  1. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/RECORD +33 -33
  3. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +1 -1
  6. trilogy/core/models.py +85 -67
  7. trilogy/core/optimization.py +23 -8
  8. trilogy/core/processing/concept_strategies_v3.py +44 -19
  9. trilogy/core/processing/node_generators/basic_node.py +2 -0
  10. trilogy/core/processing/node_generators/common.py +3 -1
  11. trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
  12. trilogy/core/processing/node_generators/filter_node.py +36 -6
  13. trilogy/core/processing/node_generators/node_merge_node.py +34 -23
  14. trilogy/core/processing/node_generators/rowset_node.py +30 -6
  15. trilogy/core/processing/node_generators/select_node.py +23 -9
  16. trilogy/core/processing/node_generators/unnest_node.py +24 -3
  17. trilogy/core/processing/node_generators/window_node.py +4 -2
  18. trilogy/core/processing/nodes/__init__.py +7 -6
  19. trilogy/core/processing/nodes/base_node.py +40 -6
  20. trilogy/core/processing/nodes/filter_node.py +15 -1
  21. trilogy/core/processing/nodes/group_node.py +20 -1
  22. trilogy/core/processing/nodes/merge_node.py +36 -7
  23. trilogy/core/processing/nodes/select_node_v2.py +34 -39
  24. trilogy/core/processing/nodes/unnest_node.py +12 -0
  25. trilogy/core/processing/nodes/window_node.py +11 -0
  26. trilogy/core/processing/utility.py +0 -14
  27. trilogy/core/query_processor.py +125 -29
  28. trilogy/dialect/base.py +45 -40
  29. trilogy/executor.py +31 -3
  30. trilogy/parsing/parse_engine.py +49 -17
  31. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/LICENSE.md +0 -0
  32. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/entry_points.txt +0 -0
  33. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.110
3
+ Version: 0.0.1.111
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -1,8 +1,8 @@
1
- trilogy/__init__.py,sha256=zz6RmV7fp8n1Ezl-E5jUW92Ns-EMfac6jy3fX5zQuzc,292
1
+ trilogy/__init__.py,sha256=PNtNelxhMDftdgkjjOKNn49l5DhtOeAgkI93YI77r64,292
2
2
  trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- trilogy/constants.py,sha256=C6mwa0BpVyENkcWi-dwqHorCP85ScTXDemfHFiSmLQ8,737
3
+ trilogy/constants.py,sha256=DJi3ESttmvqgy6fPRXiaQzqJVye6jYwf6XM89NHv0_M,735
4
4
  trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
5
- trilogy/executor.py,sha256=xF6wzbhP6a3wz4nrxsRCKeKF7qytUQEL75oI3BGJ2hQ,8744
5
+ trilogy/executor.py,sha256=_ZbjrKsUdWL52tWgpxqZnmccAuPXcIPEPN_dDSLNeAQ,9696
6
6
  trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
7
7
  trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
@@ -16,36 +16,36 @@ trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,56
16
16
  trilogy/core/functions.py,sha256=zkRReytiotOBAW-a3Ri5eoejZDYTt2-7Op80ZxZxUmw,9129
17
17
  trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
18
18
  trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
19
- trilogy/core/models.py,sha256=FNAMbqJrHh-KfC8QbBe3anWVdNxP-uMjs--MrbJM8QM,109943
20
- trilogy/core/optimization.py,sha256=5n5HMGGdTGkDZf479lZPpv2angLSxklZJ0D4DBrFDeA,8411
21
- trilogy/core/query_processor.py,sha256=6BqLYPwyFkRtueTIRFZi3IcVFTpbpGRNowayhSn3_AY,11805
19
+ trilogy/core/models.py,sha256=EDtmcDKNBUBc--jIwWtk2vkQM2Q7heuZ0VH7JF_M32s,109985
20
+ trilogy/core/optimization.py,sha256=B_EuAqHmJbuJiGyBfrC66FB_YPsGg-nbfnV8FjqfP6Q,9097
21
+ trilogy/core/query_processor.py,sha256=clIRJ6IcsqIVBPKFsxt8bqCLsLyajvAu02MUIcKQhTo,15713
22
22
  trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- trilogy/core/processing/concept_strategies_v3.py,sha256=27lZXFLgDEF3sh2MUR7HX_atVz7TC1fJB7z3oxa1TcY,22610
23
+ trilogy/core/processing/concept_strategies_v3.py,sha256=MYrpNMidqvPOg123RekOcqVTjcj03i_538gBo0MzoWE,23432
24
24
  trilogy/core/processing/graph_utils.py,sha256=ulCJ4hYAISbUxLD6VM2fah9RBPGIXSEHEPeRBSFl0Rs,1197
25
- trilogy/core/processing/utility.py,sha256=Gk35HgyIG2SSUyI5OHZcB0bw1PZUVC_aNc9Sre6xPQU,10535
25
+ trilogy/core/processing/utility.py,sha256=acxH5448-j8JXqxMRibyAxjz1Wqu7QudbR0PfMuucww,9902
26
26
  trilogy/core/processing/node_generators/__init__.py,sha256=LIs6uBEum8LDc-26zjyAwjxa-ay2ok9tKtPjDNvbVkE,757
27
- trilogy/core/processing/node_generators/basic_node.py,sha256=tVPmg0r0kDdABkmn6z4sxsk1hKy9yTT_Xvl1eVN2Zck,2162
28
- trilogy/core/processing/node_generators/common.py,sha256=A0zB4xr1etbEexaiSH6mVTecXY_wd7pSwWUGUt-u0eg,8882
29
- trilogy/core/processing/node_generators/concept_merge_node.py,sha256=TRbOIjLWfLB0Nl6YmMV1ao0qhPP6OQDd9M3UViWkCBU,6621
30
- trilogy/core/processing/node_generators/filter_node.py,sha256=CGALiTzKhPAvXPFAguIQfjf6I3pjlafY0uaaM9MTkIE,3414
27
+ trilogy/core/processing/node_generators/basic_node.py,sha256=HJnIhZLgkUdorKYcofe-QnKSM3Lf_3QO91cbSJhsqf4,2242
28
+ trilogy/core/processing/node_generators/common.py,sha256=liZDth7mvhkF_sUFXK7JitJsiaKD132w3ySLbF7l-nE,8956
29
+ trilogy/core/processing/node_generators/concept_merge_node.py,sha256=x4M8VVZZmBcqHDY1uq7M9KGKCBwjU6mcE_x2BOEk2Mg,7328
30
+ trilogy/core/processing/node_generators/filter_node.py,sha256=y_tqYe2So18vWHASMwVPLzDO-PnyQCO-MAlI4B-rY3Y,4526
31
31
  trilogy/core/processing/node_generators/group_node.py,sha256=xWI1xNIXEOj6jlRGD9hcv2_vVNvY6lpzJl6pQ8HuFBE,2988
32
32
  trilogy/core/processing/node_generators/group_to_node.py,sha256=BzPdYwzoo8gRMH7BDffTTXq4z-mjfCEzvfB5I-P0_nw,2941
33
33
  trilogy/core/processing/node_generators/multiselect_node.py,sha256=vP84dnLQy6dtypi6mUbt9sMAcmmrTgQ1Oz4GI6X1IEo,6421
34
- trilogy/core/processing/node_generators/node_merge_node.py,sha256=sQQ9jhw1oAJh649DBAJX6U7r_E_piFS95mxKvm7pxqQ,5818
35
- trilogy/core/processing/node_generators/rowset_node.py,sha256=BYTpXyiFJwoFp_n8kKE-HUdnwgaK_58n0rSFOz2jOVM,5141
36
- trilogy/core/processing/node_generators/select_node.py,sha256=xeCqIUEubrf3u_QQfbGdf1BG4fO0HYQ64hiFur8NUqY,20080
37
- trilogy/core/processing/node_generators/unnest_node.py,sha256=s1VXQZSf1LnX3ISeQ5JzmzmCKUw30-5OK_f0YTB9_48,1031
38
- trilogy/core/processing/node_generators/window_node.py,sha256=ekazi5eXxnShpcp-qukXNG4DHFdULoXrX-YWUWLNEpM,2527
39
- trilogy/core/processing/nodes/__init__.py,sha256=gzKxGSduIQ5QwpMWrmwSYiE8sg2mWejwVn0VvjYc6s0,3879
40
- trilogy/core/processing/nodes/base_node.py,sha256=Du7hRjVVOAiGb0okytzKIa_TQqhwTNYGU8PGNnrE1xs,9142
41
- trilogy/core/processing/nodes/filter_node.py,sha256=DqSRv8voEajPZqzeeiIsxuv4ubvsmeQcCW6x_v2CmOk,1359
42
- trilogy/core/processing/nodes/group_node.py,sha256=Y_NWB_AwFrE-YithjZ7lYYDN4e0el4su3ICq2EIr3HA,3837
43
- trilogy/core/processing/nodes/merge_node.py,sha256=uo1AfLjA02EiIpJku8T0TBjN2D-IyP6NVq5m7BfiZbQ,12413
44
- trilogy/core/processing/nodes/select_node_v2.py,sha256=tAADeVruch-flFiedbY1zi7ukMG2RpWecvxxZ5aL3ZU,6354
45
- trilogy/core/processing/nodes/unnest_node.py,sha256=t4kY3a_dR3iXistPemStfdw0uJfnxwTcoQg1HiDa3xo,1501
46
- trilogy/core/processing/nodes/window_node.py,sha256=QjAWgqBZqFSRCPwc7JBmgQJobWW50rsHI0pjJe0Zzg0,926
34
+ trilogy/core/processing/node_generators/node_merge_node.py,sha256=wNDHAbRrKSjsns-EROM_G12mRyOMjbcWpYav2uefXOE,6045
35
+ trilogy/core/processing/node_generators/rowset_node.py,sha256=eNG6rfLifUKraoRGxE8pesQMy5cKT6R5XNIaa3Wuiwk,6081
36
+ trilogy/core/processing/node_generators/select_node.py,sha256=Qb00Kizsv-877UMkGfusl5jXKXMZtZTtLks5pxU07SU,20698
37
+ trilogy/core/processing/node_generators/unnest_node.py,sha256=6CH66eGwpadNX7TzUhWZ8aqIisOtQeHINbLV6X3QBUk,1779
38
+ trilogy/core/processing/node_generators/window_node.py,sha256=9nXUXUgQrNczU1gaOqhOZPNzCUxw-lkxt0R7HORI6ss,2582
39
+ trilogy/core/processing/nodes/__init__.py,sha256=baODkJfvUoWEEbu843GEd7snubwLeOG5FQ8l-CwIaC8,3928
40
+ trilogy/core/processing/nodes/base_node.py,sha256=yhjmsAUmhHDqgbQjz_9YdfP-M5pj4xbrPRDF6Y4XVuw,10498
41
+ trilogy/core/processing/nodes/filter_node.py,sha256=rDw4vfE6tqWxuKT0arihVmIOoOWDDCyzRA-2yONX_Ek,1860
42
+ trilogy/core/processing/nodes/group_node.py,sha256=vzeU9J4xMhRrPj4-KPJTgNbH-KFu2ZS8b57SOynsdw0,4448
43
+ trilogy/core/processing/nodes/merge_node.py,sha256=FvSiTWKOzaUsXBkf6wJD8QQqQxp_aphS_I5VzNRw8Yo,13600
44
+ trilogy/core/processing/nodes/select_node_v2.py,sha256=ERCflBFzKpD5SzweMevnJLyQnxmF_-IQ6VRu5yVeiBg,6552
45
+ trilogy/core/processing/nodes/unnest_node.py,sha256=JFtm90IVM-46aCYkTNIaJah6v9ApAfonjVhcVM1HmDE,1903
46
+ trilogy/core/processing/nodes/window_node.py,sha256=X7qxLUKd3tekjUUsmH_4vz5b-U89gMnGd04VBxuu2Ns,1280
47
47
  trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- trilogy/dialect/base.py,sha256=xPB5mh6471VJLHxNdXrYvi7q7vJC_tioVR1LrLcoZc0,29394
48
+ trilogy/dialect/base.py,sha256=ii9P_OO8BhKsQVAr9A13rhx_dzRZd4wxnkL-Ul5OS74,30398
49
49
  trilogy/dialect/bigquery.py,sha256=9vxQn2BMv_oTGQSWQpoN5ho_OgqMWaHH9e-5vQVf44c,2906
50
50
  trilogy/dialect/common.py,sha256=zWrYmvevlXznocw9uGHmY5Ws1rp_kICm9zA_ulTe4eg,2165
51
51
  trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
@@ -65,13 +65,13 @@ trilogy/parsing/common.py,sha256=lz0IyVA8v-u-DGFgzkmdb4_00I--Kegmo9HNF7CrajI,579
65
65
  trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
66
66
  trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
67
67
  trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
68
- trilogy/parsing/parse_engine.py,sha256=iOqKUCyLeHyFVwwAt-XTSJGHia4zzLUN6bYDuIfJ1Pg,63938
68
+ trilogy/parsing/parse_engine.py,sha256=Xxqyx0MLRWIcjU55jRao1XHEZ5SunhbZIPhJD9-urlE,65008
69
69
  trilogy/parsing/render.py,sha256=fxjpq2FZLgllw_d4cru-t_IXNPAz2DmYkT7v9ED0XRI,11540
70
70
  trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
71
  trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
72
- pytrilogy-0.0.1.110.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
73
- pytrilogy-0.0.1.110.dist-info/METADATA,sha256=FbH2jc_Eg9QjfxlbLhTAqBzFoCxe4aM-6EbE4HDC4TM,7882
74
- pytrilogy-0.0.1.110.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
75
- pytrilogy-0.0.1.110.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
76
- pytrilogy-0.0.1.110.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
77
- pytrilogy-0.0.1.110.dist-info/RECORD,,
72
+ pytrilogy-0.0.1.111.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
73
+ pytrilogy-0.0.1.111.dist-info/METADATA,sha256=OoRPUSENsnE0Qd1-nOtrbHz2T-izBPb_hJV4jMvlIDw,7882
74
+ pytrilogy-0.0.1.111.dist-info/WHEEL,sha256=rWxmBtp7hEUqVLOnTaDOPpR-cZpCDkzhhcBce-Zyd5k,91
75
+ pytrilogy-0.0.1.111.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
76
+ pytrilogy-0.0.1.111.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
77
+ pytrilogy-0.0.1.111.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.3.0)
2
+ Generator: setuptools (71.0.4)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
trilogy/__init__.py CHANGED
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.1.110"
7
+ __version__ = "0.0.1.111"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
trilogy/constants.py CHANGED
@@ -30,7 +30,7 @@ class Optimizations:
30
30
  class Config:
31
31
  strict_mode: bool = True
32
32
  human_identifiers: bool = True
33
- inline_datasources: bool = True
33
+ validate_missing: bool = True
34
34
  optimizations: Optimizations = field(default_factory=Optimizations)
35
35
 
36
36
 
trilogy/core/models.py CHANGED
@@ -33,7 +33,13 @@ from pydantic import (
33
33
  )
34
34
  from lark.tree import Meta
35
35
  from pathlib import Path
36
- from trilogy.constants import logger, DEFAULT_NAMESPACE, ENV_CACHE_NAME, MagicConstants
36
+ from trilogy.constants import (
37
+ logger,
38
+ DEFAULT_NAMESPACE,
39
+ ENV_CACHE_NAME,
40
+ MagicConstants,
41
+ CONFIG,
42
+ )
37
43
  from trilogy.core.constants import (
38
44
  ALL_ROWS_CONCEPT,
39
45
  INTERNAL_NAMESPACE,
@@ -61,7 +67,6 @@ from trilogy.core.enums import (
61
67
  from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
62
68
  from trilogy.utility import unique
63
69
  from collections import UserList
64
- from trilogy.utility import string_to_hash
65
70
  from functools import cached_property
66
71
  from abc import ABC
67
72
 
@@ -129,7 +134,7 @@ class ConceptArgs(ABC):
129
134
  raise NotImplementedError
130
135
 
131
136
  @property
132
- def existence_arguments(self) -> List["Concept"]:
137
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
133
138
  return []
134
139
 
135
140
  @property
@@ -281,9 +286,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
281
286
  MultiSelectStatement | MergeStatement,
282
287
  ]
283
288
  ] = None
284
- # lineage: Annotated[Optional[
285
- # Union[Function, WindowItem, FilterItem, AggregateWrapper]
286
- # ], WrapValidator(lineage_validator)] = None
287
289
  namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
288
290
  keys: Optional[Tuple["Concept", ...]] = None
289
291
  grain: "Grain" = Field(default=None, validate_default=True)
@@ -621,6 +623,12 @@ class Grain(BaseModel):
621
623
  if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
622
624
  if all([c in v2 for c in sub.keys]):
623
625
  continue
626
+ elif sub.derivation == PurposeLineage.MERGE and isinstance(
627
+ sub.lineage, MergeStatement
628
+ ):
629
+ parents = sub.lineage.concepts
630
+ if any([p in v2 for p in parents]):
631
+ continue
624
632
  final.append(sub)
625
633
  v2 = sorted(final, key=lambda x: x.name)
626
634
  return v2
@@ -966,23 +974,6 @@ class ConceptTransform(Namespaced, BaseModel):
966
974
  modifiers=self.modifiers,
967
975
  )
968
976
 
969
- def with_filter(self, where: "WhereClause") -> "ConceptTransform":
970
- id_hash = string_to_hash(str(where))
971
- new_parent_concept = Concept(
972
- name=f"_anon_concept_transform_filter_input_{id_hash}",
973
- datatype=self.output.datatype,
974
- purpose=self.output.purpose,
975
- lineage=self.output.lineage,
976
- namespace=DEFAULT_NAMESPACE,
977
- grain=self.output.grain,
978
- keys=self.output.keys,
979
- )
980
- new_parent = FilterItem(content=new_parent_concept, where=where)
981
- self.output.lineage = new_parent
982
- return ConceptTransform(
983
- function=new_parent, output=self.output, modifiers=self.modifiers
984
- )
985
-
986
977
 
987
978
  class Window(BaseModel):
988
979
  count: int
@@ -1611,13 +1602,15 @@ class Datasource(Namespaced, BaseModel):
1611
1602
  def __add__(self, other):
1612
1603
  if not other == self:
1613
1604
  raise ValueError(
1614
- "Attempted to add two datasources that are not identical, this should"
1615
- " never happen"
1605
+ "Attempted to add two datasources that are not identical, this is not a valid operation"
1616
1606
  )
1617
1607
  return self
1618
1608
 
1609
+ def __repr__(self):
1610
+ return f"Datasource<{self.namespace}.{self.identifier}@<{self.grain}>"
1611
+
1619
1612
  def __str__(self):
1620
- return f"{self.namespace}.{self.identifier}@<{self.grain}>"
1613
+ return self.__repr__()
1621
1614
 
1622
1615
  def __hash__(self):
1623
1616
  return (self.namespace + self.identifier).__hash__()
@@ -1786,6 +1779,7 @@ class QueryDatasource(BaseModel):
1786
1779
  input_concepts: List[Concept]
1787
1780
  output_concepts: List[Concept]
1788
1781
  source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
1782
+
1789
1783
  datasources: List[Union[Datasource, "QueryDatasource"]]
1790
1784
  grain: Grain
1791
1785
  joins: List[BaseJoin | UnnestJoin]
@@ -1799,6 +1793,12 @@ class QueryDatasource(BaseModel):
1799
1793
  join_derived_concepts: List[Concept] = Field(default_factory=list)
1800
1794
  hidden_concepts: List[Concept] = Field(default_factory=list)
1801
1795
  force_group: bool | None = None
1796
+ existence_source_map: Dict[str, Set[Union[Datasource, "QueryDatasource"]]] = Field(
1797
+ default_factory=dict
1798
+ )
1799
+
1800
+ def __repr__(self):
1801
+ return f"{self.identifier}@<{self.grain}>"
1802
1802
 
1803
1803
  @property
1804
1804
  def non_partial_concept_addresses(self) -> List[str]:
@@ -1841,14 +1841,14 @@ class QueryDatasource(BaseModel):
1841
1841
  for k, _ in v.items():
1842
1842
  seen.add(k)
1843
1843
  for x in expected:
1844
- if x not in seen:
1844
+ if x not in seen and CONFIG.validate_missing:
1845
1845
  raise SyntaxError(
1846
1846
  f"source map missing {x} on (expected {expected}, have {seen})"
1847
1847
  )
1848
1848
  return v
1849
1849
 
1850
1850
  def __str__(self):
1851
- return f"{self.identifier}@<{self.grain}>"
1851
+ return self.__repr__()
1852
1852
 
1853
1853
  def __hash__(self):
1854
1854
  return (self.identifier).__hash__()
@@ -2010,10 +2010,11 @@ class CTE(BaseModel):
2010
2010
  name: str
2011
2011
  source: "QueryDatasource"
2012
2012
  output_columns: List[Concept]
2013
- source_map: Dict[str, str | list[str]]
2013
+ source_map: Dict[str, list[str]]
2014
2014
  grain: Grain
2015
2015
  base: bool = False
2016
2016
  group_to_grain: bool = False
2017
+ existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
2017
2018
  parent_ctes: List["CTE"] = Field(default_factory=list)
2018
2019
  joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
2019
2020
  condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
@@ -2024,6 +2025,7 @@ class CTE(BaseModel):
2024
2025
  limit: Optional[int] = None
2025
2026
  requires_nesting: bool = True
2026
2027
  base_name_override: Optional[str] = None
2028
+ base_alias_override: Optional[str] = None
2027
2029
 
2028
2030
  @computed_field # type: ignore
2029
2031
  @property
@@ -2034,7 +2036,7 @@ class CTE(BaseModel):
2034
2036
  def validate_output_columns(cls, v):
2035
2037
  return unique(v, "address")
2036
2038
 
2037
- def inline_parent_datasource(self, parent: CTE) -> bool:
2039
+ def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
2038
2040
  qds_being_inlined = parent.source
2039
2041
  ds_being_inlined = qds_being_inlined.datasources[0]
2040
2042
  if not isinstance(ds_being_inlined, Datasource):
@@ -2050,6 +2052,7 @@ class CTE(BaseModel):
2050
2052
  # need to identify this before updating joins
2051
2053
  if self.base_name == parent.name:
2052
2054
  self.base_name_override = ds_being_inlined.safe_location
2055
+ self.base_alias_override = ds_being_inlined.identifier
2053
2056
 
2054
2057
  for join in self.joins:
2055
2058
  if isinstance(join, InstantiatedUnnestJoin):
@@ -2066,6 +2069,8 @@ class CTE(BaseModel):
2066
2069
  elif v == parent.name:
2067
2070
  self.source_map[k] = ds_being_inlined.name
2068
2071
  self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2072
+ if force_group:
2073
+ self.group_to_grain = True
2069
2074
  return True
2070
2075
 
2071
2076
  def __add__(self, other: "CTE"):
@@ -2126,9 +2131,6 @@ class CTE(BaseModel):
2126
2131
  if self.base_name_override:
2127
2132
  return self.base_name_override
2128
2133
  # if this cte selects from a single datasource, select right from it
2129
- valid_joins: List[Join] = [
2130
- join for join in self.joins if isinstance(join, Join)
2131
- ]
2132
2134
  if self.is_root_datasource:
2133
2135
  return self.source.datasources[0].safe_location
2134
2136
 
@@ -2136,33 +2138,16 @@ class CTE(BaseModel):
2136
2138
  # as the root
2137
2139
  elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
2138
2140
  return self.parent_ctes[0].name
2139
- elif valid_joins and len(valid_joins) > 0:
2140
- candidates = [x.left_cte.name for x in valid_joins]
2141
- disallowed = [x.right_cte.name for x in valid_joins]
2142
- try:
2143
- return [y for y in candidates if y not in disallowed][0]
2144
- except IndexError:
2145
- raise SyntaxError(
2146
- f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in self.parent_ctes]}"
2147
- )
2148
2141
  elif self.relevant_base_ctes:
2149
2142
  return self.relevant_base_ctes[0].name
2150
- elif self.parent_ctes:
2151
- raise SyntaxError(
2152
- f"{self.name} has no relevant base CTEs, {self.source_map},"
2153
- f" {[x.name for x in self.parent_ctes]}, outputs"
2154
- f" {[x.address for x in self.output_columns]}"
2155
- )
2156
2143
  return self.source.name
2157
2144
 
2158
2145
  @property
2159
2146
  def base_alias(self) -> str:
2160
-
2147
+ if self.base_alias_override:
2148
+ return self.base_alias_override
2161
2149
  if self.is_root_datasource:
2162
2150
  return self.source.datasources[0].identifier
2163
- relevant_joins = [j for j in self.joins if isinstance(j, Join)]
2164
- if relevant_joins:
2165
- return relevant_joins[0].left_cte.name
2166
2151
  elif self.relevant_base_ctes:
2167
2152
  return self.relevant_base_ctes[0].name
2168
2153
  elif self.parent_ctes:
@@ -2492,9 +2477,17 @@ class Environment(BaseModel):
2492
2477
  for datasource in self.datasources.values():
2493
2478
  for concept in datasource.output_concepts:
2494
2479
  concrete_addresses.add(concept.address)
2480
+ current_mat = [x.address for x in self.materialized_concepts]
2495
2481
  self.materialized_concepts = [
2496
2482
  c for c in self.concepts.values() if c.address in concrete_addresses
2497
2483
  ]
2484
+ new = [
2485
+ x.address
2486
+ for x in self.materialized_concepts
2487
+ if x.address not in current_mat
2488
+ ]
2489
+ if new:
2490
+ logger.info(f"Environment added new materialized concepts {new}")
2498
2491
  for concept in self.concepts.values():
2499
2492
  if concept.derivation == PurposeLineage.MERGE:
2500
2493
  ms = concept.lineage
@@ -2653,6 +2646,17 @@ class Environment(BaseModel):
2653
2646
  self.gen_concept_list_caches()
2654
2647
  return datasource
2655
2648
 
2649
+ def delete_datasource(
2650
+ self,
2651
+ address: str,
2652
+ meta: Meta | None = None,
2653
+ ) -> bool:
2654
+ if address in self.datasources:
2655
+ del self.datasources[address]
2656
+ self.gen_concept_list_caches()
2657
+ return True
2658
+ return False
2659
+
2656
2660
 
2657
2661
  class LazyEnvironment(Environment):
2658
2662
  """Variant of environment to defer parsing of a path"""
@@ -2759,11 +2763,8 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2759
2763
  if isinstance(self.left, SelectGrain)
2760
2764
  else self.left
2761
2765
  ),
2762
- right=(
2763
- self.right.with_select_grain(grain)
2764
- if isinstance(self.right, SelectGrain)
2765
- else self.right
2766
- ),
2766
+ # the right side does NOT need to inherit select grain
2767
+ right=self.right,
2767
2768
  operator=self.operator,
2768
2769
  )
2769
2770
 
@@ -2809,8 +2810,8 @@ class SubselectComparison(Comparison):
2809
2810
  return get_concept_arguments(self.left)
2810
2811
 
2811
2812
  @property
2812
- def existence_arguments(self) -> List[Concept]:
2813
- return get_concept_arguments(self.right)
2813
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
2814
+ return [tuple(get_concept_arguments(self.right))]
2814
2815
 
2815
2816
  def with_select_grain(self, grain: Grain):
2816
2817
  # there's no need to pass the select grain through to a subselect comparison
@@ -3002,18 +3003,26 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3002
3003
  return output
3003
3004
 
3004
3005
  @property
3005
- def existence_arguments(self) -> List[Concept]:
3006
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3006
3007
  output = []
3007
3008
  if isinstance(self.left, ConceptArgs):
3008
3009
  output += self.left.existence_arguments
3009
- else:
3010
- output += get_concept_arguments(self.left)
3011
3010
  if isinstance(self.right, ConceptArgs):
3012
3011
  output += self.right.existence_arguments
3013
- else:
3014
- output += get_concept_arguments(self.right)
3015
3012
  return output
3016
3013
 
3014
+ def decompose(self):
3015
+ chunks = []
3016
+ if self.operator == BooleanOperator.AND:
3017
+ for val in [self.left, self.right]:
3018
+ if isinstance(val, Conditional):
3019
+ chunks.extend(val.decompose())
3020
+ else:
3021
+ chunks.append(val)
3022
+ else:
3023
+ chunks.append(self)
3024
+ return chunks
3025
+
3017
3026
 
3018
3027
  class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3019
3028
  function: Function
@@ -3073,7 +3082,7 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3073
3082
  return self.conditional.row_arguments
3074
3083
 
3075
3084
  @property
3076
- def existence_arguments(self) -> List[Concept]:
3085
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3077
3086
  return self.conditional.existence_arguments
3078
3087
 
3079
3088
  def with_namespace(self, namespace: str) -> WhereClause:
@@ -3314,10 +3323,10 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3314
3323
  return self.concept_arguments
3315
3324
 
3316
3325
  @property
3317
- def existence_arguments(self) -> List[Concept]:
3326
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3318
3327
  if isinstance(self.content, ConceptArgs):
3319
3328
  return self.content.existence_arguments
3320
- return self.concept_arguments
3329
+ return []
3321
3330
 
3322
3331
  @property
3323
3332
  def input(self):
@@ -3386,6 +3395,12 @@ Function.model_rebuild()
3386
3395
  Grain.model_rebuild()
3387
3396
 
3388
3397
 
3398
+ def list_to_wrapper(args):
3399
+ types = [arg_to_datatype(arg) for arg in args]
3400
+ assert len(set(types)) == 1
3401
+ return ListWrapper(args, type=types[0])
3402
+
3403
+
3389
3404
  def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
3390
3405
  if isinstance(arg, Function):
3391
3406
  return arg.output_datatype
@@ -3409,5 +3424,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
3409
3424
  if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
3410
3425
  return DataType.INTEGER
3411
3426
  return arg_to_datatype(arg.content)
3427
+ elif isinstance(arg, list):
3428
+ wrapper = list_to_wrapper(arg)
3429
+ return ListType(type=wrapper.type)
3412
3430
  else:
3413
3431
  raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")
@@ -38,6 +38,7 @@ class InlineDatasource(OptimizationRule):
38
38
  f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
39
39
  )
40
40
  to_inline: list[CTE] = []
41
+ force_group = False
41
42
  for parent_cte in cte.parent_ctes:
42
43
  if not parent_cte.is_root_datasource:
43
44
  self.log(f"parent {parent_cte.name} is not root")
@@ -55,15 +56,18 @@ class InlineDatasource(OptimizationRule):
55
56
  continue
56
57
  root_outputs = {x.address for x in root.output_concepts}
57
58
  cte_outputs = {x.address for x in parent_cte.output_columns}
59
+ grain_components = {x.address for x in root.grain.components}
58
60
  if not cte_outputs.issubset(root_outputs):
59
61
  self.log(f"Not all {parent_cte.name} outputs are found on datasource")
60
62
  continue
61
-
63
+ if not grain_components.issubset(cte_outputs):
64
+ self.log("Not all datasource components in cte outputs, forcing group")
65
+ force_group = True
62
66
  to_inline.append(parent_cte)
63
67
 
64
68
  for replaceable in to_inline:
65
69
  self.log(f"Inlining parent {replaceable.name}")
66
- cte.inline_parent_datasource(replaceable)
70
+ cte.inline_parent_datasource(replaceable, force_group=force_group)
67
71
 
68
72
  return optimized
69
73
 
@@ -107,14 +111,14 @@ class PredicatePushdown(OptimizationRule):
107
111
  f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
108
112
  )
109
113
  if isinstance(cte.condition, Conditional):
110
- candidates = decompose_condition(cte.condition)
114
+ candidates = cte.condition.decompose()
111
115
  else:
112
116
  candidates = [cte.condition]
113
117
  logger.info(f"Have {len(candidates)} candidates to try to push down")
114
118
  for candidate in candidates:
115
119
  conditions = {x.address for x in candidate.concept_arguments}
116
120
  for parent_cte in cte.parent_ctes:
117
- materialized = {k for k, v in parent_cte.source_map.items() if v != ""}
121
+ materialized = {k for k, v in parent_cte.source_map.items() if v != []}
118
122
  if conditions.issubset(materialized):
119
123
  if all(
120
124
  [
@@ -200,6 +204,8 @@ def is_direct_return_eligible(
200
204
  for x in derived_concepts:
201
205
  if x.derivation == PurposeLineage.WINDOW:
202
206
  return False
207
+ if x.derivation == PurposeLineage.UNNEST:
208
+ return False
203
209
  if x.derivation == PurposeLineage.AGGREGATE:
204
210
  if x.address in conditions:
205
211
  return False
@@ -236,12 +242,21 @@ def optimize_ctes(
236
242
  actions_taken = rule.optimize(cte, inverse_map)
237
243
  complete = not actions_taken
238
244
 
239
- if is_direct_return_eligible(root_cte, select):
245
+ if CONFIG.optimizations.direct_return and is_direct_return_eligible(
246
+ root_cte, select
247
+ ):
240
248
  root_cte.order_by = select.order_by
241
249
  root_cte.limit = select.limit
242
- root_cte.condition = (
243
- select.where_clause.conditional if select.where_clause else None
244
- )
250
+ if select.where_clause:
251
+
252
+ if root_cte.condition:
253
+ root_cte.condition = Conditional(
254
+ left=root_cte.condition,
255
+ operator=BooleanOperator.AND,
256
+ right=select.where_clause.conditional,
257
+ )
258
+ else:
259
+ root_cte.condition = select.where_clause.conditional
245
260
  root_cte.requires_nesting = False
246
261
  sort_select_output(root_cte, select)
247
262