pytrilogy 0.0.1.110__py3-none-any.whl → 0.0.1.112__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (33)
  1. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/RECORD +33 -33
  3. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +1 -1
  6. trilogy/core/models.py +93 -67
  7. trilogy/core/optimization.py +33 -19
  8. trilogy/core/processing/concept_strategies_v3.py +44 -19
  9. trilogy/core/processing/node_generators/basic_node.py +2 -0
  10. trilogy/core/processing/node_generators/common.py +3 -1
  11. trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
  12. trilogy/core/processing/node_generators/filter_node.py +36 -6
  13. trilogy/core/processing/node_generators/node_merge_node.py +34 -23
  14. trilogy/core/processing/node_generators/rowset_node.py +30 -6
  15. trilogy/core/processing/node_generators/select_node.py +23 -9
  16. trilogy/core/processing/node_generators/unnest_node.py +24 -3
  17. trilogy/core/processing/node_generators/window_node.py +4 -2
  18. trilogy/core/processing/nodes/__init__.py +7 -6
  19. trilogy/core/processing/nodes/base_node.py +40 -6
  20. trilogy/core/processing/nodes/filter_node.py +15 -1
  21. trilogy/core/processing/nodes/group_node.py +20 -1
  22. trilogy/core/processing/nodes/merge_node.py +36 -7
  23. trilogy/core/processing/nodes/select_node_v2.py +34 -39
  24. trilogy/core/processing/nodes/unnest_node.py +12 -0
  25. trilogy/core/processing/nodes/window_node.py +11 -0
  26. trilogy/core/processing/utility.py +0 -14
  27. trilogy/core/query_processor.py +125 -29
  28. trilogy/dialect/base.py +45 -40
  29. trilogy/executor.py +31 -3
  30. trilogy/parsing/parse_engine.py +65 -18
  31. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/LICENSE.md +0 -0
  32. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/entry_points.txt +0 -0
  33. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.110
3
+ Version: 0.0.1.112
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -1,8 +1,8 @@
1
- trilogy/__init__.py,sha256=zz6RmV7fp8n1Ezl-E5jUW92Ns-EMfac6jy3fX5zQuzc,292
1
+ trilogy/__init__.py,sha256=DnY_8Giv8_IhdMGbFdiIMOk0nDhb3uLiFVA0tLyveas,292
2
2
  trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- trilogy/constants.py,sha256=C6mwa0BpVyENkcWi-dwqHorCP85ScTXDemfHFiSmLQ8,737
3
+ trilogy/constants.py,sha256=DJi3ESttmvqgy6fPRXiaQzqJVye6jYwf6XM89NHv0_M,735
4
4
  trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
5
- trilogy/executor.py,sha256=xF6wzbhP6a3wz4nrxsRCKeKF7qytUQEL75oI3BGJ2hQ,8744
5
+ trilogy/executor.py,sha256=_ZbjrKsUdWL52tWgpxqZnmccAuPXcIPEPN_dDSLNeAQ,9696
6
6
  trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
7
7
  trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
@@ -16,36 +16,36 @@ trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,56
16
16
  trilogy/core/functions.py,sha256=zkRReytiotOBAW-a3Ri5eoejZDYTt2-7Op80ZxZxUmw,9129
17
17
  trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
18
18
  trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
19
- trilogy/core/models.py,sha256=FNAMbqJrHh-KfC8QbBe3anWVdNxP-uMjs--MrbJM8QM,109943
20
- trilogy/core/optimization.py,sha256=5n5HMGGdTGkDZf479lZPpv2angLSxklZJ0D4DBrFDeA,8411
21
- trilogy/core/query_processor.py,sha256=6BqLYPwyFkRtueTIRFZi3IcVFTpbpGRNowayhSn3_AY,11805
19
+ trilogy/core/models.py,sha256=r8Wn-cvcSgkZ399TGqAYlJURqdiWi5zxzHMY2CUnbP4,110175
20
+ trilogy/core/optimization.py,sha256=942MnGRzscAHcG9LsfMslIRRQBslbIiPHnAvJ3w8YRg,9157
21
+ trilogy/core/query_processor.py,sha256=clIRJ6IcsqIVBPKFsxt8bqCLsLyajvAu02MUIcKQhTo,15713
22
22
  trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- trilogy/core/processing/concept_strategies_v3.py,sha256=27lZXFLgDEF3sh2MUR7HX_atVz7TC1fJB7z3oxa1TcY,22610
23
+ trilogy/core/processing/concept_strategies_v3.py,sha256=MYrpNMidqvPOg123RekOcqVTjcj03i_538gBo0MzoWE,23432
24
24
  trilogy/core/processing/graph_utils.py,sha256=ulCJ4hYAISbUxLD6VM2fah9RBPGIXSEHEPeRBSFl0Rs,1197
25
- trilogy/core/processing/utility.py,sha256=Gk35HgyIG2SSUyI5OHZcB0bw1PZUVC_aNc9Sre6xPQU,10535
25
+ trilogy/core/processing/utility.py,sha256=acxH5448-j8JXqxMRibyAxjz1Wqu7QudbR0PfMuucww,9902
26
26
  trilogy/core/processing/node_generators/__init__.py,sha256=LIs6uBEum8LDc-26zjyAwjxa-ay2ok9tKtPjDNvbVkE,757
27
- trilogy/core/processing/node_generators/basic_node.py,sha256=tVPmg0r0kDdABkmn6z4sxsk1hKy9yTT_Xvl1eVN2Zck,2162
28
- trilogy/core/processing/node_generators/common.py,sha256=A0zB4xr1etbEexaiSH6mVTecXY_wd7pSwWUGUt-u0eg,8882
29
- trilogy/core/processing/node_generators/concept_merge_node.py,sha256=TRbOIjLWfLB0Nl6YmMV1ao0qhPP6OQDd9M3UViWkCBU,6621
30
- trilogy/core/processing/node_generators/filter_node.py,sha256=CGALiTzKhPAvXPFAguIQfjf6I3pjlafY0uaaM9MTkIE,3414
27
+ trilogy/core/processing/node_generators/basic_node.py,sha256=HJnIhZLgkUdorKYcofe-QnKSM3Lf_3QO91cbSJhsqf4,2242
28
+ trilogy/core/processing/node_generators/common.py,sha256=liZDth7mvhkF_sUFXK7JitJsiaKD132w3ySLbF7l-nE,8956
29
+ trilogy/core/processing/node_generators/concept_merge_node.py,sha256=x4M8VVZZmBcqHDY1uq7M9KGKCBwjU6mcE_x2BOEk2Mg,7328
30
+ trilogy/core/processing/node_generators/filter_node.py,sha256=y_tqYe2So18vWHASMwVPLzDO-PnyQCO-MAlI4B-rY3Y,4526
31
31
  trilogy/core/processing/node_generators/group_node.py,sha256=xWI1xNIXEOj6jlRGD9hcv2_vVNvY6lpzJl6pQ8HuFBE,2988
32
32
  trilogy/core/processing/node_generators/group_to_node.py,sha256=BzPdYwzoo8gRMH7BDffTTXq4z-mjfCEzvfB5I-P0_nw,2941
33
33
  trilogy/core/processing/node_generators/multiselect_node.py,sha256=vP84dnLQy6dtypi6mUbt9sMAcmmrTgQ1Oz4GI6X1IEo,6421
34
- trilogy/core/processing/node_generators/node_merge_node.py,sha256=sQQ9jhw1oAJh649DBAJX6U7r_E_piFS95mxKvm7pxqQ,5818
35
- trilogy/core/processing/node_generators/rowset_node.py,sha256=BYTpXyiFJwoFp_n8kKE-HUdnwgaK_58n0rSFOz2jOVM,5141
36
- trilogy/core/processing/node_generators/select_node.py,sha256=xeCqIUEubrf3u_QQfbGdf1BG4fO0HYQ64hiFur8NUqY,20080
37
- trilogy/core/processing/node_generators/unnest_node.py,sha256=s1VXQZSf1LnX3ISeQ5JzmzmCKUw30-5OK_f0YTB9_48,1031
38
- trilogy/core/processing/node_generators/window_node.py,sha256=ekazi5eXxnShpcp-qukXNG4DHFdULoXrX-YWUWLNEpM,2527
39
- trilogy/core/processing/nodes/__init__.py,sha256=gzKxGSduIQ5QwpMWrmwSYiE8sg2mWejwVn0VvjYc6s0,3879
40
- trilogy/core/processing/nodes/base_node.py,sha256=Du7hRjVVOAiGb0okytzKIa_TQqhwTNYGU8PGNnrE1xs,9142
41
- trilogy/core/processing/nodes/filter_node.py,sha256=DqSRv8voEajPZqzeeiIsxuv4ubvsmeQcCW6x_v2CmOk,1359
42
- trilogy/core/processing/nodes/group_node.py,sha256=Y_NWB_AwFrE-YithjZ7lYYDN4e0el4su3ICq2EIr3HA,3837
43
- trilogy/core/processing/nodes/merge_node.py,sha256=uo1AfLjA02EiIpJku8T0TBjN2D-IyP6NVq5m7BfiZbQ,12413
44
- trilogy/core/processing/nodes/select_node_v2.py,sha256=tAADeVruch-flFiedbY1zi7ukMG2RpWecvxxZ5aL3ZU,6354
45
- trilogy/core/processing/nodes/unnest_node.py,sha256=t4kY3a_dR3iXistPemStfdw0uJfnxwTcoQg1HiDa3xo,1501
46
- trilogy/core/processing/nodes/window_node.py,sha256=QjAWgqBZqFSRCPwc7JBmgQJobWW50rsHI0pjJe0Zzg0,926
34
+ trilogy/core/processing/node_generators/node_merge_node.py,sha256=wNDHAbRrKSjsns-EROM_G12mRyOMjbcWpYav2uefXOE,6045
35
+ trilogy/core/processing/node_generators/rowset_node.py,sha256=eNG6rfLifUKraoRGxE8pesQMy5cKT6R5XNIaa3Wuiwk,6081
36
+ trilogy/core/processing/node_generators/select_node.py,sha256=Qb00Kizsv-877UMkGfusl5jXKXMZtZTtLks5pxU07SU,20698
37
+ trilogy/core/processing/node_generators/unnest_node.py,sha256=6CH66eGwpadNX7TzUhWZ8aqIisOtQeHINbLV6X3QBUk,1779
38
+ trilogy/core/processing/node_generators/window_node.py,sha256=9nXUXUgQrNczU1gaOqhOZPNzCUxw-lkxt0R7HORI6ss,2582
39
+ trilogy/core/processing/nodes/__init__.py,sha256=baODkJfvUoWEEbu843GEd7snubwLeOG5FQ8l-CwIaC8,3928
40
+ trilogy/core/processing/nodes/base_node.py,sha256=yhjmsAUmhHDqgbQjz_9YdfP-M5pj4xbrPRDF6Y4XVuw,10498
41
+ trilogy/core/processing/nodes/filter_node.py,sha256=rDw4vfE6tqWxuKT0arihVmIOoOWDDCyzRA-2yONX_Ek,1860
42
+ trilogy/core/processing/nodes/group_node.py,sha256=vzeU9J4xMhRrPj4-KPJTgNbH-KFu2ZS8b57SOynsdw0,4448
43
+ trilogy/core/processing/nodes/merge_node.py,sha256=FvSiTWKOzaUsXBkf6wJD8QQqQxp_aphS_I5VzNRw8Yo,13600
44
+ trilogy/core/processing/nodes/select_node_v2.py,sha256=ERCflBFzKpD5SzweMevnJLyQnxmF_-IQ6VRu5yVeiBg,6552
45
+ trilogy/core/processing/nodes/unnest_node.py,sha256=JFtm90IVM-46aCYkTNIaJah6v9ApAfonjVhcVM1HmDE,1903
46
+ trilogy/core/processing/nodes/window_node.py,sha256=X7qxLUKd3tekjUUsmH_4vz5b-U89gMnGd04VBxuu2Ns,1280
47
47
  trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- trilogy/dialect/base.py,sha256=xPB5mh6471VJLHxNdXrYvi7q7vJC_tioVR1LrLcoZc0,29394
48
+ trilogy/dialect/base.py,sha256=ii9P_OO8BhKsQVAr9A13rhx_dzRZd4wxnkL-Ul5OS74,30398
49
49
  trilogy/dialect/bigquery.py,sha256=9vxQn2BMv_oTGQSWQpoN5ho_OgqMWaHH9e-5vQVf44c,2906
50
50
  trilogy/dialect/common.py,sha256=zWrYmvevlXznocw9uGHmY5Ws1rp_kICm9zA_ulTe4eg,2165
51
51
  trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
@@ -65,13 +65,13 @@ trilogy/parsing/common.py,sha256=lz0IyVA8v-u-DGFgzkmdb4_00I--Kegmo9HNF7CrajI,579
65
65
  trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
66
66
  trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
67
67
  trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
68
- trilogy/parsing/parse_engine.py,sha256=iOqKUCyLeHyFVwwAt-XTSJGHia4zzLUN6bYDuIfJ1Pg,63938
68
+ trilogy/parsing/parse_engine.py,sha256=LdxdYQpv-9pBNXXs1QdnD08fflnQ82tG6H6mVgpVeIY,65532
69
69
  trilogy/parsing/render.py,sha256=fxjpq2FZLgllw_d4cru-t_IXNPAz2DmYkT7v9ED0XRI,11540
70
70
  trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
71
  trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
72
- pytrilogy-0.0.1.110.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
73
- pytrilogy-0.0.1.110.dist-info/METADATA,sha256=FbH2jc_Eg9QjfxlbLhTAqBzFoCxe4aM-6EbE4HDC4TM,7882
74
- pytrilogy-0.0.1.110.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
75
- pytrilogy-0.0.1.110.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
76
- pytrilogy-0.0.1.110.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
77
- pytrilogy-0.0.1.110.dist-info/RECORD,,
72
+ pytrilogy-0.0.1.112.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
73
+ pytrilogy-0.0.1.112.dist-info/METADATA,sha256=jhGh-RxeT78DDSvj9YY4K_ww6-C00dhYHyD5NMs1EFY,7882
74
+ pytrilogy-0.0.1.112.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
75
+ pytrilogy-0.0.1.112.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
76
+ pytrilogy-0.0.1.112.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
77
+ pytrilogy-0.0.1.112.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.3.0)
2
+ Generator: setuptools (71.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
trilogy/__init__.py CHANGED
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.1.110"
7
+ __version__ = "0.0.1.112"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
trilogy/constants.py CHANGED
@@ -30,7 +30,7 @@ class Optimizations:
30
30
  class Config:
31
31
  strict_mode: bool = True
32
32
  human_identifiers: bool = True
33
- inline_datasources: bool = True
33
+ validate_missing: bool = True
34
34
  optimizations: Optimizations = field(default_factory=Optimizations)
35
35
 
36
36
 
trilogy/core/models.py CHANGED
@@ -33,7 +33,13 @@ from pydantic import (
33
33
  )
34
34
  from lark.tree import Meta
35
35
  from pathlib import Path
36
- from trilogy.constants import logger, DEFAULT_NAMESPACE, ENV_CACHE_NAME, MagicConstants
36
+ from trilogy.constants import (
37
+ logger,
38
+ DEFAULT_NAMESPACE,
39
+ ENV_CACHE_NAME,
40
+ MagicConstants,
41
+ CONFIG,
42
+ )
37
43
  from trilogy.core.constants import (
38
44
  ALL_ROWS_CONCEPT,
39
45
  INTERNAL_NAMESPACE,
@@ -61,7 +67,6 @@ from trilogy.core.enums import (
61
67
  from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
62
68
  from trilogy.utility import unique
63
69
  from collections import UserList
64
- from trilogy.utility import string_to_hash
65
70
  from functools import cached_property
66
71
  from abc import ABC
67
72
 
@@ -129,7 +134,7 @@ class ConceptArgs(ABC):
129
134
  raise NotImplementedError
130
135
 
131
136
  @property
132
- def existence_arguments(self) -> List["Concept"]:
137
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
133
138
  return []
134
139
 
135
140
  @property
@@ -281,9 +286,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
281
286
  MultiSelectStatement | MergeStatement,
282
287
  ]
283
288
  ] = None
284
- # lineage: Annotated[Optional[
285
- # Union[Function, WindowItem, FilterItem, AggregateWrapper]
286
- # ], WrapValidator(lineage_validator)] = None
287
289
  namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
288
290
  keys: Optional[Tuple["Concept", ...]] = None
289
291
  grain: "Grain" = Field(default=None, validate_default=True)
@@ -621,6 +623,12 @@ class Grain(BaseModel):
621
623
  if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
622
624
  if all([c in v2 for c in sub.keys]):
623
625
  continue
626
+ elif sub.derivation == PurposeLineage.MERGE and isinstance(
627
+ sub.lineage, MergeStatement
628
+ ):
629
+ parents = sub.lineage.concepts
630
+ if any([p in v2 for p in parents]):
631
+ continue
624
632
  final.append(sub)
625
633
  v2 = sorted(final, key=lambda x: x.name)
626
634
  return v2
@@ -966,23 +974,6 @@ class ConceptTransform(Namespaced, BaseModel):
966
974
  modifiers=self.modifiers,
967
975
  )
968
976
 
969
- def with_filter(self, where: "WhereClause") -> "ConceptTransform":
970
- id_hash = string_to_hash(str(where))
971
- new_parent_concept = Concept(
972
- name=f"_anon_concept_transform_filter_input_{id_hash}",
973
- datatype=self.output.datatype,
974
- purpose=self.output.purpose,
975
- lineage=self.output.lineage,
976
- namespace=DEFAULT_NAMESPACE,
977
- grain=self.output.grain,
978
- keys=self.output.keys,
979
- )
980
- new_parent = FilterItem(content=new_parent_concept, where=where)
981
- self.output.lineage = new_parent
982
- return ConceptTransform(
983
- function=new_parent, output=self.output, modifiers=self.modifiers
984
- )
985
-
986
977
 
987
978
  class Window(BaseModel):
988
979
  count: int
@@ -1611,13 +1602,15 @@ class Datasource(Namespaced, BaseModel):
1611
1602
  def __add__(self, other):
1612
1603
  if not other == self:
1613
1604
  raise ValueError(
1614
- "Attempted to add two datasources that are not identical, this should"
1615
- " never happen"
1605
+ "Attempted to add two datasources that are not identical, this is not a valid operation"
1616
1606
  )
1617
1607
  return self
1618
1608
 
1609
+ def __repr__(self):
1610
+ return f"Datasource<{self.namespace}.{self.identifier}@<{self.grain}>"
1611
+
1619
1612
  def __str__(self):
1620
- return f"{self.namespace}.{self.identifier}@<{self.grain}>"
1613
+ return self.__repr__()
1621
1614
 
1622
1615
  def __hash__(self):
1623
1616
  return (self.namespace + self.identifier).__hash__()
@@ -1786,6 +1779,7 @@ class QueryDatasource(BaseModel):
1786
1779
  input_concepts: List[Concept]
1787
1780
  output_concepts: List[Concept]
1788
1781
  source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
1782
+
1789
1783
  datasources: List[Union[Datasource, "QueryDatasource"]]
1790
1784
  grain: Grain
1791
1785
  joins: List[BaseJoin | UnnestJoin]
@@ -1799,6 +1793,12 @@ class QueryDatasource(BaseModel):
1799
1793
  join_derived_concepts: List[Concept] = Field(default_factory=list)
1800
1794
  hidden_concepts: List[Concept] = Field(default_factory=list)
1801
1795
  force_group: bool | None = None
1796
+ existence_source_map: Dict[str, Set[Union[Datasource, "QueryDatasource"]]] = Field(
1797
+ default_factory=dict
1798
+ )
1799
+
1800
+ def __repr__(self):
1801
+ return f"{self.identifier}@<{self.grain}>"
1802
1802
 
1803
1803
  @property
1804
1804
  def non_partial_concept_addresses(self) -> List[str]:
@@ -1841,14 +1841,14 @@ class QueryDatasource(BaseModel):
1841
1841
  for k, _ in v.items():
1842
1842
  seen.add(k)
1843
1843
  for x in expected:
1844
- if x not in seen:
1844
+ if x not in seen and CONFIG.validate_missing:
1845
1845
  raise SyntaxError(
1846
1846
  f"source map missing {x} on (expected {expected}, have {seen})"
1847
1847
  )
1848
1848
  return v
1849
1849
 
1850
1850
  def __str__(self):
1851
- return f"{self.identifier}@<{self.grain}>"
1851
+ return self.__repr__()
1852
1852
 
1853
1853
  def __hash__(self):
1854
1854
  return (self.identifier).__hash__()
@@ -2010,10 +2010,11 @@ class CTE(BaseModel):
2010
2010
  name: str
2011
2011
  source: "QueryDatasource"
2012
2012
  output_columns: List[Concept]
2013
- source_map: Dict[str, str | list[str]]
2013
+ source_map: Dict[str, list[str]]
2014
2014
  grain: Grain
2015
2015
  base: bool = False
2016
2016
  group_to_grain: bool = False
2017
+ existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
2017
2018
  parent_ctes: List["CTE"] = Field(default_factory=list)
2018
2019
  joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
2019
2020
  condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
@@ -2024,6 +2025,7 @@ class CTE(BaseModel):
2024
2025
  limit: Optional[int] = None
2025
2026
  requires_nesting: bool = True
2026
2027
  base_name_override: Optional[str] = None
2028
+ base_alias_override: Optional[str] = None
2027
2029
 
2028
2030
  @computed_field # type: ignore
2029
2031
  @property
@@ -2034,11 +2036,19 @@ class CTE(BaseModel):
2034
2036
  def validate_output_columns(cls, v):
2035
2037
  return unique(v, "address")
2036
2038
 
2037
- def inline_parent_datasource(self, parent: CTE) -> bool:
2039
+ def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
2038
2040
  qds_being_inlined = parent.source
2039
2041
  ds_being_inlined = qds_being_inlined.datasources[0]
2040
2042
  if not isinstance(ds_being_inlined, Datasource):
2041
2043
  return False
2044
+ if any(
2045
+ [
2046
+ x.identifier == ds_being_inlined.identifier
2047
+ for x in self.source.datasources
2048
+ ]
2049
+ ):
2050
+ return False
2051
+
2042
2052
  self.source.datasources = [
2043
2053
  ds_being_inlined,
2044
2054
  *[
@@ -2050,6 +2060,7 @@ class CTE(BaseModel):
2050
2060
  # need to identify this before updating joins
2051
2061
  if self.base_name == parent.name:
2052
2062
  self.base_name_override = ds_being_inlined.safe_location
2063
+ self.base_alias_override = ds_being_inlined.identifier
2053
2064
 
2054
2065
  for join in self.joins:
2055
2066
  if isinstance(join, InstantiatedUnnestJoin):
@@ -2066,6 +2077,8 @@ class CTE(BaseModel):
2066
2077
  elif v == parent.name:
2067
2078
  self.source_map[k] = ds_being_inlined.name
2068
2079
  self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2080
+ if force_group:
2081
+ self.group_to_grain = True
2069
2082
  return True
2070
2083
 
2071
2084
  def __add__(self, other: "CTE"):
@@ -2126,9 +2139,6 @@ class CTE(BaseModel):
2126
2139
  if self.base_name_override:
2127
2140
  return self.base_name_override
2128
2141
  # if this cte selects from a single datasource, select right from it
2129
- valid_joins: List[Join] = [
2130
- join for join in self.joins if isinstance(join, Join)
2131
- ]
2132
2142
  if self.is_root_datasource:
2133
2143
  return self.source.datasources[0].safe_location
2134
2144
 
@@ -2136,33 +2146,16 @@ class CTE(BaseModel):
2136
2146
  # as the root
2137
2147
  elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
2138
2148
  return self.parent_ctes[0].name
2139
- elif valid_joins and len(valid_joins) > 0:
2140
- candidates = [x.left_cte.name for x in valid_joins]
2141
- disallowed = [x.right_cte.name for x in valid_joins]
2142
- try:
2143
- return [y for y in candidates if y not in disallowed][0]
2144
- except IndexError:
2145
- raise SyntaxError(
2146
- f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in self.parent_ctes]}"
2147
- )
2148
2149
  elif self.relevant_base_ctes:
2149
2150
  return self.relevant_base_ctes[0].name
2150
- elif self.parent_ctes:
2151
- raise SyntaxError(
2152
- f"{self.name} has no relevant base CTEs, {self.source_map},"
2153
- f" {[x.name for x in self.parent_ctes]}, outputs"
2154
- f" {[x.address for x in self.output_columns]}"
2155
- )
2156
2151
  return self.source.name
2157
2152
 
2158
2153
  @property
2159
2154
  def base_alias(self) -> str:
2160
-
2155
+ if self.base_alias_override:
2156
+ return self.base_alias_override
2161
2157
  if self.is_root_datasource:
2162
2158
  return self.source.datasources[0].identifier
2163
- relevant_joins = [j for j in self.joins if isinstance(j, Join)]
2164
- if relevant_joins:
2165
- return relevant_joins[0].left_cte.name
2166
2159
  elif self.relevant_base_ctes:
2167
2160
  return self.relevant_base_ctes[0].name
2168
2161
  elif self.parent_ctes:
@@ -2492,9 +2485,17 @@ class Environment(BaseModel):
2492
2485
  for datasource in self.datasources.values():
2493
2486
  for concept in datasource.output_concepts:
2494
2487
  concrete_addresses.add(concept.address)
2488
+ current_mat = [x.address for x in self.materialized_concepts]
2495
2489
  self.materialized_concepts = [
2496
2490
  c for c in self.concepts.values() if c.address in concrete_addresses
2497
2491
  ]
2492
+ new = [
2493
+ x.address
2494
+ for x in self.materialized_concepts
2495
+ if x.address not in current_mat
2496
+ ]
2497
+ if new:
2498
+ logger.info(f"Environment added new materialized concepts {new}")
2498
2499
  for concept in self.concepts.values():
2499
2500
  if concept.derivation == PurposeLineage.MERGE:
2500
2501
  ms = concept.lineage
@@ -2653,6 +2654,17 @@ class Environment(BaseModel):
2653
2654
  self.gen_concept_list_caches()
2654
2655
  return datasource
2655
2656
 
2657
+ def delete_datasource(
2658
+ self,
2659
+ address: str,
2660
+ meta: Meta | None = None,
2661
+ ) -> bool:
2662
+ if address in self.datasources:
2663
+ del self.datasources[address]
2664
+ self.gen_concept_list_caches()
2665
+ return True
2666
+ return False
2667
+
2656
2668
 
2657
2669
  class LazyEnvironment(Environment):
2658
2670
  """Variant of environment to defer parsing of a path"""
@@ -2759,11 +2771,8 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2759
2771
  if isinstance(self.left, SelectGrain)
2760
2772
  else self.left
2761
2773
  ),
2762
- right=(
2763
- self.right.with_select_grain(grain)
2764
- if isinstance(self.right, SelectGrain)
2765
- else self.right
2766
- ),
2774
+ # the right side does NOT need to inherit select grain
2775
+ right=self.right,
2767
2776
  operator=self.operator,
2768
2777
  )
2769
2778
 
@@ -2809,8 +2818,8 @@ class SubselectComparison(Comparison):
2809
2818
  return get_concept_arguments(self.left)
2810
2819
 
2811
2820
  @property
2812
- def existence_arguments(self) -> List[Concept]:
2813
- return get_concept_arguments(self.right)
2821
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
2822
+ return [tuple(get_concept_arguments(self.right))]
2814
2823
 
2815
2824
  def with_select_grain(self, grain: Grain):
2816
2825
  # there's no need to pass the select grain through to a subselect comparison
@@ -3002,18 +3011,26 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3002
3011
  return output
3003
3012
 
3004
3013
  @property
3005
- def existence_arguments(self) -> List[Concept]:
3014
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3006
3015
  output = []
3007
3016
  if isinstance(self.left, ConceptArgs):
3008
3017
  output += self.left.existence_arguments
3009
- else:
3010
- output += get_concept_arguments(self.left)
3011
3018
  if isinstance(self.right, ConceptArgs):
3012
3019
  output += self.right.existence_arguments
3013
- else:
3014
- output += get_concept_arguments(self.right)
3015
3020
  return output
3016
3021
 
3022
+ def decompose(self):
3023
+ chunks = []
3024
+ if self.operator == BooleanOperator.AND:
3025
+ for val in [self.left, self.right]:
3026
+ if isinstance(val, Conditional):
3027
+ chunks.extend(val.decompose())
3028
+ else:
3029
+ chunks.append(val)
3030
+ else:
3031
+ chunks.append(self)
3032
+ return chunks
3033
+
3017
3034
 
3018
3035
  class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3019
3036
  function: Function
@@ -3073,7 +3090,7 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3073
3090
  return self.conditional.row_arguments
3074
3091
 
3075
3092
  @property
3076
- def existence_arguments(self) -> List[Concept]:
3093
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3077
3094
  return self.conditional.existence_arguments
3078
3095
 
3079
3096
  def with_namespace(self, namespace: str) -> WhereClause:
@@ -3314,10 +3331,10 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3314
3331
  return self.concept_arguments
3315
3332
 
3316
3333
  @property
3317
- def existence_arguments(self) -> List[Concept]:
3334
+ def existence_arguments(self) -> list[tuple["Concept", ...]]:
3318
3335
  if isinstance(self.content, ConceptArgs):
3319
3336
  return self.content.existence_arguments
3320
- return self.concept_arguments
3337
+ return []
3321
3338
 
3322
3339
  @property
3323
3340
  def input(self):
@@ -3386,6 +3403,12 @@ Function.model_rebuild()
3386
3403
  Grain.model_rebuild()
3387
3404
 
3388
3405
 
3406
+ def list_to_wrapper(args):
3407
+ types = [arg_to_datatype(arg) for arg in args]
3408
+ assert len(set(types)) == 1
3409
+ return ListWrapper(args, type=types[0])
3410
+
3411
+
3389
3412
  def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
3390
3413
  if isinstance(arg, Function):
3391
3414
  return arg.output_datatype
@@ -3409,5 +3432,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
3409
3432
  if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
3410
3433
  return DataType.INTEGER
3411
3434
  return arg_to_datatype(arg.content)
3435
+ elif isinstance(arg, list):
3436
+ wrapper = list_to_wrapper(arg)
3437
+ return ListType(type=wrapper.type)
3412
3438
  else:
3413
3439
  raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")
trilogy/core/optimization.py CHANGED
@@ -12,9 +12,6 @@ from trilogy.constants import logger, CONFIG
12
12
  from abc import ABC
13
13
 
14
14
 
15
- REGISTERED_RULES: list["OptimizationRule"] = []
16
-
17
-
18
15
  class OptimizationRule(ABC):
19
16
 
20
17
  def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
@@ -38,6 +35,7 @@ class InlineDatasource(OptimizationRule):
38
35
  f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
39
36
  )
40
37
  to_inline: list[CTE] = []
38
+ force_group = False
41
39
  for parent_cte in cte.parent_ctes:
42
40
  if not parent_cte.is_root_datasource:
43
41
  self.log(f"parent {parent_cte.name} is not root")
@@ -55,20 +53,25 @@ class InlineDatasource(OptimizationRule):
55
53
  continue
56
54
  root_outputs = {x.address for x in root.output_concepts}
57
55
  cte_outputs = {x.address for x in parent_cte.output_columns}
56
+ grain_components = {x.address for x in root.grain.components}
58
57
  if not cte_outputs.issubset(root_outputs):
59
58
  self.log(f"Not all {parent_cte.name} outputs are found on datasource")
60
59
  continue
61
-
60
+ if not grain_components.issubset(cte_outputs):
61
+ self.log("Not all datasource components in cte outputs, forcing group")
62
+ force_group = True
62
63
  to_inline.append(parent_cte)
63
64
 
64
65
  for replaceable in to_inline:
65
- self.log(f"Inlining parent {replaceable.name}")
66
- cte.inline_parent_datasource(replaceable)
67
66
 
67
+ result = cte.inline_parent_datasource(replaceable, force_group=force_group)
68
+ if result:
69
+ self.log(f"Inlined parent {replaceable.name}")
70
+ else:
71
+ self.log(f"Failed to inline {replaceable.name}")
68
72
  return optimized
69
73
 
70
74
 
71
- # This will be used in the future for more complex condition decomposition
72
75
  def decompose_condition(conditional: Conditional):
73
76
  chunks = []
74
77
  if conditional.operator == BooleanOperator.AND:
@@ -107,14 +110,14 @@ class PredicatePushdown(OptimizationRule):
107
110
  f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
108
111
  )
109
112
  if isinstance(cte.condition, Conditional):
110
- candidates = decompose_condition(cte.condition)
113
+ candidates = cte.condition.decompose()
111
114
  else:
112
115
  candidates = [cte.condition]
113
116
  logger.info(f"Have {len(candidates)} candidates to try to push down")
114
117
  for candidate in candidates:
115
118
  conditions = {x.address for x in candidate.concept_arguments}
116
119
  for parent_cte in cte.parent_ctes:
117
- materialized = {k for k, v in parent_cte.source_map.items() if v != ""}
120
+ materialized = {k for k, v in parent_cte.source_map.items() if v != []}
118
121
  if conditions.issubset(materialized):
119
122
  if all(
120
123
  [
@@ -150,12 +153,6 @@ class PredicatePushdown(OptimizationRule):
150
153
  return optimized
151
154
 
152
155
 
153
- if CONFIG.optimizations.datasource_inlining:
154
- REGISTERED_RULES.append(InlineDatasource())
155
- if CONFIG.optimizations.predicate_pushdown:
156
- REGISTERED_RULES.append(PredicatePushdown())
157
-
158
-
159
156
  def filter_irrelevant_ctes(
160
157
  input: list[CTE],
161
158
  root_cte: CTE,
@@ -200,6 +197,8 @@ def is_direct_return_eligible(
200
197
  for x in derived_concepts:
201
198
  if x.derivation == PurposeLineage.WINDOW:
202
199
  return False
200
+ if x.derivation == PurposeLineage.UNNEST:
201
+ return False
203
202
  if x.derivation == PurposeLineage.AGGREGATE:
204
203
  if x.address in conditions:
205
204
  return False
@@ -227,6 +226,12 @@ def optimize_ctes(
227
226
  input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
228
227
  ):
229
228
  complete = False
229
+ REGISTERED_RULES: list["OptimizationRule"] = []
230
+
231
+ if CONFIG.optimizations.datasource_inlining:
232
+ REGISTERED_RULES.append(InlineDatasource())
233
+ if CONFIG.optimizations.predicate_pushdown:
234
+ REGISTERED_RULES.append(PredicatePushdown())
230
235
 
231
236
  while not complete:
232
237
  actions_taken = False
@@ -236,12 +241,21 @@ def optimize_ctes(
236
241
  actions_taken = rule.optimize(cte, inverse_map)
237
242
  complete = not actions_taken
238
243
 
239
- if is_direct_return_eligible(root_cte, select):
244
+ if CONFIG.optimizations.direct_return and is_direct_return_eligible(
245
+ root_cte, select
246
+ ):
240
247
  root_cte.order_by = select.order_by
241
248
  root_cte.limit = select.limit
242
- root_cte.condition = (
243
- select.where_clause.conditional if select.where_clause else None
244
- )
249
+ if select.where_clause:
250
+
251
+ if root_cte.condition:
252
+ root_cte.condition = Conditional(
253
+ left=root_cte.condition,
254
+ operator=BooleanOperator.AND,
255
+ right=select.where_clause.conditional,
256
+ )
257
+ else:
258
+ root_cte.condition = select.where_clause.conditional
245
259
  root_cte.requires_nesting = False
246
260
  sort_select_output(root_cte, select)
247
261