pytrilogy 0.0.1.105__py3-none-any.whl → 0.0.1.107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.105
3
+ Version: 0.0.1.107
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -275,3 +275,81 @@ but all are worth checking out. Please open PRs/comment for anything missed!
275
275
  - [malloy](https://github.com/malloydata/malloy)
276
276
  - [preql](https://github.com/erezsh/Preql)
277
277
  - [PRQL](https://github.com/PRQL/prql)
278
+
279
+ ## Minimal Syntax Reference
280
+
281
+ #### IMPORT
282
+
283
+ `import <path> as <alias>;`
284
+
285
+ #### CONCEPT
286
+
287
+ Types: `string | int | float | bool | date | datetime | time | timestamp | interval`;
288
+
289
+ Key:
290
+ `key <name> <type>;`
291
+
292
+ Property:
293
+ `property <key>.<name> <type>;`
294
+
295
+ Transformation:
296
+ `auto <name> <- <expression>;`
297
+
298
+ #### DATASOURCE
299
+ ```sql
300
+ datasource <name>(
301
+ <column>:<concept>,
302
+ <column>:<concept>,
303
+ )
304
+ grain(<concept>, <concept>)
305
+ address <table>;
306
+ ```
307
+
308
+ #### SELECT
309
+
310
+ Primary access
311
+
312
+ ```sql
313
+ select
314
+ <concept>,
315
+ <concept>+1 -> <alias>
316
+ WHERE
317
+ <concept> = <value>
318
+ ORDER BY
319
+ <concept> asc|desc
320
+ ;
321
+ ```
322
+
323
+ #### CTE/ROWSET
324
+
325
+ Reusable virtual set of rows. Useful for windows, filtering.
326
+
327
+ ```sql
328
+ with <alias> as
329
+ select
330
+ <concept>,
331
+ <concept>+1 -> <alias>
332
+ WHERE
333
+ <concept> = <value>
334
+
335
+ select <alias>.<concept>;
336
+
337
+ ```
338
+
339
+
340
+ #### PERSIST
341
+
342
+ Store output of a query in a warehouse table.
343
+
344
+ ```sql
345
+ persist <alias> as <table_name> from
346
+ <select>;
347
+ ```
348
+
349
+ #### SHOW
350
+
351
+ Return generated SQL without executing.
352
+
353
+ ```sql
354
+ show <select>;
355
+ ```
@@ -1,8 +1,8 @@
1
- trilogy/__init__.py,sha256=7VUwx55iCGgc689L41AJd9zl_nZZvy9zumsQb-P67xc,245
1
+ trilogy/__init__.py,sha256=ouq-RNu0DVYw8n1C2ekRmcAJ_SL_PFzbRBzo2O814TM,292
2
2
  trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- trilogy/constants.py,sha256=MphpyZXP4URq7R1vP8EUtT_ZfGPSKuqKKFGMMjfMRtA,482
3
+ trilogy/constants.py,sha256=LxiK2TiVQPEa6tXkxWk9DJHOR3zsGNSqgQuqtOf66cw,518
4
4
  trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
5
- trilogy/executor.py,sha256=pQy8Xo7rxtZ9VZoIkOUpOHRzy6eQ5ayZtnBOtf3Zjyg,8359
5
+ trilogy/executor.py,sha256=xF6wzbhP6a3wz4nrxsRCKeKF7qytUQEL75oI3BGJ2hQ,8744
6
6
  trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
7
7
  trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
@@ -16,8 +16,9 @@ trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,56
16
16
  trilogy/core/functions.py,sha256=zkRReytiotOBAW-a3Ri5eoejZDYTt2-7Op80ZxZxUmw,9129
17
17
  trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
18
18
  trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
19
- trilogy/core/models.py,sha256=S2LTFh7F92PUMelY1taXn2BuimSZpYZa7QulXVoUkNM,105556
20
- trilogy/core/query_processor.py,sha256=x3fjs1Vhg_G1FHhjLomZ3kH16PU9RPcediRz-Lu_QZg,11625
19
+ trilogy/core/models.py,sha256=WH7GHgn1a3xegVo12_NTP7V_ptN-_ObY7s4ZgkPI0D4,108548
20
+ trilogy/core/optimization.py,sha256=SpWRQL1biAUvMCijk2I-FCQY2KzXd4eiu3ZlxzVE-uQ,4505
21
+ trilogy/core/query_processor.py,sha256=w_CS2TEmSk8Bhk6ukxLavQgQyA9UwcgoPChouLREujQ,11747
21
22
  trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
23
  trilogy/core/processing/concept_strategies_v3.py,sha256=6Z1ODKbvxaWFMRHa3vX87oSBW9XKtC7S7pUxburCVkA,22369
23
24
  trilogy/core/processing/graph_utils.py,sha256=ulCJ4hYAISbUxLD6VM2fah9RBPGIXSEHEPeRBSFl0Rs,1197
@@ -36,24 +37,24 @@ trilogy/core/processing/node_generators/select_node.py,sha256=xeCqIUEubrf3u_QQfb
36
37
  trilogy/core/processing/node_generators/unnest_node.py,sha256=s1VXQZSf1LnX3ISeQ5JzmzmCKUw30-5OK_f0YTB9_48,1031
37
38
  trilogy/core/processing/node_generators/window_node.py,sha256=ekazi5eXxnShpcp-qukXNG4DHFdULoXrX-YWUWLNEpM,2527
38
39
  trilogy/core/processing/nodes/__init__.py,sha256=ZkDGQksvsM5uNia5rhXFCUJcpTRhoYYFdyfJw-Eiu8s,3674
39
- trilogy/core/processing/nodes/base_node.py,sha256=ovFM4r8QG2sy5d2X1MTCk5BiYUwvr0chxSaIopkO6Fc,8890
40
+ trilogy/core/processing/nodes/base_node.py,sha256=VaK4rWV8PQMyTPTgStmCbPbV0mmTHyTwBhz0C0N2KG0,8961
40
41
  trilogy/core/processing/nodes/filter_node.py,sha256=DqSRv8voEajPZqzeeiIsxuv4ubvsmeQcCW6x_v2CmOk,1359
41
- trilogy/core/processing/nodes/group_node.py,sha256=-xaZuAkCXHWP_K5BmoJ5jTM_53wYWtYPEkrr4LaWib0,3735
42
- trilogy/core/processing/nodes/merge_node.py,sha256=sXaxKEFdPrTYJyDwcs8lNBoAvz0Nebfd8xcdUtavOoU,12332
42
+ trilogy/core/processing/nodes/group_node.py,sha256=Y_NWB_AwFrE-YithjZ7lYYDN4e0el4su3ICq2EIr3HA,3837
43
+ trilogy/core/processing/nodes/merge_node.py,sha256=pAtRTqkpsfE_pmqPSdeV0rHwFPzclJ3WItRLX8AuuJw,12609
43
44
  trilogy/core/processing/nodes/select_node_v2.py,sha256=tAADeVruch-flFiedbY1zi7ukMG2RpWecvxxZ5aL3ZU,6354
44
45
  trilogy/core/processing/nodes/unnest_node.py,sha256=t4kY3a_dR3iXistPemStfdw0uJfnxwTcoQg1HiDa3xo,1501
45
46
  trilogy/core/processing/nodes/window_node.py,sha256=QjAWgqBZqFSRCPwc7JBmgQJobWW50rsHI0pjJe0Zzg0,926
46
47
  trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
- trilogy/dialect/base.py,sha256=xWSbU50tiG55aDPXGxpUbgg4clGuvtnBBEHFWn7cku8,28669
48
- trilogy/dialect/bigquery.py,sha256=oiyZ9GejmRJocMFcprMM1ZxTSYI5Po5PXHJKvQSltWs,2845
49
- trilogy/dialect/common.py,sha256=TI0UZk8N-M-cQbGPw62uUTaxDTnnsH-zJqhPOkD0KtQ,1913
50
- trilogy/dialect/config.py,sha256=XjxvUmnF_QDc2IE70TnySMsiBuKs3iebC_O0ZJ34atE,1491
51
- trilogy/dialect/duckdb.py,sha256=vEf6srsjByGS4Xt6ou5jrOj98HOoHaOtZhlsoEo1EIU,3016
52
- trilogy/dialect/enums.py,sha256=5fDOyQxgBQoUOgzc_ARdtTXNiEhhYDfrk7gc5scMJiw,3402
53
- trilogy/dialect/postgres.py,sha256=aY9E9gCzprVn1skCI-qdBG6E9HsuRX77XpFk1XbgSTY,3144
54
- trilogy/dialect/presto.py,sha256=9rUqdx1fCekTSnzhHVSW71LFBW2uWjKSobohkcBA7vw,2728
55
- trilogy/dialect/snowflake.py,sha256=r_biJMiJdzBwOXp092TKYpaGd8OxbQiHetDGYCGoySI,2879
56
- trilogy/dialect/sql_server.py,sha256=ood_PftLs3CehH7UGH72WRZ9Ezw6kjdiTtXv4KFInvg,2837
48
+ trilogy/dialect/base.py,sha256=nYrm7Z-GnVVhr5vWHjMghWkGoq2r7ogzoGVasAGTxGo,29223
49
+ trilogy/dialect/bigquery.py,sha256=9vxQn2BMv_oTGQSWQpoN5ho_OgqMWaHH9e-5vQVf44c,2906
50
+ trilogy/dialect/common.py,sha256=zWrYmvevlXznocw9uGHmY5Ws1rp_kICm9zA_ulTe4eg,2165
51
+ trilogy/dialect/config.py,sha256=JdGIiHf2EVoFNTYzqQUy1bMmzqZiFTjcnYglzAMa4dM,3351
52
+ trilogy/dialect/duckdb.py,sha256=Ddyt68sr8IL2HnZMenyytoD65FXwY_O2pz1McyS0bis,3075
53
+ trilogy/dialect/enums.py,sha256=4NdpsydBpDn6jnh0JzFz5VvQEtnShErWtWHVyT6TNpw,3948
54
+ trilogy/dialect/postgres.py,sha256=r47xbCA7nfEYENofiVfLZ-SnReNfDmUmW4OSHVkkP4E,3206
55
+ trilogy/dialect/presto.py,sha256=rOr-ftb0S5AeEncQgp6jj1iCWlCEkzZuQC3TGMUcemg,2790
56
+ trilogy/dialect/snowflake.py,sha256=N3HknYgN-fjD7BLX1Ucj-ss_ku2Ox8DgLsF3BIHutHo,2941
57
+ trilogy/dialect/sql_server.py,sha256=UrLeA9bxiFJ4qpGsqVJqBybQCyJhetMebe8IzQW1q9s,2900
57
58
  trilogy/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
59
  trilogy/hooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
60
  trilogy/hooks/base_hook.py,sha256=Xkb-A2qCHozYjum0A36zOy5PwTVwrP3NLDF0U2GpgHo,1100
@@ -65,13 +66,13 @@ trilogy/parsing/common.py,sha256=lz0IyVA8v-u-DGFgzkmdb4_00I--Kegmo9HNF7CrajI,579
65
66
  trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
66
67
  trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
67
68
  trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
68
- trilogy/parsing/parse_engine.py,sha256=2IK7nAz_X9WiAqWG9cMPsj3IW6bGS4vqSWB4afvyUF4,63845
69
+ trilogy/parsing/parse_engine.py,sha256=TLy56pDatDfzfwbJkrJ-XXB05s_VW9_iRrkwtKR0GR4,63860
69
70
  trilogy/parsing/render.py,sha256=fxjpq2FZLgllw_d4cru-t_IXNPAz2DmYkT7v9ED0XRI,11540
70
71
  trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
72
  trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
72
- pytrilogy-0.0.1.105.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
73
- pytrilogy-0.0.1.105.dist-info/METADATA,sha256=VTUiVso4QpSImu15q4jsbq0FQ7HY3hUi9GOPan69sfI,6883
74
- pytrilogy-0.0.1.105.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
75
- pytrilogy-0.0.1.105.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
76
- pytrilogy-0.0.1.105.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
77
- pytrilogy-0.0.1.105.dist-info/RECORD,,
73
+ pytrilogy-0.0.1.107.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
74
+ pytrilogy-0.0.1.107.dist-info/METADATA,sha256=KF68qL9kNj855oUEtUVCCMd5hei-LRfABfoaEWlTa8g,7882
75
+ pytrilogy-0.0.1.107.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
76
+ pytrilogy-0.0.1.107.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
77
+ pytrilogy-0.0.1.107.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
78
+ pytrilogy-0.0.1.107.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.2.0)
2
+ Generator: setuptools (70.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
trilogy/__init__.py CHANGED
@@ -2,7 +2,8 @@ from trilogy.core.models import Environment
2
2
  from trilogy.dialect.enums import Dialects
3
3
  from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
+ from trilogy.constants import CONFIG
5
6
 
6
- __version__ = "0.0.1.105"
7
+ __version__ = "0.0.1.107"
7
8
 
8
- __all__ = ["parse", "Executor", "Dialects", "Environment"]
9
+ __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
trilogy/constants.py CHANGED
@@ -23,6 +23,7 @@ NULL_VALUE = MagicConstants.NULL
23
23
  class Config:
24
24
  strict_mode: bool = True
25
25
  human_identifiers: bool = True
26
+ inline_datasources: bool = True
26
27
 
27
28
 
28
29
  CONFIG = Config()
trilogy/core/models.py CHANGED
@@ -34,7 +34,11 @@ from pydantic import (
34
34
  from lark.tree import Meta
35
35
  from pathlib import Path
36
36
  from trilogy.constants import logger, DEFAULT_NAMESPACE, ENV_CACHE_NAME, MagicConstants
37
- from trilogy.core.constants import ALL_ROWS_CONCEPT, INTERNAL_NAMESPACE
37
+ from trilogy.core.constants import (
38
+ ALL_ROWS_CONCEPT,
39
+ INTERNAL_NAMESPACE,
40
+ CONSTANT_DATASET,
41
+ )
38
42
  from trilogy.core.enums import (
39
43
  InfiniteFunctionArgs,
40
44
  Purpose,
@@ -1434,6 +1438,7 @@ class MultiSelectStatement(Namespaced, BaseModel):
1434
1438
 
1435
1439
  class Address(BaseModel):
1436
1440
  location: str
1441
+ is_query: bool = False
1437
1442
 
1438
1443
 
1439
1444
  class Query(BaseModel):
@@ -1529,10 +1534,27 @@ class Datasource(Namespaced, BaseModel):
1529
1534
  default_factory=lambda: DatasourceMetadata(freshness_concept=None)
1530
1535
  )
1531
1536
 
1537
+ @property
1538
+ def condition(self):
1539
+ return None
1540
+
1532
1541
  @cached_property
1533
1542
  def output_lcl(self) -> LooseConceptList:
1534
1543
  return LooseConceptList(concepts=self.output_concepts)
1535
1544
 
1545
+ @property
1546
+ def can_be_inlined(self) -> bool:
1547
+ if isinstance(self.address, Address) and self.address.is_query:
1548
+ return False
1549
+ for x in self.columns:
1550
+ if not isinstance(x.alias, str):
1551
+ return False
1552
+ return True
1553
+
1554
+ @property
1555
+ def non_partial_concept_addresses(self) -> set[str]:
1556
+ return set([c.address for c in self.full_concepts])
1557
+
1536
1558
  @field_validator("namespace", mode="plain")
1537
1559
  @classmethod
1538
1560
  def namespace_validation(cls, v):
@@ -1647,7 +1669,7 @@ class Datasource(Namespaced, BaseModel):
1647
1669
  namespace = self.namespace.replace(".", "_") if self.namespace else ""
1648
1670
  return f"{namespace}_{self.identifier}"
1649
1671
 
1650
- @cached_property
1672
+ @property
1651
1673
  def safe_location(self) -> str:
1652
1674
  if isinstance(self.address, Address):
1653
1675
  return self.address.location
@@ -1746,7 +1768,7 @@ class QueryDatasource(BaseModel):
1746
1768
  input_concepts: List[Concept]
1747
1769
  output_concepts: List[Concept]
1748
1770
  source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
1749
- datasources: Sequence[Union[Datasource, "QueryDatasource"]]
1771
+ datasources: List[Union[Datasource, "QueryDatasource"]]
1750
1772
  grain: Grain
1751
1773
  joins: List[BaseJoin | UnnestJoin]
1752
1774
  limit: Optional[int] = None
@@ -1797,10 +1819,7 @@ class QueryDatasource(BaseModel):
1797
1819
  c.address for c in values["input_concepts"]
1798
1820
  )
1799
1821
  seen = set()
1800
- for k, val in v.items():
1801
- # if val:
1802
- # if len(val) != 1:
1803
- # raise SyntaxError(f"source map {k} has multiple values {len(val)}")
1822
+ for k, _ in v.items():
1804
1823
  seen.add(k)
1805
1824
  for x in expected:
1806
1825
  if x not in seen:
@@ -1922,18 +1941,18 @@ class QueryDatasource(BaseModel):
1922
1941
  )
1923
1942
 
1924
1943
  def get_alias(
1925
- self, concept: Concept, use_raw_name: bool = False, force_alias: bool = False
1944
+ self,
1945
+ concept: Concept,
1946
+ use_raw_name: bool = False,
1947
+ force_alias: bool = False,
1948
+ source: str | None = None,
1926
1949
  ):
1927
- # if we should use the raw datasource name to access
1928
- use_raw_name = (
1929
- True
1930
- if (len(self.datasources) == 1 or use_raw_name) and not force_alias
1931
- # if ((len(self.datasources) == 1 and isinstance(self.datasources[0], Datasource)) or use_raw_name) and not force_alias
1932
- else False
1933
- )
1934
1950
  for x in self.datasources:
1935
1951
  # query datasources should be referenced by their alias, always
1936
1952
  force_alias = isinstance(x, QueryDatasource)
1953
+ use_raw_name = isinstance(x, Datasource) and not force_alias
1954
+ if source and x.identifier != source:
1955
+ continue
1937
1956
  try:
1938
1957
  return x.get_alias(
1939
1958
  concept.with_grain(self.grain),
@@ -1967,8 +1986,7 @@ class Comment(BaseModel):
1967
1986
 
1968
1987
  class CTE(BaseModel):
1969
1988
  name: str
1970
- source: "QueryDatasource" # TODO: make recursive
1971
- # output columns are what are selected/grouped by
1989
+ source: "QueryDatasource"
1972
1990
  output_columns: List[Concept]
1973
1991
  source_map: Dict[str, str | list[str]]
1974
1992
  grain: Grain
@@ -1979,6 +1997,10 @@ class CTE(BaseModel):
1979
1997
  condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
1980
1998
  partial_concepts: List[Concept] = Field(default_factory=list)
1981
1999
  join_derived_concepts: List[Concept] = Field(default_factory=list)
2000
+ order_by: Optional[OrderBy] = None
2001
+ limit: Optional[int] = None
2002
+ requires_nesting: bool = True
2003
+ base_name_override: Optional[str] = None
1982
2004
 
1983
2005
  @computed_field # type: ignore
1984
2006
  @property
@@ -1989,6 +2011,40 @@ class CTE(BaseModel):
1989
2011
  def validate_output_columns(cls, v):
1990
2012
  return unique(v, "address")
1991
2013
 
2014
+ def inline_parent_datasource(self, parent: CTE) -> bool:
2015
+ qds_being_inlined = parent.source
2016
+ ds_being_inlined = qds_being_inlined.datasources[0]
2017
+ if not isinstance(ds_being_inlined, Datasource):
2018
+ return False
2019
+ self.source.datasources = [
2020
+ ds_being_inlined,
2021
+ *[
2022
+ x
2023
+ for x in self.source.datasources
2024
+ if x.identifier != qds_being_inlined.identifier
2025
+ ],
2026
+ ]
2027
+ # need to identify this before updating joins
2028
+ if self.base_name == parent.name:
2029
+ self.base_name_override = ds_being_inlined.safe_location
2030
+
2031
+ for join in self.joins:
2032
+ if isinstance(join, InstantiatedUnnestJoin):
2033
+ continue
2034
+ if join.left_cte.name == parent.name:
2035
+ join.left_cte = ds_being_inlined
2036
+ if join.right_cte.name == parent.name:
2037
+ join.right_cte = ds_being_inlined
2038
+ for k, v in self.source_map.items():
2039
+ if isinstance(v, list):
2040
+ self.source_map[k] = [
2041
+ ds_being_inlined.name if x == parent.name else x for x in v
2042
+ ]
2043
+ elif v == parent.name:
2044
+ self.source_map[k] = ds_being_inlined.name
2045
+ self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2046
+ return True
2047
+
1992
2048
  def __add__(self, other: "CTE"):
1993
2049
  logger.info('Merging two copies of CTE "%s"', self.name)
1994
2050
  if not self.grain == other.grain:
@@ -2031,16 +2087,25 @@ class CTE(BaseModel):
2031
2087
  def relevant_base_ctes(self):
2032
2088
  return self.parent_ctes
2033
2089
 
2090
+ @property
2091
+ def is_root_datasource(self) -> bool:
2092
+ return (
2093
+ len(self.source.datasources) == 1
2094
+ and isinstance(self.source.datasources[0], Datasource)
2095
+ and not self.source.datasources[0].name == CONSTANT_DATASET
2096
+ )
2097
+
2034
2098
  @property
2035
2099
  def base_name(self) -> str:
2100
+ if self.base_name_override:
2101
+ return self.base_name_override
2036
2102
  # if this cte selects from a single datasource, select right from it
2037
2103
  valid_joins: List[Join] = [
2038
2104
  join for join in self.joins if isinstance(join, Join)
2039
2105
  ]
2040
- if len(self.source.datasources) == 1 and isinstance(
2041
- self.source.datasources[0], Datasource
2042
- ):
2106
+ if self.is_root_datasource:
2043
2107
  return self.source.datasources[0].safe_location
2108
+
2044
2109
  # if we have multiple joined CTEs, pick the base
2045
2110
  # as the root
2046
2111
  elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
@@ -2066,11 +2131,10 @@ class CTE(BaseModel):
2066
2131
 
2067
2132
  @property
2068
2133
  def base_alias(self) -> str:
2134
+
2135
+ if self.is_root_datasource:
2136
+ return self.source.datasources[0].identifier
2069
2137
  relevant_joins = [j for j in self.joins if isinstance(j, Join)]
2070
- if len(self.source.datasources) == 1 and isinstance(
2071
- self.source.datasources[0], Datasource
2072
- ):
2073
- return self.source.datasources[0].full_name.replace(".", "_")
2074
2138
  if relevant_joins:
2075
2139
  return relevant_joins[0].left_cte.name
2076
2140
  elif self.relevant_base_ctes:
@@ -2079,12 +2143,16 @@ class CTE(BaseModel):
2079
2143
  return self.parent_ctes[0].name
2080
2144
  return self.name
2081
2145
 
2082
- def get_alias(self, concept: Concept) -> str:
2146
+ def get_alias(self, concept: Concept, source: str | None = None) -> str:
2083
2147
  for cte in self.parent_ctes:
2084
2148
  if concept.address in [x.address for x in cte.output_columns]:
2149
+ if source and source != cte.name:
2150
+ continue
2085
2151
  return concept.safe_address
2086
2152
  try:
2087
- source = self.source.get_alias(concept)
2153
+ source = self.source.get_alias(concept, source=source)
2154
+ if not source:
2155
+ raise ValueError("No source found")
2088
2156
  return source
2089
2157
  except ValueError as e:
2090
2158
  return f"INVALID_ALIAS: {str(e)}"
@@ -2097,6 +2165,11 @@ class CTE(BaseModel):
2097
2165
  and not self.group_to_grain
2098
2166
  ):
2099
2167
  return False
2168
+ if (
2169
+ len(self.source.datasources) == 1
2170
+ and self.source.datasources[0].name == CONSTANT_DATASET
2171
+ ):
2172
+ return False
2100
2173
  return True
2101
2174
 
2102
2175
  @property
@@ -2130,19 +2203,43 @@ class JoinKey(BaseModel):
2130
2203
 
2131
2204
 
2132
2205
  class Join(BaseModel):
2133
- left_cte: CTE
2134
- right_cte: CTE
2206
+ left_cte: CTE | Datasource
2207
+ right_cte: CTE | Datasource
2135
2208
  jointype: JoinType
2136
2209
  joinkeys: List[JoinKey]
2137
2210
 
2211
+ @property
2212
+ def left_name(self) -> str:
2213
+ if isinstance(self.left_cte, Datasource):
2214
+ return self.left_cte.identifier
2215
+ return self.left_cte.name
2216
+
2217
+ @property
2218
+ def right_name(self) -> str:
2219
+ if isinstance(self.right_cte, Datasource):
2220
+ return self.right_cte.identifier
2221
+ return self.right_cte.name
2222
+
2223
+ @property
2224
+ def left_ref(self) -> str:
2225
+ if isinstance(self.left_cte, Datasource):
2226
+ return f"{self.left_cte.safe_location} as {self.left_cte.identifier}"
2227
+ return self.left_cte.name
2228
+
2229
+ @property
2230
+ def right_ref(self) -> str:
2231
+ if isinstance(self.right_cte, Datasource):
2232
+ return f"{self.right_cte.safe_location} as {self.right_cte.identifier}"
2233
+ return self.right_cte.name
2234
+
2138
2235
  @property
2139
2236
  def unique_id(self) -> str:
2140
- return self.left_cte.name + self.right_cte.name + self.jointype.value
2237
+ return self.left_name + self.right_name + self.jointype.value
2141
2238
 
2142
2239
  def __str__(self):
2143
2240
  return (
2144
- f"{self.jointype.value} JOIN {self.left_cte.name} and"
2145
- f" {self.right_cte.name} on {','.join([str(k) for k in self.joinkeys])}"
2241
+ f"{self.jointype.value} JOIN {self.left_name} and"
2242
+ f" {self.right_name} on {','.join([str(k) for k in self.joinkeys])}"
2146
2243
  )
2147
2244
 
2148
2245
 
@@ -0,0 +1,141 @@
1
+ from trilogy.core.models import (
2
+ CTE,
3
+ SelectStatement,
4
+ PersistStatement,
5
+ Datasource,
6
+ MultiSelectStatement,
7
+ )
8
+ from trilogy.core.enums import PurposeLineage
9
+ from trilogy.constants import logger
10
+ from abc import ABC
11
+
12
+
13
+ class OptimizationRule(ABC):
14
+
15
+ def optimize(self, cte: CTE) -> bool:
16
+ raise NotImplementedError
17
+
18
+ def log(self, message: str):
19
+ logger.info(f"[Optimization][{self.__class__.__name__}] {message}")
20
+
21
+
22
+ class InlineDatasource(OptimizationRule):
23
+
24
+ def optimize(self, cte: CTE) -> bool:
25
+ if not cte.parent_ctes:
26
+ return False
27
+
28
+ optimized = False
29
+ self.log(
30
+ f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
31
+ )
32
+ to_inline: list[CTE] = []
33
+ for parent_cte in cte.parent_ctes:
34
+ if not parent_cte.is_root_datasource:
35
+ self.log(f"parent {parent_cte.name} is not root")
36
+ continue
37
+ if parent_cte.parent_ctes:
38
+ self.log(f"parent {parent_cte.name} has parents")
39
+ continue
40
+ raw_root = parent_cte.source.datasources[0]
41
+ if not isinstance(raw_root, Datasource):
42
+ self.log(f"parent {parent_cte.name} is not datasource")
43
+ continue
44
+ root: Datasource = raw_root
45
+ if not root.can_be_inlined:
46
+ self.log(f"parent {parent_cte.name} datasource is not inlineable")
47
+ continue
48
+ root_outputs = {x.address for x in root.output_concepts}
49
+ cte_outputs = {x.address for x in parent_cte.output_columns}
50
+ if not cte_outputs.issubset(root_outputs):
51
+ self.log(f"Not all {parent_cte.name} outputs are found on datasource")
52
+ continue
53
+
54
+ to_inline.append(parent_cte)
55
+
56
+ for replaceable in to_inline:
57
+ self.log(f"Inlining parent {replaceable.name}")
58
+ cte.inline_parent_datasource(replaceable)
59
+
60
+ return optimized
61
+
62
+
63
+ REGISTERED_RULES: list[OptimizationRule] = [InlineDatasource()]
64
+
65
+
66
+ def filter_irrelevant_ctes(input: list[CTE], root_cte: CTE):
67
+ relevant_ctes = set()
68
+
69
+ def recurse(cte: CTE):
70
+ relevant_ctes.add(cte.name)
71
+ for cte in cte.parent_ctes:
72
+ recurse(cte)
73
+
74
+ recurse(root_cte)
75
+ return [cte for cte in input if cte.name in relevant_ctes]
76
+
77
+
78
+ def is_direct_return_eligible(
79
+ cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
80
+ ) -> bool:
81
+ if isinstance(select, (PersistStatement, MultiSelectStatement)):
82
+ return False
83
+ derived_concepts = [
84
+ c for c in cte.source.output_concepts if c not in cte.source.input_concepts
85
+ ]
86
+ eligible = True
87
+ conditions = (
88
+ set(x.address for x in select.where_clause.concept_arguments)
89
+ if select.where_clause
90
+ else set()
91
+ )
92
+ if conditions and select.limit:
93
+ return False
94
+ for x in derived_concepts:
95
+ if x.derivation == PurposeLineage.WINDOW:
96
+ return False
97
+ if x.derivation == PurposeLineage.AGGREGATE:
98
+ if x.address in conditions:
99
+ return False
100
+ logger.info(
101
+ f"Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
102
+ )
103
+ return eligible
104
+
105
+
106
+ def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):
107
+ hidden_addresses = [c.address for c in query.hidden_components]
108
+ output_addresses = [
109
+ c.address for c in query.output_components if c.address not in hidden_addresses
110
+ ]
111
+
112
+ mapping = {x.address: x for x in cte.output_columns}
113
+
114
+ new_output = []
115
+ for x in output_addresses:
116
+ new_output.append(mapping[x])
117
+ cte.output_columns = new_output
118
+
119
+
120
+ def optimize_ctes(
121
+ input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
122
+ ):
123
+ complete = False
124
+
125
+ while not complete:
126
+ actions_taken = False
127
+ for rule in REGISTERED_RULES:
128
+ for cte in input:
129
+ actions_taken = rule.optimize(cte)
130
+ complete = not actions_taken
131
+
132
+ if is_direct_return_eligible(root_cte, select):
133
+ root_cte.order_by = select.order_by
134
+ root_cte.limit = select.limit
135
+ root_cte.condition = (
136
+ select.where_clause.conditional if select.where_clause else None
137
+ )
138
+ root_cte.requires_nesting = False
139
+ sort_select_output(cte, select)
140
+
141
+ return filter_irrelevant_ctes(input, root_cte)
@@ -45,7 +45,7 @@ def concept_list_to_grain(
45
45
 
46
46
 
47
47
  def resolve_concept_map(
48
- inputs: List[QueryDatasource],
48
+ inputs: List[QueryDatasource | Datasource],
49
49
  targets: List[Concept],
50
50
  inherited_inputs: List[Concept],
51
51
  full_joins: List[Concept] | None = None,
@@ -156,7 +156,9 @@ class StrategyNode:
156
156
  return f"{self.__class__.__name__}<{contents}>"
157
157
 
158
158
  def _resolve(self) -> QueryDatasource:
159
- parent_sources = [p.resolve() for p in self.parents]
159
+ parent_sources: List[QueryDatasource | Datasource] = [
160
+ p.resolve() for p in self.parents
161
+ ]
160
162
 
161
163
  # if conditional:
162
164
  # for condition in conditions[1:]:
@@ -4,6 +4,7 @@ from trilogy.constants import logger
4
4
  from trilogy.core.models import (
5
5
  Grain,
6
6
  QueryDatasource,
7
+ Datasource,
7
8
  SourceType,
8
9
  Concept,
9
10
  Environment,
@@ -45,7 +46,9 @@ class GroupNode(StrategyNode):
45
46
  )
46
47
 
47
48
  def _resolve(self) -> QueryDatasource:
48
- parent_sources: list[QueryDatasource] = [p.resolve() for p in self.parents]
49
+ parent_sources: List[QueryDatasource | Datasource] = [
50
+ p.resolve() for p in self.parents
51
+ ]
49
52
 
50
53
  grain = concept_list_to_grain(self.output_concepts, [])
51
54
  comp_grain = Grain()
@@ -66,7 +69,7 @@ class GroupNode(StrategyNode):
66
69
  len(parent_sources) == 1
67
70
  and LooseConceptList(concepts=parent_sources[0].output_concepts)
68
71
  == self.output_lcl
69
- ):
72
+ ) and isinstance(parent_sources[0], QueryDatasource):
70
73
  logger.info(
71
74
  f"{self.logging_prefix}{LOGGER_PREFIX} No group by required, returning parent node"
72
75
  )