pytrilogy 0.0.1.105__py3-none-any.whl → 0.0.1.107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/METADATA +79 -1
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/RECORD +26 -25
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +3 -2
- trilogy/constants.py +1 -0
- trilogy/core/models.py +128 -31
- trilogy/core/optimization.py +141 -0
- trilogy/core/processing/nodes/base_node.py +4 -2
- trilogy/core/processing/nodes/group_node.py +5 -2
- trilogy/core/processing/nodes/merge_node.py +13 -8
- trilogy/core/query_processor.py +5 -2
- trilogy/dialect/base.py +73 -51
- trilogy/dialect/bigquery.py +6 -4
- trilogy/dialect/common.py +8 -6
- trilogy/dialect/config.py +69 -1
- trilogy/dialect/duckdb.py +5 -4
- trilogy/dialect/enums.py +40 -19
- trilogy/dialect/postgres.py +4 -2
- trilogy/dialect/presto.py +6 -4
- trilogy/dialect/snowflake.py +6 -4
- trilogy/dialect/sql_server.py +4 -1
- trilogy/executor.py +18 -5
- trilogy/parsing/parse_engine.py +1 -1
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pytrilogy
|
|
3
|
-
Version: 0.0.1.
|
|
3
|
+
Version: 0.0.1.107
|
|
4
4
|
Summary: Declarative, typed query language that compiles to SQL.
|
|
5
5
|
Home-page:
|
|
6
6
|
Author:
|
|
@@ -275,3 +275,81 @@ but all are worth checking out. Please open PRs/comment for anything missed!
|
|
|
275
275
|
- [malloy](https://github.com/malloydata/malloy)
|
|
276
276
|
- [preql](https://github.com/erezsh/Preql)
|
|
277
277
|
- [PRQL](https://github.com/PRQL/prql)
|
|
278
|
+
|
|
279
|
+
## Minimal Syntax Reference
|
|
280
|
+
|
|
281
|
+
#### IMPORT
|
|
282
|
+
|
|
283
|
+
`import <path> as <alias>;`
|
|
284
|
+
|
|
285
|
+
#### CONCEPT
|
|
286
|
+
|
|
287
|
+
Types: `string | int | float | bool | date | datetime | time | timestamp | interval`;
|
|
288
|
+
|
|
289
|
+
Key:
|
|
290
|
+
`key <name> <type>;`
|
|
291
|
+
|
|
292
|
+
Property:
|
|
293
|
+
`property <key>.<name> <type>;`
|
|
294
|
+
|
|
295
|
+
Transformation:
|
|
296
|
+
`auto <name> <- <expression>;`
|
|
297
|
+
|
|
298
|
+
#### DATASOURCE
|
|
299
|
+
```sql
|
|
300
|
+
datasource <name>(
|
|
301
|
+
<column>:<concept>,
|
|
302
|
+
<column>:<concept>,
|
|
303
|
+
)
|
|
304
|
+
grain(<concept>, <concept>)
|
|
305
|
+
address <table>;
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
#### SELECT
|
|
309
|
+
|
|
310
|
+
Primary access
|
|
311
|
+
|
|
312
|
+
```sql
|
|
313
|
+
select
|
|
314
|
+
<concept>,
|
|
315
|
+
<concept>+1 -> <alias>
|
|
316
|
+
WHERE
|
|
317
|
+
<concept> = <value>
|
|
318
|
+
ORDER BY
|
|
319
|
+
<concept> asc|desc
|
|
320
|
+
;
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
#### CTE/ROWSET
|
|
324
|
+
|
|
325
|
+
Reusable virtual set of rows. Useful for windows, filtering.
|
|
326
|
+
|
|
327
|
+
```sql
|
|
328
|
+
with <alias> as
|
|
329
|
+
select
|
|
330
|
+
<concept>,
|
|
331
|
+
<concept>+1 -> <alias>
|
|
332
|
+
WHERE
|
|
333
|
+
<concept> = <value>
|
|
334
|
+
|
|
335
|
+
select <alias>.<concept>;
|
|
336
|
+
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
#### PERSIST
|
|
341
|
+
|
|
342
|
+
Store output of a query in a warehouse table
|
|
343
|
+
|
|
344
|
+
```sql
|
|
345
|
+
persist <alias> as <table_name> from
|
|
346
|
+
<select>;
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
#### SHOW
|
|
350
|
+
|
|
351
|
+
Return generated SQL without executing.
|
|
352
|
+
|
|
353
|
+
```sql
|
|
354
|
+
show <select>;
|
|
355
|
+
```
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
trilogy/__init__.py,sha256=
|
|
1
|
+
trilogy/__init__.py,sha256=ouq-RNu0DVYw8n1C2ekRmcAJ_SL_PFzbRBzo2O814TM,292
|
|
2
2
|
trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
trilogy/constants.py,sha256=
|
|
3
|
+
trilogy/constants.py,sha256=LxiK2TiVQPEa6tXkxWk9DJHOR3zsGNSqgQuqtOf66cw,518
|
|
4
4
|
trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
|
|
5
|
-
trilogy/executor.py,sha256=
|
|
5
|
+
trilogy/executor.py,sha256=xF6wzbhP6a3wz4nrxsRCKeKF7qytUQEL75oI3BGJ2hQ,8744
|
|
6
6
|
trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
|
|
7
7
|
trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
|
|
@@ -16,8 +16,9 @@ trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,56
|
|
|
16
16
|
trilogy/core/functions.py,sha256=zkRReytiotOBAW-a3Ri5eoejZDYTt2-7Op80ZxZxUmw,9129
|
|
17
17
|
trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
|
|
18
18
|
trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
|
|
19
|
-
trilogy/core/models.py,sha256=
|
|
20
|
-
trilogy/core/
|
|
19
|
+
trilogy/core/models.py,sha256=WH7GHgn1a3xegVo12_NTP7V_ptN-_ObY7s4ZgkPI0D4,108548
|
|
20
|
+
trilogy/core/optimization.py,sha256=SpWRQL1biAUvMCijk2I-FCQY2KzXd4eiu3ZlxzVE-uQ,4505
|
|
21
|
+
trilogy/core/query_processor.py,sha256=w_CS2TEmSk8Bhk6ukxLavQgQyA9UwcgoPChouLREujQ,11747
|
|
21
22
|
trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
23
|
trilogy/core/processing/concept_strategies_v3.py,sha256=6Z1ODKbvxaWFMRHa3vX87oSBW9XKtC7S7pUxburCVkA,22369
|
|
23
24
|
trilogy/core/processing/graph_utils.py,sha256=ulCJ4hYAISbUxLD6VM2fah9RBPGIXSEHEPeRBSFl0Rs,1197
|
|
@@ -36,24 +37,24 @@ trilogy/core/processing/node_generators/select_node.py,sha256=xeCqIUEubrf3u_QQfb
|
|
|
36
37
|
trilogy/core/processing/node_generators/unnest_node.py,sha256=s1VXQZSf1LnX3ISeQ5JzmzmCKUw30-5OK_f0YTB9_48,1031
|
|
37
38
|
trilogy/core/processing/node_generators/window_node.py,sha256=ekazi5eXxnShpcp-qukXNG4DHFdULoXrX-YWUWLNEpM,2527
|
|
38
39
|
trilogy/core/processing/nodes/__init__.py,sha256=ZkDGQksvsM5uNia5rhXFCUJcpTRhoYYFdyfJw-Eiu8s,3674
|
|
39
|
-
trilogy/core/processing/nodes/base_node.py,sha256=
|
|
40
|
+
trilogy/core/processing/nodes/base_node.py,sha256=VaK4rWV8PQMyTPTgStmCbPbV0mmTHyTwBhz0C0N2KG0,8961
|
|
40
41
|
trilogy/core/processing/nodes/filter_node.py,sha256=DqSRv8voEajPZqzeeiIsxuv4ubvsmeQcCW6x_v2CmOk,1359
|
|
41
|
-
trilogy/core/processing/nodes/group_node.py,sha256
|
|
42
|
-
trilogy/core/processing/nodes/merge_node.py,sha256=
|
|
42
|
+
trilogy/core/processing/nodes/group_node.py,sha256=Y_NWB_AwFrE-YithjZ7lYYDN4e0el4su3ICq2EIr3HA,3837
|
|
43
|
+
trilogy/core/processing/nodes/merge_node.py,sha256=pAtRTqkpsfE_pmqPSdeV0rHwFPzclJ3WItRLX8AuuJw,12609
|
|
43
44
|
trilogy/core/processing/nodes/select_node_v2.py,sha256=tAADeVruch-flFiedbY1zi7ukMG2RpWecvxxZ5aL3ZU,6354
|
|
44
45
|
trilogy/core/processing/nodes/unnest_node.py,sha256=t4kY3a_dR3iXistPemStfdw0uJfnxwTcoQg1HiDa3xo,1501
|
|
45
46
|
trilogy/core/processing/nodes/window_node.py,sha256=QjAWgqBZqFSRCPwc7JBmgQJobWW50rsHI0pjJe0Zzg0,926
|
|
46
47
|
trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
-
trilogy/dialect/base.py,sha256=
|
|
48
|
-
trilogy/dialect/bigquery.py,sha256=
|
|
49
|
-
trilogy/dialect/common.py,sha256=
|
|
50
|
-
trilogy/dialect/config.py,sha256=
|
|
51
|
-
trilogy/dialect/duckdb.py,sha256=
|
|
52
|
-
trilogy/dialect/enums.py,sha256=
|
|
53
|
-
trilogy/dialect/postgres.py,sha256=
|
|
54
|
-
trilogy/dialect/presto.py,sha256=
|
|
55
|
-
trilogy/dialect/snowflake.py,sha256=
|
|
56
|
-
trilogy/dialect/sql_server.py,sha256=
|
|
48
|
+
trilogy/dialect/base.py,sha256=nYrm7Z-GnVVhr5vWHjMghWkGoq2r7ogzoGVasAGTxGo,29223
|
|
49
|
+
trilogy/dialect/bigquery.py,sha256=9vxQn2BMv_oTGQSWQpoN5ho_OgqMWaHH9e-5vQVf44c,2906
|
|
50
|
+
trilogy/dialect/common.py,sha256=zWrYmvevlXznocw9uGHmY5Ws1rp_kICm9zA_ulTe4eg,2165
|
|
51
|
+
trilogy/dialect/config.py,sha256=JdGIiHf2EVoFNTYzqQUy1bMmzqZiFTjcnYglzAMa4dM,3351
|
|
52
|
+
trilogy/dialect/duckdb.py,sha256=Ddyt68sr8IL2HnZMenyytoD65FXwY_O2pz1McyS0bis,3075
|
|
53
|
+
trilogy/dialect/enums.py,sha256=4NdpsydBpDn6jnh0JzFz5VvQEtnShErWtWHVyT6TNpw,3948
|
|
54
|
+
trilogy/dialect/postgres.py,sha256=r47xbCA7nfEYENofiVfLZ-SnReNfDmUmW4OSHVkkP4E,3206
|
|
55
|
+
trilogy/dialect/presto.py,sha256=rOr-ftb0S5AeEncQgp6jj1iCWlCEkzZuQC3TGMUcemg,2790
|
|
56
|
+
trilogy/dialect/snowflake.py,sha256=N3HknYgN-fjD7BLX1Ucj-ss_ku2Ox8DgLsF3BIHutHo,2941
|
|
57
|
+
trilogy/dialect/sql_server.py,sha256=UrLeA9bxiFJ4qpGsqVJqBybQCyJhetMebe8IzQW1q9s,2900
|
|
57
58
|
trilogy/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
59
|
trilogy/hooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
60
|
trilogy/hooks/base_hook.py,sha256=Xkb-A2qCHozYjum0A36zOy5PwTVwrP3NLDF0U2GpgHo,1100
|
|
@@ -65,13 +66,13 @@ trilogy/parsing/common.py,sha256=lz0IyVA8v-u-DGFgzkmdb4_00I--Kegmo9HNF7CrajI,579
|
|
|
65
66
|
trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
|
|
66
67
|
trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
|
|
67
68
|
trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
68
|
-
trilogy/parsing/parse_engine.py,sha256=
|
|
69
|
+
trilogy/parsing/parse_engine.py,sha256=TLy56pDatDfzfwbJkrJ-XXB05s_VW9_iRrkwtKR0GR4,63860
|
|
69
70
|
trilogy/parsing/render.py,sha256=fxjpq2FZLgllw_d4cru-t_IXNPAz2DmYkT7v9ED0XRI,11540
|
|
70
71
|
trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
72
|
trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
|
|
72
|
-
pytrilogy-0.0.1.
|
|
73
|
-
pytrilogy-0.0.1.
|
|
74
|
-
pytrilogy-0.0.1.
|
|
75
|
-
pytrilogy-0.0.1.
|
|
76
|
-
pytrilogy-0.0.1.
|
|
77
|
-
pytrilogy-0.0.1.
|
|
73
|
+
pytrilogy-0.0.1.107.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
|
|
74
|
+
pytrilogy-0.0.1.107.dist-info/METADATA,sha256=KF68qL9kNj855oUEtUVCCMd5hei-LRfABfoaEWlTa8g,7882
|
|
75
|
+
pytrilogy-0.0.1.107.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
|
|
76
|
+
pytrilogy-0.0.1.107.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
|
|
77
|
+
pytrilogy-0.0.1.107.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
|
|
78
|
+
pytrilogy-0.0.1.107.dist-info/RECORD,,
|
trilogy/__init__.py
CHANGED
|
@@ -2,7 +2,8 @@ from trilogy.core.models import Environment
|
|
|
2
2
|
from trilogy.dialect.enums import Dialects
|
|
3
3
|
from trilogy.executor import Executor
|
|
4
4
|
from trilogy.parser import parse
|
|
5
|
+
from trilogy.constants import CONFIG
|
|
5
6
|
|
|
6
|
-
__version__ = "0.0.1.
|
|
7
|
+
__version__ = "0.0.1.107"
|
|
7
8
|
|
|
8
|
-
__all__ = ["parse", "Executor", "Dialects", "Environment"]
|
|
9
|
+
__all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
|
trilogy/constants.py
CHANGED
trilogy/core/models.py
CHANGED
|
@@ -34,7 +34,11 @@ from pydantic import (
|
|
|
34
34
|
from lark.tree import Meta
|
|
35
35
|
from pathlib import Path
|
|
36
36
|
from trilogy.constants import logger, DEFAULT_NAMESPACE, ENV_CACHE_NAME, MagicConstants
|
|
37
|
-
from trilogy.core.constants import
|
|
37
|
+
from trilogy.core.constants import (
|
|
38
|
+
ALL_ROWS_CONCEPT,
|
|
39
|
+
INTERNAL_NAMESPACE,
|
|
40
|
+
CONSTANT_DATASET,
|
|
41
|
+
)
|
|
38
42
|
from trilogy.core.enums import (
|
|
39
43
|
InfiniteFunctionArgs,
|
|
40
44
|
Purpose,
|
|
@@ -1434,6 +1438,7 @@ class MultiSelectStatement(Namespaced, BaseModel):
|
|
|
1434
1438
|
|
|
1435
1439
|
class Address(BaseModel):
|
|
1436
1440
|
location: str
|
|
1441
|
+
is_query: bool = False
|
|
1437
1442
|
|
|
1438
1443
|
|
|
1439
1444
|
class Query(BaseModel):
|
|
@@ -1529,10 +1534,27 @@ class Datasource(Namespaced, BaseModel):
|
|
|
1529
1534
|
default_factory=lambda: DatasourceMetadata(freshness_concept=None)
|
|
1530
1535
|
)
|
|
1531
1536
|
|
|
1537
|
+
@property
|
|
1538
|
+
def condition(self):
|
|
1539
|
+
return None
|
|
1540
|
+
|
|
1532
1541
|
@cached_property
|
|
1533
1542
|
def output_lcl(self) -> LooseConceptList:
|
|
1534
1543
|
return LooseConceptList(concepts=self.output_concepts)
|
|
1535
1544
|
|
|
1545
|
+
@property
|
|
1546
|
+
def can_be_inlined(self) -> bool:
|
|
1547
|
+
if isinstance(self.address, Address) and self.address.is_query:
|
|
1548
|
+
return False
|
|
1549
|
+
for x in self.columns:
|
|
1550
|
+
if not isinstance(x.alias, str):
|
|
1551
|
+
return False
|
|
1552
|
+
return True
|
|
1553
|
+
|
|
1554
|
+
@property
|
|
1555
|
+
def non_partial_concept_addresses(self) -> set[str]:
|
|
1556
|
+
return set([c.address for c in self.full_concepts])
|
|
1557
|
+
|
|
1536
1558
|
@field_validator("namespace", mode="plain")
|
|
1537
1559
|
@classmethod
|
|
1538
1560
|
def namespace_validation(cls, v):
|
|
@@ -1647,7 +1669,7 @@ class Datasource(Namespaced, BaseModel):
|
|
|
1647
1669
|
namespace = self.namespace.replace(".", "_") if self.namespace else ""
|
|
1648
1670
|
return f"{namespace}_{self.identifier}"
|
|
1649
1671
|
|
|
1650
|
-
@
|
|
1672
|
+
@property
|
|
1651
1673
|
def safe_location(self) -> str:
|
|
1652
1674
|
if isinstance(self.address, Address):
|
|
1653
1675
|
return self.address.location
|
|
@@ -1746,7 +1768,7 @@ class QueryDatasource(BaseModel):
|
|
|
1746
1768
|
input_concepts: List[Concept]
|
|
1747
1769
|
output_concepts: List[Concept]
|
|
1748
1770
|
source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
|
|
1749
|
-
datasources:
|
|
1771
|
+
datasources: List[Union[Datasource, "QueryDatasource"]]
|
|
1750
1772
|
grain: Grain
|
|
1751
1773
|
joins: List[BaseJoin | UnnestJoin]
|
|
1752
1774
|
limit: Optional[int] = None
|
|
@@ -1797,10 +1819,7 @@ class QueryDatasource(BaseModel):
|
|
|
1797
1819
|
c.address for c in values["input_concepts"]
|
|
1798
1820
|
)
|
|
1799
1821
|
seen = set()
|
|
1800
|
-
for k,
|
|
1801
|
-
# if val:
|
|
1802
|
-
# if len(val) != 1:
|
|
1803
|
-
# raise SyntaxError(f"source map {k} has multiple values {len(val)}")
|
|
1822
|
+
for k, _ in v.items():
|
|
1804
1823
|
seen.add(k)
|
|
1805
1824
|
for x in expected:
|
|
1806
1825
|
if x not in seen:
|
|
@@ -1922,18 +1941,18 @@ class QueryDatasource(BaseModel):
|
|
|
1922
1941
|
)
|
|
1923
1942
|
|
|
1924
1943
|
def get_alias(
|
|
1925
|
-
self,
|
|
1944
|
+
self,
|
|
1945
|
+
concept: Concept,
|
|
1946
|
+
use_raw_name: bool = False,
|
|
1947
|
+
force_alias: bool = False,
|
|
1948
|
+
source: str | None = None,
|
|
1926
1949
|
):
|
|
1927
|
-
# if we should use the raw datasource name to access
|
|
1928
|
-
use_raw_name = (
|
|
1929
|
-
True
|
|
1930
|
-
if (len(self.datasources) == 1 or use_raw_name) and not force_alias
|
|
1931
|
-
# if ((len(self.datasources) == 1 and isinstance(self.datasources[0], Datasource)) or use_raw_name) and not force_alias
|
|
1932
|
-
else False
|
|
1933
|
-
)
|
|
1934
1950
|
for x in self.datasources:
|
|
1935
1951
|
# query datasources should be referenced by their alias, always
|
|
1936
1952
|
force_alias = isinstance(x, QueryDatasource)
|
|
1953
|
+
use_raw_name = isinstance(x, Datasource) and not force_alias
|
|
1954
|
+
if source and x.identifier != source:
|
|
1955
|
+
continue
|
|
1937
1956
|
try:
|
|
1938
1957
|
return x.get_alias(
|
|
1939
1958
|
concept.with_grain(self.grain),
|
|
@@ -1967,8 +1986,7 @@ class Comment(BaseModel):
|
|
|
1967
1986
|
|
|
1968
1987
|
class CTE(BaseModel):
|
|
1969
1988
|
name: str
|
|
1970
|
-
source: "QueryDatasource"
|
|
1971
|
-
# output columns are what are selected/grouped by
|
|
1989
|
+
source: "QueryDatasource"
|
|
1972
1990
|
output_columns: List[Concept]
|
|
1973
1991
|
source_map: Dict[str, str | list[str]]
|
|
1974
1992
|
grain: Grain
|
|
@@ -1979,6 +1997,10 @@ class CTE(BaseModel):
|
|
|
1979
1997
|
condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
|
|
1980
1998
|
partial_concepts: List[Concept] = Field(default_factory=list)
|
|
1981
1999
|
join_derived_concepts: List[Concept] = Field(default_factory=list)
|
|
2000
|
+
order_by: Optional[OrderBy] = None
|
|
2001
|
+
limit: Optional[int] = None
|
|
2002
|
+
requires_nesting: bool = True
|
|
2003
|
+
base_name_override: Optional[str] = None
|
|
1982
2004
|
|
|
1983
2005
|
@computed_field # type: ignore
|
|
1984
2006
|
@property
|
|
@@ -1989,6 +2011,40 @@ class CTE(BaseModel):
|
|
|
1989
2011
|
def validate_output_columns(cls, v):
|
|
1990
2012
|
return unique(v, "address")
|
|
1991
2013
|
|
|
2014
|
+
def inline_parent_datasource(self, parent: CTE) -> bool:
|
|
2015
|
+
qds_being_inlined = parent.source
|
|
2016
|
+
ds_being_inlined = qds_being_inlined.datasources[0]
|
|
2017
|
+
if not isinstance(ds_being_inlined, Datasource):
|
|
2018
|
+
return False
|
|
2019
|
+
self.source.datasources = [
|
|
2020
|
+
ds_being_inlined,
|
|
2021
|
+
*[
|
|
2022
|
+
x
|
|
2023
|
+
for x in self.source.datasources
|
|
2024
|
+
if x.identifier != qds_being_inlined.identifier
|
|
2025
|
+
],
|
|
2026
|
+
]
|
|
2027
|
+
# need to identify this before updating joins
|
|
2028
|
+
if self.base_name == parent.name:
|
|
2029
|
+
self.base_name_override = ds_being_inlined.safe_location
|
|
2030
|
+
|
|
2031
|
+
for join in self.joins:
|
|
2032
|
+
if isinstance(join, InstantiatedUnnestJoin):
|
|
2033
|
+
continue
|
|
2034
|
+
if join.left_cte.name == parent.name:
|
|
2035
|
+
join.left_cte = ds_being_inlined
|
|
2036
|
+
if join.right_cte.name == parent.name:
|
|
2037
|
+
join.right_cte = ds_being_inlined
|
|
2038
|
+
for k, v in self.source_map.items():
|
|
2039
|
+
if isinstance(v, list):
|
|
2040
|
+
self.source_map[k] = [
|
|
2041
|
+
ds_being_inlined.name if x == parent.name else x for x in v
|
|
2042
|
+
]
|
|
2043
|
+
elif v == parent.name:
|
|
2044
|
+
self.source_map[k] = ds_being_inlined.name
|
|
2045
|
+
self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
|
|
2046
|
+
return True
|
|
2047
|
+
|
|
1992
2048
|
def __add__(self, other: "CTE"):
|
|
1993
2049
|
logger.info('Merging two copies of CTE "%s"', self.name)
|
|
1994
2050
|
if not self.grain == other.grain:
|
|
@@ -2031,16 +2087,25 @@ class CTE(BaseModel):
|
|
|
2031
2087
|
def relevant_base_ctes(self):
|
|
2032
2088
|
return self.parent_ctes
|
|
2033
2089
|
|
|
2090
|
+
@property
|
|
2091
|
+
def is_root_datasource(self) -> bool:
|
|
2092
|
+
return (
|
|
2093
|
+
len(self.source.datasources) == 1
|
|
2094
|
+
and isinstance(self.source.datasources[0], Datasource)
|
|
2095
|
+
and not self.source.datasources[0].name == CONSTANT_DATASET
|
|
2096
|
+
)
|
|
2097
|
+
|
|
2034
2098
|
@property
|
|
2035
2099
|
def base_name(self) -> str:
|
|
2100
|
+
if self.base_name_override:
|
|
2101
|
+
return self.base_name_override
|
|
2036
2102
|
# if this cte selects from a single datasource, select right from it
|
|
2037
2103
|
valid_joins: List[Join] = [
|
|
2038
2104
|
join for join in self.joins if isinstance(join, Join)
|
|
2039
2105
|
]
|
|
2040
|
-
if
|
|
2041
|
-
self.source.datasources[0], Datasource
|
|
2042
|
-
):
|
|
2106
|
+
if self.is_root_datasource:
|
|
2043
2107
|
return self.source.datasources[0].safe_location
|
|
2108
|
+
|
|
2044
2109
|
# if we have multiple joined CTEs, pick the base
|
|
2045
2110
|
# as the root
|
|
2046
2111
|
elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
|
|
@@ -2066,11 +2131,10 @@ class CTE(BaseModel):
|
|
|
2066
2131
|
|
|
2067
2132
|
@property
|
|
2068
2133
|
def base_alias(self) -> str:
|
|
2134
|
+
|
|
2135
|
+
if self.is_root_datasource:
|
|
2136
|
+
return self.source.datasources[0].identifier
|
|
2069
2137
|
relevant_joins = [j for j in self.joins if isinstance(j, Join)]
|
|
2070
|
-
if len(self.source.datasources) == 1 and isinstance(
|
|
2071
|
-
self.source.datasources[0], Datasource
|
|
2072
|
-
):
|
|
2073
|
-
return self.source.datasources[0].full_name.replace(".", "_")
|
|
2074
2138
|
if relevant_joins:
|
|
2075
2139
|
return relevant_joins[0].left_cte.name
|
|
2076
2140
|
elif self.relevant_base_ctes:
|
|
@@ -2079,12 +2143,16 @@ class CTE(BaseModel):
|
|
|
2079
2143
|
return self.parent_ctes[0].name
|
|
2080
2144
|
return self.name
|
|
2081
2145
|
|
|
2082
|
-
def get_alias(self, concept: Concept) -> str:
|
|
2146
|
+
def get_alias(self, concept: Concept, source: str | None = None) -> str:
|
|
2083
2147
|
for cte in self.parent_ctes:
|
|
2084
2148
|
if concept.address in [x.address for x in cte.output_columns]:
|
|
2149
|
+
if source and source != cte.name:
|
|
2150
|
+
continue
|
|
2085
2151
|
return concept.safe_address
|
|
2086
2152
|
try:
|
|
2087
|
-
source = self.source.get_alias(concept)
|
|
2153
|
+
source = self.source.get_alias(concept, source=source)
|
|
2154
|
+
if not source:
|
|
2155
|
+
raise ValueError("No source found")
|
|
2088
2156
|
return source
|
|
2089
2157
|
except ValueError as e:
|
|
2090
2158
|
return f"INVALID_ALIAS: {str(e)}"
|
|
@@ -2097,6 +2165,11 @@ class CTE(BaseModel):
|
|
|
2097
2165
|
and not self.group_to_grain
|
|
2098
2166
|
):
|
|
2099
2167
|
return False
|
|
2168
|
+
if (
|
|
2169
|
+
len(self.source.datasources) == 1
|
|
2170
|
+
and self.source.datasources[0].name == CONSTANT_DATASET
|
|
2171
|
+
):
|
|
2172
|
+
return False
|
|
2100
2173
|
return True
|
|
2101
2174
|
|
|
2102
2175
|
@property
|
|
@@ -2130,19 +2203,43 @@ class JoinKey(BaseModel):
|
|
|
2130
2203
|
|
|
2131
2204
|
|
|
2132
2205
|
class Join(BaseModel):
|
|
2133
|
-
left_cte: CTE
|
|
2134
|
-
right_cte: CTE
|
|
2206
|
+
left_cte: CTE | Datasource
|
|
2207
|
+
right_cte: CTE | Datasource
|
|
2135
2208
|
jointype: JoinType
|
|
2136
2209
|
joinkeys: List[JoinKey]
|
|
2137
2210
|
|
|
2211
|
+
@property
|
|
2212
|
+
def left_name(self) -> str:
|
|
2213
|
+
if isinstance(self.left_cte, Datasource):
|
|
2214
|
+
return self.left_cte.identifier
|
|
2215
|
+
return self.left_cte.name
|
|
2216
|
+
|
|
2217
|
+
@property
|
|
2218
|
+
def right_name(self) -> str:
|
|
2219
|
+
if isinstance(self.right_cte, Datasource):
|
|
2220
|
+
return self.right_cte.identifier
|
|
2221
|
+
return self.right_cte.name
|
|
2222
|
+
|
|
2223
|
+
@property
|
|
2224
|
+
def left_ref(self) -> str:
|
|
2225
|
+
if isinstance(self.left_cte, Datasource):
|
|
2226
|
+
return f"{self.left_cte.safe_location} as {self.left_cte.identifier}"
|
|
2227
|
+
return self.left_cte.name
|
|
2228
|
+
|
|
2229
|
+
@property
|
|
2230
|
+
def right_ref(self) -> str:
|
|
2231
|
+
if isinstance(self.right_cte, Datasource):
|
|
2232
|
+
return f"{self.right_cte.safe_location} as {self.right_cte.identifier}"
|
|
2233
|
+
return self.right_cte.name
|
|
2234
|
+
|
|
2138
2235
|
@property
|
|
2139
2236
|
def unique_id(self) -> str:
|
|
2140
|
-
return self.
|
|
2237
|
+
return self.left_name + self.right_name + self.jointype.value
|
|
2141
2238
|
|
|
2142
2239
|
def __str__(self):
|
|
2143
2240
|
return (
|
|
2144
|
-
f"{self.jointype.value} JOIN {self.
|
|
2145
|
-
f" {self.
|
|
2241
|
+
f"{self.jointype.value} JOIN {self.left_name} and"
|
|
2242
|
+
f" {self.right_name} on {','.join([str(k) for k in self.joinkeys])}"
|
|
2146
2243
|
)
|
|
2147
2244
|
|
|
2148
2245
|
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
from trilogy.core.models import (
|
|
2
|
+
CTE,
|
|
3
|
+
SelectStatement,
|
|
4
|
+
PersistStatement,
|
|
5
|
+
Datasource,
|
|
6
|
+
MultiSelectStatement,
|
|
7
|
+
)
|
|
8
|
+
from trilogy.core.enums import PurposeLineage
|
|
9
|
+
from trilogy.constants import logger
|
|
10
|
+
from abc import ABC
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class OptimizationRule(ABC):
    """Base class for a single optimization pass over a CTE.

    Subclasses override ``optimize``; ``log`` gives every rule a uniform,
    class-name-tagged info message.
    """

    def log(self, message: str):
        """Emit ``message`` at info level, tagged with the concrete rule's class name."""
        logger.info(f"[Optimization][{self.__class__.__name__}] {message}")

    def optimize(self, cte: CTE) -> bool:
        """Apply this rule to ``cte``.

        The base implementation is a placeholder and always raises.
        NOTE(review): callers appear to treat the bool as "something was
        changed" -- confirm against the rule implementations.

        Raises:
            NotImplementedError: always, until overridden by a subclass.
        """
        raise NotImplementedError()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class InlineDatasource(OptimizationRule):
    """Rule that inlines eligible parent CTEs into the CTE that consumes them.

    A parent CTE is eligible when all of the following hold:

    * it reports ``is_root_datasource``;
    * it has no parent CTEs of its own;
    * the first datasource of its source is a ``Datasource`` whose
      ``can_be_inlined`` is true;
    * every address in the parent's ``output_columns`` also appears in that
      datasource's ``output_concepts``.
    """

    def optimize(self, cte: CTE) -> bool:
        """Inline every eligible parent of ``cte``.

        Args:
            cte: the CTE whose ``parent_ctes`` are inspected and possibly
                inlined via ``cte.inline_parent_datasource``.

        Returns:
            True if at least one parent was inlined, False otherwise.
        """
        if not cte.parent_ctes:
            return False

        self.log(
            f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
        )
        # Collect candidates first; inlining mutates cte.parent_ctes, so it
        # must not happen while that list is being iterated.
        to_inline: list[CTE] = []
        for parent_cte in cte.parent_ctes:
            if not parent_cte.is_root_datasource:
                self.log(f"parent {parent_cte.name} is not root")
                continue
            if parent_cte.parent_ctes:
                self.log(f"parent {parent_cte.name} has parents")
                continue
            raw_root = parent_cte.source.datasources[0]
            if not isinstance(raw_root, Datasource):
                self.log(f"parent {parent_cte.name} is not datasource")
                continue
            root: Datasource = raw_root
            if not root.can_be_inlined:
                self.log(f"parent {parent_cte.name} datasource is not inlineable")
                continue
            root_outputs = {x.address for x in root.output_concepts}
            cte_outputs = {x.address for x in parent_cte.output_columns}
            if not cte_outputs.issubset(root_outputs):
                self.log(f"Not all {parent_cte.name} outputs are found on datasource")
                continue

            to_inline.append(parent_cte)

        # Report whether anything actually changed; previously this flag was
        # never updated, so the rule always claimed it had done nothing.
        optimized = False
        for replaceable in to_inline:
            self.log(f"Inlining parent {replaceable.name}")
            if cte.inline_parent_datasource(replaceable):
                optimized = True

        return optimized
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Rule instances created once at import time; optimize_ctes iterates this list in order.
REGISTERED_RULES: list[OptimizationRule] = [InlineDatasource()]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def filter_irrelevant_ctes(input: list[CTE], root_cte: CTE):
    """Keep only the CTEs reachable from ``root_cte`` through ``parent_ctes``.

    Args:
        input: candidate CTEs; relative order is preserved in the result.
        root_cte: starting point of the walk; its own name is always relevant.

    Returns:
        A new list with the members of ``input`` whose ``name`` belongs to
        ``root_cte`` or one of its (transitive) parents.
    """
    relevant_ctes: set[str] = set()
    # Track visited objects by identity so a parent shared by several children
    # is expanded once (the naive recursion is exponential on diamond-shaped
    # graphs) and an accidental cycle cannot loop forever. Identity rather
    # than name is used so that two distinct CTE objects sharing a name still
    # both contribute their parents, as before.
    visited: set[int] = set()
    pending = [root_cte]
    while pending:
        current = pending.pop()
        if id(current) in visited:
            continue
        visited.add(id(current))
        relevant_ctes.add(current.name)
        pending.extend(current.parent_ctes)

    return [cte for cte in input if cte.name in relevant_ctes]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def is_direct_return_eligible(
    cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
) -> bool:
    """Decide whether the select's where/order/limit can be applied on ``cte`` itself.

    Returns False when:

    * ``select`` is a ``PersistStatement`` or ``MultiSelectStatement``;
    * the select has both where-clause concept arguments and a limit;
    * any concept output by ``cte.source`` but not among its inputs has a
      WINDOW derivation;
    * any such concept has an AGGREGATE derivation and is referenced by the
      where clause.

    Otherwise logs the derived concepts and returns True.
    """
    if isinstance(select, (PersistStatement, MultiSelectStatement)):
        return False

    derived_concepts = [
        concept
        for concept in cte.source.output_concepts
        if concept not in cte.source.input_concepts
    ]

    # addresses of every concept the where clause refers to (empty without a where)
    if select.where_clause:
        conditions = {arg.address for arg in select.where_clause.concept_arguments}
    else:
        conditions = set()

    if conditions and select.limit:
        return False

    for concept in derived_concepts:
        if concept.derivation == PurposeLineage.WINDOW:
            return False
        if (
            concept.derivation == PurposeLineage.AGGREGATE
            and concept.address in conditions
        ):
            return False

    logger.info(
        f"Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
    )
    return True
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):
    """Reorder ``cte.output_columns`` in place to follow the query's visible outputs.

    The new column list contains, in ``query.output_components`` order, the
    CTE column matching each component address that is not listed in
    ``query.hidden_components``.

    Raises:
        KeyError: if a visible output address has no matching CTE column.
    """
    hidden = [component.address for component in query.hidden_components]
    visible = [
        component.address
        for component in query.output_components
        if component.address not in hidden
    ]
    columns_by_address = {column.address: column for column in cte.output_columns}
    cte.output_columns = [columns_by_address[address] for address in visible]
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def optimize_ctes(
    input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
):
    """Run the registered optimization rules over ``input`` and prune unused CTEs.

    Every rule in ``REGISTERED_RULES`` is applied to every CTE, and the whole
    sweep repeats until one full pass reports no change. If the root CTE is
    eligible for a direct return, the select's ordering, limit and where
    condition are pushed onto it, it is marked as not requiring nesting, and
    its output columns are reordered to match the select.

    Args:
        input: all CTEs generated for the query; mutated in place by the rules.
        root_cte: the CTE that produces the final output.
        select: the statement being compiled.

    Returns:
        The CTEs from ``input`` still reachable from ``root_cte``.
    """
    complete = False

    while not complete:
        actions_taken = False
        for rule in REGISTERED_RULES:
            for cte in input:
                # Accumulate rather than assign: a later no-op must not hide
                # an earlier successful optimization in the same pass.
                if rule.optimize(cte):
                    actions_taken = True
        complete = not actions_taken

    if is_direct_return_eligible(root_cte, select):
        root_cte.order_by = select.order_by
        root_cte.limit = select.limit
        root_cte.condition = (
            select.where_clause.conditional if select.where_clause else None
        )
        root_cte.requires_nesting = False
        # Sort the root CTE explicitly; relying on the loop variable picked
        # whichever CTE happened to be last in ``input`` (or raised NameError
        # when ``input`` was empty).
        sort_select_output(root_cte, select)

    return filter_irrelevant_ctes(input, root_cte)
|
|
@@ -45,7 +45,7 @@ def concept_list_to_grain(
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
def resolve_concept_map(
|
|
48
|
-
inputs: List[QueryDatasource],
|
|
48
|
+
inputs: List[QueryDatasource | Datasource],
|
|
49
49
|
targets: List[Concept],
|
|
50
50
|
inherited_inputs: List[Concept],
|
|
51
51
|
full_joins: List[Concept] | None = None,
|
|
@@ -156,7 +156,9 @@ class StrategyNode:
|
|
|
156
156
|
return f"{self.__class__.__name__}<{contents}>"
|
|
157
157
|
|
|
158
158
|
def _resolve(self) -> QueryDatasource:
|
|
159
|
-
parent_sources
|
|
159
|
+
parent_sources: List[QueryDatasource | Datasource] = [
|
|
160
|
+
p.resolve() for p in self.parents
|
|
161
|
+
]
|
|
160
162
|
|
|
161
163
|
# if conditional:
|
|
162
164
|
# for condition in conditions[1:]:
|
|
@@ -4,6 +4,7 @@ from trilogy.constants import logger
|
|
|
4
4
|
from trilogy.core.models import (
|
|
5
5
|
Grain,
|
|
6
6
|
QueryDatasource,
|
|
7
|
+
Datasource,
|
|
7
8
|
SourceType,
|
|
8
9
|
Concept,
|
|
9
10
|
Environment,
|
|
@@ -45,7 +46,9 @@ class GroupNode(StrategyNode):
|
|
|
45
46
|
)
|
|
46
47
|
|
|
47
48
|
def _resolve(self) -> QueryDatasource:
|
|
48
|
-
parent_sources:
|
|
49
|
+
parent_sources: List[QueryDatasource | Datasource] = [
|
|
50
|
+
p.resolve() for p in self.parents
|
|
51
|
+
]
|
|
49
52
|
|
|
50
53
|
grain = concept_list_to_grain(self.output_concepts, [])
|
|
51
54
|
comp_grain = Grain()
|
|
@@ -66,7 +69,7 @@ class GroupNode(StrategyNode):
|
|
|
66
69
|
len(parent_sources) == 1
|
|
67
70
|
and LooseConceptList(concepts=parent_sources[0].output_concepts)
|
|
68
71
|
== self.output_lcl
|
|
69
|
-
):
|
|
72
|
+
) and isinstance(parent_sources[0], QueryDatasource):
|
|
70
73
|
logger.info(
|
|
71
74
|
f"{self.logging_prefix}{LOGGER_PREFIX} No group by required, returning parent node"
|
|
72
75
|
)
|