pytrilogy 0.0.1.105__py3-none-any.whl → 0.0.1.107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/METADATA +79 -1
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/RECORD +26 -25
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +3 -2
- trilogy/constants.py +1 -0
- trilogy/core/models.py +128 -31
- trilogy/core/optimization.py +141 -0
- trilogy/core/processing/nodes/base_node.py +4 -2
- trilogy/core/processing/nodes/group_node.py +5 -2
- trilogy/core/processing/nodes/merge_node.py +13 -8
- trilogy/core/query_processor.py +5 -2
- trilogy/dialect/base.py +73 -51
- trilogy/dialect/bigquery.py +6 -4
- trilogy/dialect/common.py +8 -6
- trilogy/dialect/config.py +69 -1
- trilogy/dialect/duckdb.py +5 -4
- trilogy/dialect/enums.py +40 -19
- trilogy/dialect/postgres.py +4 -2
- trilogy/dialect/presto.py +6 -4
- trilogy/dialect/snowflake.py +6 -4
- trilogy/dialect/sql_server.py +4 -1
- trilogy/executor.py +18 -5
- trilogy/parsing/parse_engine.py +1 -1
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.105.dist-info → pytrilogy-0.0.1.107.dist-info}/top_level.txt +0 -0
|
@@ -7,6 +7,7 @@ from trilogy.core.models import (
|
|
|
7
7
|
Grain,
|
|
8
8
|
JoinType,
|
|
9
9
|
QueryDatasource,
|
|
10
|
+
Datasource,
|
|
10
11
|
SourceType,
|
|
11
12
|
Concept,
|
|
12
13
|
UnnestJoin,
|
|
@@ -24,8 +25,8 @@ LOGGER_PREFIX = "[CONCEPT DETAIL - MERGE NODE]"
|
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
def deduplicate_nodes(
|
|
27
|
-
merged: dict[str, QueryDatasource], logging_prefix: str
|
|
28
|
-
) -> tuple[bool, dict[str, QueryDatasource], set[str]]:
|
|
28
|
+
merged: dict[str, QueryDatasource | Datasource], logging_prefix: str
|
|
29
|
+
) -> tuple[bool, dict[str, QueryDatasource | Datasource], set[str]]:
|
|
29
30
|
duplicates = False
|
|
30
31
|
removed: set[str] = set()
|
|
31
32
|
set_map: dict[str, set[str]] = {}
|
|
@@ -65,9 +66,9 @@ def deduplicate_nodes(
|
|
|
65
66
|
|
|
66
67
|
def deduplicate_nodes_and_joins(
|
|
67
68
|
joins: List[NodeJoin] | None,
|
|
68
|
-
merged: dict[str, QueryDatasource],
|
|
69
|
+
merged: dict[str, QueryDatasource | Datasource],
|
|
69
70
|
logging_prefix: str,
|
|
70
|
-
) -> Tuple[List[NodeJoin] | None, dict[str, QueryDatasource]]:
|
|
71
|
+
) -> Tuple[List[NodeJoin] | None, dict[str, QueryDatasource | Datasource]]:
|
|
71
72
|
# it's possible that we have more sources than we need
|
|
72
73
|
duplicates = True
|
|
73
74
|
while duplicates:
|
|
@@ -211,8 +212,10 @@ class MergeNode(StrategyNode):
|
|
|
211
212
|
return joins
|
|
212
213
|
|
|
213
214
|
def _resolve(self) -> QueryDatasource:
|
|
214
|
-
parent_sources
|
|
215
|
-
|
|
215
|
+
parent_sources: List[QueryDatasource | Datasource] = [
|
|
216
|
+
p.resolve() for p in self.parents
|
|
217
|
+
]
|
|
218
|
+
merged: dict[str, QueryDatasource | Datasource] = {}
|
|
216
219
|
final_joins = self.node_joins
|
|
217
220
|
for source in parent_sources:
|
|
218
221
|
if source.full_name in merged:
|
|
@@ -228,14 +231,15 @@ class MergeNode(StrategyNode):
|
|
|
228
231
|
final_joins, merged, self.logging_prefix
|
|
229
232
|
)
|
|
230
233
|
# early exit if we can just return the parent
|
|
231
|
-
final_datasets: List[QueryDatasource] = list(merged.values())
|
|
234
|
+
final_datasets: List[QueryDatasource | Datasource] = list(merged.values())
|
|
232
235
|
|
|
233
236
|
if len(merged.keys()) == 1:
|
|
234
|
-
final: QueryDatasource = list(merged.values())[0]
|
|
237
|
+
final: QueryDatasource | Datasource = list(merged.values())[0]
|
|
235
238
|
if (
|
|
236
239
|
set([c.address for c in final.output_concepts])
|
|
237
240
|
== set([c.address for c in self.output_concepts])
|
|
238
241
|
and not self.conditions
|
|
242
|
+
and isinstance(final, QueryDatasource)
|
|
239
243
|
):
|
|
240
244
|
logger.info(
|
|
241
245
|
f"{self.logging_prefix}{LOGGER_PREFIX} Merge node has only one parent with the same"
|
|
@@ -255,6 +259,7 @@ class MergeNode(StrategyNode):
|
|
|
255
259
|
if (
|
|
256
260
|
all([c.address in output_set for c in self.all_concepts])
|
|
257
261
|
and not self.conditions
|
|
262
|
+
and isinstance(dataset, QueryDatasource)
|
|
258
263
|
):
|
|
259
264
|
logger.info(
|
|
260
265
|
f"{self.logging_prefix}{LOGGER_PREFIX} Merge node not required as parent node {dataset.source_type}"
|
trilogy/core/query_processor.py
CHANGED
|
@@ -29,6 +29,7 @@ from trilogy.hooks.base_hook import BaseHook
|
|
|
29
29
|
from trilogy.constants import logger
|
|
30
30
|
from random import shuffle
|
|
31
31
|
from trilogy.core.ergonomics import CTE_NAMES
|
|
32
|
+
from trilogy.core.optimization import optimize_ctes
|
|
32
33
|
from math import ceil
|
|
33
34
|
|
|
34
35
|
LOGGER_PREFIX = "[QUERY BUILD]"
|
|
@@ -186,7 +187,7 @@ def datasource_to_ctes(
|
|
|
186
187
|
source_map = {k: "" for k in query_datasource.source_map}
|
|
187
188
|
else:
|
|
188
189
|
source_map = {
|
|
189
|
-
k: "" if not v else source.
|
|
190
|
+
k: "" if not v else source.identifier
|
|
190
191
|
for k, v in query_datasource.source_map.items()
|
|
191
192
|
}
|
|
192
193
|
human_id = generate_cte_name(query_datasource.full_name, name_map)
|
|
@@ -315,7 +316,9 @@ def process_query(
|
|
|
315
316
|
seen[cte.name] = seen[cte.name] + cte
|
|
316
317
|
for cte in raw_ctes:
|
|
317
318
|
cte.parent_ctes = [seen[x.name] for x in cte.parent_ctes]
|
|
318
|
-
|
|
319
|
+
deduped_ctes: List[CTE] = list(seen.values())
|
|
320
|
+
|
|
321
|
+
final_ctes = optimize_ctes(deduped_ctes, root_cte, statement)
|
|
319
322
|
|
|
320
323
|
return ProcessedQuery(
|
|
321
324
|
order_by=statement.order_by,
|
trilogy/dialect/base.py
CHANGED
|
@@ -170,6 +170,9 @@ GENERIC_SQL_TEMPLATE = Template(
|
|
|
170
170
|
"""{%- if ctes %}
|
|
171
171
|
WITH {% for cte in ctes %}
|
|
172
172
|
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
173
|
+
{%- if full_select -%}
|
|
174
|
+
{{full_select}}
|
|
175
|
+
{%- else -%}
|
|
173
176
|
SELECT
|
|
174
177
|
{%- if limit is not none %}
|
|
175
178
|
TOP {{ limit }}{% endif %}
|
|
@@ -184,8 +187,8 @@ TOP {{ limit }}{% endif %}
|
|
|
184
187
|
\t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
185
188
|
{%- if order_by %}
|
|
186
189
|
ORDER BY {% for order in order_by %}
|
|
187
|
-
{{ order }}{% if not loop.last %},{% endif %}
|
|
188
|
-
{%
|
|
190
|
+
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}
|
|
191
|
+
{% endif %}{% endif %}
|
|
189
192
|
"""
|
|
190
193
|
)
|
|
191
194
|
|
|
@@ -218,15 +221,19 @@ def safe_quote(string: str, quote_char: str):
|
|
|
218
221
|
return ".".join([f"{quote_char}{string}{quote_char}" for string in components])
|
|
219
222
|
|
|
220
223
|
|
|
221
|
-
def safe_get_cte_value(coalesce, cte: CTE,
|
|
224
|
+
def safe_get_cte_value(coalesce, cte: CTE, c: Concept, quote_char: str):
|
|
225
|
+
address = c.address
|
|
222
226
|
raw = cte.source_map.get(address, None)
|
|
227
|
+
|
|
223
228
|
if not raw:
|
|
224
229
|
return INVALID_REFERENCE_STRING("Missing source reference")
|
|
225
230
|
if isinstance(raw, str):
|
|
226
|
-
|
|
231
|
+
rendered = cte.get_alias(c, raw)
|
|
232
|
+
return f"{raw}.{safe_quote(rendered, quote_char)}"
|
|
227
233
|
if isinstance(raw, list) and len(raw) == 1:
|
|
228
|
-
|
|
229
|
-
|
|
234
|
+
rendered = cte.get_alias(c, raw[0])
|
|
235
|
+
return f"{raw[0]}.{safe_quote(rendered, quote_char)}"
|
|
236
|
+
return coalesce([f"{x}.{safe_quote(cte.get_alias(c, x), quote_char)}" for x in raw])
|
|
230
237
|
|
|
231
238
|
|
|
232
239
|
class BaseDialect:
|
|
@@ -238,21 +245,13 @@ class BaseDialect:
|
|
|
238
245
|
DATATYPE_MAP = DATATYPE_MAP
|
|
239
246
|
UNNEST_MODE = UnnestMode.CROSS_APPLY
|
|
240
247
|
|
|
241
|
-
def render_order_item(
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
all_outputs = set()
|
|
249
|
-
for cte in ctes:
|
|
250
|
-
all_outputs.update([a.address for a in cte.output_columns])
|
|
251
|
-
raise ValueError(
|
|
252
|
-
f"No source found for concept {order_item.expr}, have {all_outputs}"
|
|
253
|
-
)
|
|
254
|
-
selected = matched_ctes[0]
|
|
255
|
-
return f"{selected.name}.{self.QUOTE_CHARACTER}{order_item.expr.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"
|
|
248
|
+
def render_order_item(
|
|
249
|
+
self, order_item: OrderItem, cte: CTE, final: bool = False
|
|
250
|
+
) -> str:
|
|
251
|
+
if final:
|
|
252
|
+
return f"{cte.name}.{self.QUOTE_CHARACTER}{order_item.expr.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"
|
|
253
|
+
|
|
254
|
+
return f"{self.render_concept_sql(order_item.expr, cte=cte, alias=False)} {order_item.order.value}"
|
|
256
255
|
|
|
257
256
|
def render_concept_sql(self, c: Concept, cte: CTE, alias: bool = True) -> str:
|
|
258
257
|
# only recurse while it's in sources of the current cte
|
|
@@ -310,13 +309,14 @@ class BaseDialect:
|
|
|
310
309
|
logger.debug(
|
|
311
310
|
f"{LOGGER_PREFIX} [{c.address}] Rendering basic lookup from {cte.source_map.get(c.address, INVALID_REFERENCE_STRING('Missing source reference'))}"
|
|
312
311
|
)
|
|
312
|
+
|
|
313
313
|
raw_content = cte.get_alias(c)
|
|
314
314
|
if isinstance(raw_content, RawColumnExpr):
|
|
315
315
|
rval = raw_content.text
|
|
316
316
|
elif isinstance(raw_content, Function):
|
|
317
317
|
rval = self.render_expr(raw_content, cte=cte)
|
|
318
318
|
else:
|
|
319
|
-
rval = f"{safe_get_cte_value(self.FUNCTION_MAP[FunctionType.COALESCE], cte, c
|
|
319
|
+
rval = f"{safe_get_cte_value(self.FUNCTION_MAP[FunctionType.COALESCE], cte, c, self.QUOTE_CHARACTER)}"
|
|
320
320
|
if alias:
|
|
321
321
|
return (
|
|
322
322
|
f"{rval} as"
|
|
@@ -456,7 +456,7 @@ class BaseDialect:
|
|
|
456
456
|
else None
|
|
457
457
|
),
|
|
458
458
|
grain=cte.grain,
|
|
459
|
-
limit=
|
|
459
|
+
limit=cte.limit,
|
|
460
460
|
# some joins may not need to be rendered
|
|
461
461
|
joins=[
|
|
462
462
|
j
|
|
@@ -475,9 +475,11 @@ class BaseDialect:
|
|
|
475
475
|
where=(
|
|
476
476
|
self.render_expr(cte.condition, cte) if cte.condition else None
|
|
477
477
|
), # source_map=cte_output_map)
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
478
|
+
order_by=(
|
|
479
|
+
[self.render_order_item(i, cte) for i in cte.order_by.items]
|
|
480
|
+
if cte.order_by
|
|
481
|
+
else None
|
|
482
|
+
),
|
|
481
483
|
group_by=(
|
|
482
484
|
list(
|
|
483
485
|
set(
|
|
@@ -522,7 +524,8 @@ class BaseDialect:
|
|
|
522
524
|
)
|
|
523
525
|
|
|
524
526
|
def generate_ctes(
|
|
525
|
-
self,
|
|
527
|
+
self,
|
|
528
|
+
query: ProcessedQuery,
|
|
526
529
|
):
|
|
527
530
|
return [self.render_cte(cte) for cte in query.ctes]
|
|
528
531
|
|
|
@@ -649,35 +652,54 @@ class BaseDialect:
|
|
|
649
652
|
" filtered concept instead."
|
|
650
653
|
)
|
|
651
654
|
|
|
652
|
-
compiled_ctes = self.generate_ctes(query
|
|
655
|
+
compiled_ctes = self.generate_ctes(query)
|
|
653
656
|
|
|
654
657
|
# restort selections by the order they were written in
|
|
655
658
|
sorted_select: List[str] = []
|
|
656
659
|
for output_c in output_addresses:
|
|
657
660
|
sorted_select.append(select_columns[output_c])
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
661
|
+
if not query.base.requires_nesting:
|
|
662
|
+
final = self.SQL_TEMPLATE.render(
|
|
663
|
+
output=(
|
|
664
|
+
query.output_to
|
|
665
|
+
if isinstance(query, ProcessedQueryPersist)
|
|
666
|
+
else None
|
|
667
|
+
),
|
|
668
|
+
full_select=compiled_ctes[-1].statement,
|
|
669
|
+
ctes=compiled_ctes[:-1],
|
|
670
|
+
)
|
|
671
|
+
else:
|
|
672
|
+
final = self.SQL_TEMPLATE.render(
|
|
673
|
+
output=(
|
|
674
|
+
query.output_to
|
|
675
|
+
if isinstance(query, ProcessedQueryPersist)
|
|
676
|
+
else None
|
|
677
|
+
),
|
|
678
|
+
select_columns=sorted_select,
|
|
679
|
+
base=query.base.name,
|
|
680
|
+
joins=[
|
|
681
|
+
render_join(join, self.QUOTE_CHARACTER, None)
|
|
682
|
+
for join in query.joins
|
|
683
|
+
],
|
|
684
|
+
ctes=compiled_ctes,
|
|
685
|
+
limit=query.limit,
|
|
686
|
+
# move up to CTEs
|
|
687
|
+
where=(
|
|
688
|
+
self.render_expr(
|
|
689
|
+
query.where_clause.conditional, cte_map=cte_output_map
|
|
690
|
+
)
|
|
691
|
+
if query.where_clause and output_where
|
|
692
|
+
else None
|
|
693
|
+
),
|
|
694
|
+
order_by=(
|
|
695
|
+
[
|
|
696
|
+
self.render_order_item(i, query.base, final=True)
|
|
697
|
+
for i in query.order_by.items
|
|
698
|
+
]
|
|
699
|
+
if query.order_by
|
|
700
|
+
else None
|
|
701
|
+
),
|
|
702
|
+
)
|
|
681
703
|
|
|
682
704
|
if CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in final:
|
|
683
705
|
raise ValueError(
|
trilogy/dialect/bigquery.py
CHANGED
|
@@ -43,8 +43,11 @@ CREATE OR REPLACE TABLE {{ output.address.location }} AS
|
|
|
43
43
|
{% endif %}{%- if ctes %}
|
|
44
44
|
WITH {% for cte in ctes %}
|
|
45
45
|
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
46
|
-
|
|
46
|
+
{%- if full_select -%}
|
|
47
|
+
{{full_select}}
|
|
48
|
+
{% else -%}
|
|
47
49
|
|
|
50
|
+
SELECT
|
|
48
51
|
{%- for select in select_columns %}
|
|
49
52
|
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
|
|
50
53
|
{% if base %}FROM
|
|
@@ -59,10 +62,9 @@ SELECT
|
|
|
59
62
|
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
60
63
|
{%- if order_by %}
|
|
61
64
|
ORDER BY {% for order in order_by %}
|
|
62
|
-
{{ order }}{% if not loop.last %},{% endif %}
|
|
63
|
-
{% endfor %}{% endif %}
|
|
65
|
+
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
64
66
|
{%- if limit is not none %}
|
|
65
|
-
LIMIT {{ limit }}{% endif %}
|
|
67
|
+
LIMIT {{ limit }}{% endif %}{% endif %}
|
|
66
68
|
"""
|
|
67
69
|
)
|
|
68
70
|
MAX_IDENTIFIER_LENGTH = 50
|
trilogy/dialect/common.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from trilogy.core.models import Join, InstantiatedUnnestJoin, CTE, Concept
|
|
1
|
+
from trilogy.core.models import Join, InstantiatedUnnestJoin, CTE, Concept, Datasource
|
|
2
2
|
from trilogy.core.enums import UnnestMode, Modifier
|
|
3
3
|
from typing import Optional, Callable
|
|
4
4
|
|
|
@@ -21,18 +21,20 @@ def render_join(
|
|
|
21
21
|
if unnest_mode == UnnestMode.DIRECT:
|
|
22
22
|
return None
|
|
23
23
|
if not render_func:
|
|
24
|
-
raise ValueError("must provide a render
|
|
24
|
+
raise ValueError("must provide a render function to build an unnest joins")
|
|
25
25
|
if not cte:
|
|
26
26
|
raise ValueError("must provide a cte to build an unnest joins")
|
|
27
27
|
if unnest_mode == UnnestMode.CROSS_JOIN:
|
|
28
28
|
return f"CROSS JOIN {render_func(join.concept, cte, False)} as {quote_character}{join.concept.safe_address}{quote_character}"
|
|
29
29
|
|
|
30
30
|
return f"FULL JOIN {render_func(join.concept, cte, False)} as unnest_wrapper({quote_character}{join.concept.safe_address}{quote_character})"
|
|
31
|
-
|
|
31
|
+
left_name = join.left_name
|
|
32
|
+
right_name = join.right_name
|
|
33
|
+
right_base = join.right_ref
|
|
32
34
|
base_joinkeys = [
|
|
33
35
|
null_wrapper(
|
|
34
|
-
f"{
|
|
35
|
-
f"{
|
|
36
|
+
f"{left_name}.{quote_character}{join.left_cte.get_alias(key.concept) if isinstance(join.left_cte, Datasource) else key.concept.safe_address}{quote_character}",
|
|
37
|
+
f"{right_name}.{quote_character}{join.right_cte.get_alias(key.concept) if isinstance(join.right_cte, Datasource) else key.concept.safe_address}{quote_character}",
|
|
36
38
|
key.concept,
|
|
37
39
|
)
|
|
38
40
|
for key in join.joinkeys
|
|
@@ -40,4 +42,4 @@ def render_join(
|
|
|
40
42
|
if not base_joinkeys:
|
|
41
43
|
base_joinkeys = ["1=1"]
|
|
42
44
|
joinkeys = " AND ".join(base_joinkeys)
|
|
43
|
-
return f"{join.jointype.value.upper()} JOIN {
|
|
45
|
+
return f"{join.jointype.value.upper()} JOIN {right_base} on {joinkeys}"
|
trilogy/dialect/config.py
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
class DialectConfig:
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
def __init__(self):
|
|
4
|
+
pass
|
|
5
|
+
|
|
6
|
+
def connection_string(self) -> str:
|
|
7
|
+
raise NotImplementedError
|
|
8
|
+
|
|
9
|
+
@property
|
|
10
|
+
def connect_args(self) -> dict:
|
|
11
|
+
return {}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BigQueryConfig(DialectConfig):
|
|
15
|
+
def __init__(self, project: str, client):
|
|
16
|
+
self.project = project
|
|
17
|
+
self.client = client
|
|
18
|
+
|
|
19
|
+
def connection_string(self) -> str:
|
|
20
|
+
return f"bigquery://{self.project}?user_supplied_client=True"
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def connect_args(self) -> dict:
|
|
24
|
+
return {"client": self.client}
|
|
3
25
|
|
|
4
26
|
|
|
5
27
|
class DuckDBConfig(DialectConfig):
|
|
@@ -53,3 +75,49 @@ class SnowflakeConfig(DialectConfig):
|
|
|
53
75
|
|
|
54
76
|
def connection_string(self) -> str:
|
|
55
77
|
return f"snowflake://{self.username}:{self.password}@{self.account}"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class PrestoConfig(DialectConfig):
|
|
81
|
+
def __init__(
|
|
82
|
+
self,
|
|
83
|
+
host: str,
|
|
84
|
+
port: int,
|
|
85
|
+
username: str,
|
|
86
|
+
password: str,
|
|
87
|
+
catalog: str,
|
|
88
|
+
schema: str | None = None,
|
|
89
|
+
):
|
|
90
|
+
self.host = host
|
|
91
|
+
self.port = port
|
|
92
|
+
self.username = username
|
|
93
|
+
self.password = password
|
|
94
|
+
self.catalog = catalog
|
|
95
|
+
self.schema = schema
|
|
96
|
+
|
|
97
|
+
def connection_string(self) -> str:
|
|
98
|
+
if self.schema:
|
|
99
|
+
return f"presto://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}/{self.schema}"
|
|
100
|
+
return f"presto://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class TrinoConfig(DialectConfig):
|
|
104
|
+
def __init__(
|
|
105
|
+
self,
|
|
106
|
+
host: str,
|
|
107
|
+
port: int,
|
|
108
|
+
username: str,
|
|
109
|
+
password: str,
|
|
110
|
+
catalog: str,
|
|
111
|
+
schema: str | None = None,
|
|
112
|
+
):
|
|
113
|
+
self.host = host
|
|
114
|
+
self.port = port
|
|
115
|
+
self.username = username
|
|
116
|
+
self.password = password
|
|
117
|
+
self.catalog = catalog
|
|
118
|
+
self.schema = schema
|
|
119
|
+
|
|
120
|
+
def connection_string(self) -> str:
|
|
121
|
+
if self.schema:
|
|
122
|
+
return f"trino://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}/{self.schema}"
|
|
123
|
+
return f"trino://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}"
|
trilogy/dialect/duckdb.py
CHANGED
|
@@ -47,8 +47,10 @@ CREATE OR REPLACE TABLE {{ output.address.location }} AS
|
|
|
47
47
|
{% endif %}{%- if ctes %}
|
|
48
48
|
WITH {% for cte in ctes %}
|
|
49
49
|
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
50
|
-
|
|
50
|
+
{% if full_select -%}{{full_select}}
|
|
51
|
+
{% else -%}
|
|
51
52
|
|
|
53
|
+
SELECT
|
|
52
54
|
{%- for select in select_columns %}
|
|
53
55
|
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
|
|
54
56
|
{% if base %}FROM
|
|
@@ -63,10 +65,9 @@ GROUP BY {% for group in group_by %}
|
|
|
63
65
|
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
64
66
|
{%- if order_by %}
|
|
65
67
|
ORDER BY {% for order in order_by %}
|
|
66
|
-
{{ order }}{% if not loop.last %},{% endif %}
|
|
67
|
-
{% endfor %}{% endif %}
|
|
68
|
+
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
68
69
|
{%- if limit is not none %}
|
|
69
|
-
LIMIT ({{ limit }}){% endif %}
|
|
70
|
+
LIMIT ({{ limit }}){% endif %}{% endif %}
|
|
70
71
|
"""
|
|
71
72
|
)
|
|
72
73
|
|
trilogy/dialect/enums.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import List, TYPE_CHECKING, Optional
|
|
2
|
+
from typing import List, TYPE_CHECKING, Optional, Callable
|
|
3
3
|
|
|
4
4
|
if TYPE_CHECKING:
|
|
5
5
|
from trilogy.hooks.base_hook import BaseHook
|
|
@@ -9,6 +9,20 @@ from trilogy.dialect.config import DialectConfig
|
|
|
9
9
|
from trilogy.constants import logger
|
|
10
10
|
|
|
11
11
|
|
|
12
|
+
def default_factory(conf: DialectConfig, config_type):
|
|
13
|
+
from sqlalchemy import create_engine
|
|
14
|
+
|
|
15
|
+
if not isinstance(conf, config_type):
|
|
16
|
+
raise TypeError(
|
|
17
|
+
f"Invalid dialect configuration for type {type(config_type).__name__}"
|
|
18
|
+
)
|
|
19
|
+
if conf.connect_args:
|
|
20
|
+
return create_engine(
|
|
21
|
+
conf.connection_string(), future=True, connect_args=conf.connect_args
|
|
22
|
+
)
|
|
23
|
+
return create_engine(conf.connection_string(), future=True)
|
|
24
|
+
|
|
25
|
+
|
|
12
26
|
class Dialects(Enum):
|
|
13
27
|
BIGQUERY = "bigquery"
|
|
14
28
|
SQL_SERVER = "sql_server"
|
|
@@ -24,38 +38,32 @@ class Dialects(Enum):
|
|
|
24
38
|
return cls.DUCK_DB
|
|
25
39
|
return super()._missing_(value)
|
|
26
40
|
|
|
27
|
-
def default_engine(self, conf=None):
|
|
41
|
+
def default_engine(self, conf=None, _engine_factory: Callable = default_factory):
|
|
28
42
|
if self == Dialects.BIGQUERY:
|
|
29
|
-
from sqlalchemy import create_engine
|
|
30
43
|
from google.auth import default
|
|
31
44
|
from google.cloud import bigquery
|
|
45
|
+
from trilogy.dialect.config import BigQueryConfig
|
|
32
46
|
|
|
33
47
|
credentials, project = default()
|
|
34
48
|
client = bigquery.Client(credentials=credentials, project=project)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
49
|
+
conf = conf or BigQueryConfig(project=project, client=client)
|
|
50
|
+
return _engine_factory(
|
|
51
|
+
conf,
|
|
52
|
+
BigQueryConfig,
|
|
38
53
|
)
|
|
39
54
|
elif self == Dialects.SQL_SERVER:
|
|
40
|
-
from sqlalchemy import create_engine
|
|
41
55
|
|
|
42
56
|
raise NotImplementedError()
|
|
43
57
|
elif self == Dialects.DUCK_DB:
|
|
44
|
-
from sqlalchemy import create_engine
|
|
45
58
|
from trilogy.dialect.config import DuckDBConfig
|
|
46
59
|
|
|
47
60
|
if not conf:
|
|
48
61
|
conf = DuckDBConfig()
|
|
49
|
-
|
|
50
|
-
raise TypeError("Invalid dialect configuration for type duck_db")
|
|
51
|
-
return create_engine(conf.connection_string(), future=True)
|
|
62
|
+
return _engine_factory(conf, DuckDBConfig)
|
|
52
63
|
elif self == Dialects.SNOWFLAKE:
|
|
53
|
-
from sqlalchemy import create_engine
|
|
54
64
|
from trilogy.dialect.config import SnowflakeConfig
|
|
55
65
|
|
|
56
|
-
|
|
57
|
-
raise TypeError("Invalid dialect configuration for type snowflake")
|
|
58
|
-
return create_engine(conf.connection_string(), future=True)
|
|
66
|
+
return _engine_factory(conf, SnowflakeConfig)
|
|
59
67
|
elif self == Dialects.POSTGRES:
|
|
60
68
|
logger.warn(
|
|
61
69
|
"WARN: Using experimental postgres dialect. Most functionality will not work."
|
|
@@ -67,13 +75,17 @@ class Dialects(Enum):
|
|
|
67
75
|
raise ImportError(
|
|
68
76
|
"postgres driver not installed. python -m pip install pypreql[postgres]"
|
|
69
77
|
)
|
|
70
|
-
from sqlalchemy import create_engine
|
|
71
78
|
from trilogy.dialect.config import PostgresConfig
|
|
72
79
|
|
|
73
|
-
|
|
74
|
-
|
|
80
|
+
return _engine_factory(conf, PostgresConfig)
|
|
81
|
+
elif self == Dialects.PRESTO:
|
|
82
|
+
from trilogy.dialect.config import PrestoConfig
|
|
83
|
+
|
|
84
|
+
return _engine_factory(conf, PrestoConfig)
|
|
85
|
+
elif self == Dialects.TRINO:
|
|
86
|
+
from trilogy.dialect.config import TrinoConfig
|
|
75
87
|
|
|
76
|
-
return
|
|
88
|
+
return _engine_factory(conf, TrinoConfig)
|
|
77
89
|
else:
|
|
78
90
|
raise ValueError(
|
|
79
91
|
f"Unsupported dialect {self} for default engine creation; create one explicitly."
|
|
@@ -84,9 +96,18 @@ class Dialects(Enum):
|
|
|
84
96
|
environment: Optional["Environment"] = None,
|
|
85
97
|
hooks: List["BaseHook"] | None = None,
|
|
86
98
|
conf: DialectConfig | None = None,
|
|
99
|
+
_engine_factory: Callable | None = None,
|
|
87
100
|
) -> "Executor":
|
|
88
101
|
from trilogy import Executor, Environment
|
|
89
102
|
|
|
103
|
+
if _engine_factory is not None:
|
|
104
|
+
return Executor(
|
|
105
|
+
engine=self.default_engine(conf=conf, _engine_factory=_engine_factory),
|
|
106
|
+
environment=environment or Environment(),
|
|
107
|
+
dialect=self,
|
|
108
|
+
hooks=hooks,
|
|
109
|
+
)
|
|
110
|
+
|
|
90
111
|
return Executor(
|
|
91
112
|
engine=self.default_engine(conf=conf),
|
|
92
113
|
environment=environment or Environment(),
|
trilogy/dialect/postgres.py
CHANGED
|
@@ -49,8 +49,10 @@ CREATE TABLE {{ output.address.location }} AS
|
|
|
49
49
|
{% endif %}{%- if ctes %}
|
|
50
50
|
WITH {% for cte in ctes %}
|
|
51
51
|
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
52
|
+
{%- if full_select -%}
|
|
53
|
+
{{full_select}}
|
|
54
|
+
{%- else -%}
|
|
52
55
|
SELECT
|
|
53
|
-
|
|
54
56
|
{%- for select in select_columns %}
|
|
55
57
|
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
|
|
56
58
|
{% if base %}FROM
|
|
@@ -68,7 +70,7 @@ ORDER BY {% for order in order_by %}
|
|
|
68
70
|
{{ order }}{% if not loop.last %},{% endif %}
|
|
69
71
|
{% endfor %}{% endif %}
|
|
70
72
|
{%- if limit is not none %}
|
|
71
|
-
LIMIT {{ limit }}{% endif %}
|
|
73
|
+
LIMIT {{ limit }}{% endif %}{% endif %}
|
|
72
74
|
"""
|
|
73
75
|
)
|
|
74
76
|
|
trilogy/dialect/presto.py
CHANGED
|
@@ -42,8 +42,11 @@ CREATE OR REPLACE TABLE {{ output.address }} AS
|
|
|
42
42
|
{% endif %}{%- if ctes %}
|
|
43
43
|
WITH {% for cte in ctes %}
|
|
44
44
|
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
45
|
-
SELECT
|
|
46
45
|
|
|
46
|
+
SELECT
|
|
47
|
+
{%- if full_select -%}
|
|
48
|
+
{{full_select}}
|
|
49
|
+
{%- else -%}
|
|
47
50
|
{%- for select in select_columns %}
|
|
48
51
|
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
|
|
49
52
|
{% if base %}FROM
|
|
@@ -58,10 +61,9 @@ SELECT
|
|
|
58
61
|
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
59
62
|
{%- if order_by %}
|
|
60
63
|
ORDER BY {% for order in order_by %}
|
|
61
|
-
{{ order }}{% if not loop.last %},{% endif %}
|
|
62
|
-
{% endfor %}{% endif %}
|
|
64
|
+
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
63
65
|
{%- if limit is not none %}
|
|
64
|
-
LIMIT {{ limit }}{% endif %}
|
|
66
|
+
LIMIT {{ limit }}{% endif %}{% endif %}
|
|
65
67
|
"""
|
|
66
68
|
)
|
|
67
69
|
MAX_IDENTIFIER_LENGTH = 50
|
trilogy/dialect/snowflake.py
CHANGED
|
@@ -45,8 +45,11 @@ CREATE OR REPLACE TABLE {{ output.address.location }} AS
|
|
|
45
45
|
{% endif %}{%- if ctes %}
|
|
46
46
|
WITH {% for cte in ctes %}
|
|
47
47
|
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
48
|
-
|
|
48
|
+
{%- if full_select -%}
|
|
49
|
+
{{full_select}}
|
|
50
|
+
{%- else -%}
|
|
49
51
|
|
|
52
|
+
SELECT
|
|
50
53
|
{%- for select in select_columns %}
|
|
51
54
|
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
|
|
52
55
|
{% if base %}FROM
|
|
@@ -61,10 +64,9 @@ SELECT
|
|
|
61
64
|
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
62
65
|
{%- if order_by %}
|
|
63
66
|
ORDER BY {% for order in order_by %}
|
|
64
|
-
{{ order }}{% if not loop.last %},{% endif %}
|
|
65
|
-
{% endfor %}{% endif %}
|
|
67
|
+
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
66
68
|
{%- if limit is not none %}
|
|
67
|
-
LIMIT {{ limit }}{% endif %}
|
|
69
|
+
LIMIT {{ limit }}{% endif %}{% endif %}
|
|
68
70
|
"""
|
|
69
71
|
)
|
|
70
72
|
MAX_IDENTIFIER_LENGTH = 50
|
trilogy/dialect/sql_server.py
CHANGED
|
@@ -40,6 +40,9 @@ TSQL_TEMPLATE = Template(
|
|
|
40
40
|
"""{%- if ctes %}
|
|
41
41
|
WITH {% for cte in ctes %}
|
|
42
42
|
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
43
|
+
{%- if full_select -%}
|
|
44
|
+
{{full_select}}
|
|
45
|
+
{%- else -%}
|
|
43
46
|
SELECT
|
|
44
47
|
{%- if limit is not none %}
|
|
45
48
|
TOP {{ limit }}{% endif %}
|
|
@@ -60,7 +63,7 @@ GROUP BY {% for group in group_by %}
|
|
|
60
63
|
{%- if order_by %}
|
|
61
64
|
ORDER BY {% for order in order_by %}
|
|
62
65
|
{{ order }}{% if not loop.last %},{% endif %}
|
|
63
|
-
{% endfor %}{% endif %}
|
|
66
|
+
{% endfor %}{% endif %}{% endif %}
|
|
64
67
|
"""
|
|
65
68
|
)
|
|
66
69
|
|