pytrilogy 0.3.142__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cp312-win_amd64.pyd +0 -0
- pytrilogy-0.3.142.dist-info/METADATA +555 -0
- pytrilogy-0.3.142.dist-info/RECORD +200 -0
- pytrilogy-0.3.142.dist-info/WHEEL +4 -0
- pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +16 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2669 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +501 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +751 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1392 -0
- trilogy/dialect/bigquery.py +308 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +231 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +769 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +303 -0
- trilogy/scripts/common.py +355 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +303 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +512 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +471 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +713 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/run.py +63 -0
- trilogy/scripts/serve.py +140 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +119 -0
- trilogy/scripts/trilogy.py +68 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Any, Callable, Mapping
|
|
3
|
+
|
|
4
|
+
from jinja2 import Template
|
|
5
|
+
|
|
6
|
+
from trilogy.core.enums import FunctionType, Modifier, UnnestMode, WindowType
|
|
7
|
+
from trilogy.core.models.core import DataType
|
|
8
|
+
from trilogy.dialect.base import BaseDialect
|
|
9
|
+
|
|
10
|
+
WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
|
|
11
|
+
|
|
12
|
+
SENTINAL_AUTO_CAPTURE_GROUP_VALUE = "-1"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def null_wrapper(
    lval: str,
    rval: str,
    modifiers: list[Modifier],
) -> str:
    """Render the equality comparison between two already-rendered SQL operands.

    When ``Modifier.NULLABLE`` is among ``modifiers`` the operands are compared
    with ``is not distinct from``; otherwise a plain ``=`` is used.
    """
    comparator = "is not distinct from" if Modifier.NULLABLE in modifiers else "="
    return f"{lval} {comparator} {rval}"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def generate_regex_extract(x: list[str]) -> str:
    """Render a ``REGEXP_EXTRACT`` call from ``[target, pattern, group]``.

    If the group argument equals the auto-capture sentinel, the group index is
    derived from the pattern itself: 1 when the pattern declares at least one
    capture group, 0 otherwise. Any other group value is passed through as-is.
    """
    if str(x[2]) != SENTINAL_AUTO_CAPTURE_GROUP_VALUE:
        return f"REGEXP_EXTRACT({x[0]},{x[1]},{x[2]})"
    # Compile only to count the capture groups declared by the pattern.
    search = 1 if re.compile(x[1]).groups else 0
    return f"REGEXP_EXTRACT({x[0]},{x[1]},{search})"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def render_sort(args, types):
    """Render a ``list_sort`` call.

    ``args[0]`` is the rendered list expression. An optional ``args[1]`` holds
    the ordering; it is split on its first space into at most two parts, and
    each part is passed to ``list_sort`` as its own quoted argument.
    ``types`` is accepted but not used.
    """
    if len(args) == 1:
        return f"list_sort({args[0]})"
    ordering = args[1].split(" ", 1)
    quoted = ", ".join(f"'{part}'" for part in ordering)
    return f"list_sort({args[0]}, {quoted})"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def render_log(args):
    """Render a logarithm expression.

    Args:
        args: rendered SQL arguments, either ``[value]`` or ``[value, base]``.

    Returns:
        ``log(value)`` when no base is given or the base is the numeric
        literal 10; otherwise the change-of-base form ``log(value)/log(base)``.

    Raises:
        ValueError: if ``args`` does not hold exactly one or two items.
    """
    if len(args) == 1:
        return f"log({args[0]})"
    if len(args) != 2:
        raise ValueError("log function requires 1 or 2 arguments")
    value, base = args
    try:
        is_base_ten = float(base) == 10
    except (TypeError, ValueError, OverflowError):
        # The base is a rendered expression (column, function call, ...)
        # rather than a numeric literal, so it cannot be special-cased.
        is_base_ten = False
    if is_base_ten:
        return f"log({value})"
    # change of base formula
    return f"log({value})/log({base})"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def map_date_part_specifier(specifier: str) -> str:
    """Translate a date part specifier into the name used for DuckDB.

    Specifiers without a dedicated alias are returned unchanged.
    """
    # Add other aliases here if needed.
    duckdb_aliases = {"day_of_week": "dow"}
    if specifier in duckdb_aliases:
        return duckdb_aliases[specifier]
    return specifier
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# DuckDB-specific renderers, keyed by function type. Each renderer receives the
# already-rendered SQL arguments plus their types and returns a SQL fragment.
FUNCTION_MAP = {
    FunctionType.COUNT: lambda args, types: f"count({args[0]})",
    FunctionType.SUM: lambda args, types: f"sum({args[0]})",
    FunctionType.AVG: lambda args, types: f"avg({args[0]})",
    FunctionType.LENGTH: lambda args, types: f"length({args[0]})",
    FunctionType.LOG: lambda args, types: render_log(args),
    FunctionType.LIKE: lambda args, types: (
        f" CASE WHEN {args[0]} like {args[1]} THEN True ELSE False END"
    ),
    # CONCAT used to be listed twice in this literal; the later entry (the
    # `||` form) silently replaced the earlier CONCAT(...) form. Only the
    # effective implementation is kept, at the key's original position.
    FunctionType.CONCAT: lambda x, types: f"({' || '.join(x)})",
    FunctionType.SPLIT: lambda args, types: (
        f"STRING_SPLIT({','.join([f''' {str(a)} ''' for a in args])})"
    ),
    ## Duckdb indexes from 1, not 0
    FunctionType.INDEX_ACCESS: lambda args, types: (f"{args[0]}[{args[1]}]"),
    ## Duckdb uses list for array
    FunctionType.ARRAY_DISTINCT: lambda args, types: f"list_distinct({args[0]})",
    FunctionType.ARRAY_SUM: lambda args, types: f"list_sum({args[0]})",
    FunctionType.ARRAY_SORT: render_sort,
    FunctionType.ARRAY_TRANSFORM: lambda args, types: (
        f"list_transform({args[0]}, {args[1]} -> {args[2]})"
    ),
    FunctionType.ARRAY_AGG: lambda args, types: f"array_agg({args[0]})",
    # datetime is aliased,
    FunctionType.CURRENT_DATETIME: lambda x, types: (
        "cast(get_current_timestamp() as datetime)"
    ),
    FunctionType.DATETIME: lambda x, types: f"cast({x[0]} as datetime)",
    FunctionType.TIMESTAMP: lambda x, types: f"cast({x[0]} as timestamp)",
    FunctionType.DATE: lambda x, types: f"cast({x[0]} as date)",
    FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc('{x[1]}', {x[0]})",
    FunctionType.DATE_ADD: lambda x, types: (
        f"date_add({x[0]}, {x[2]} * INTERVAL 1 {x[1]})"
    ),
    FunctionType.DATE_SUB: lambda x, types: (
        f"date_add({x[0]}, -{x[2]} * INTERVAL 1 {x[1]})"
    ),
    FunctionType.DATE_PART: lambda x, types: (
        f"date_part('{map_date_part_specifier(x[1])}', {x[0]})"
    ),
    FunctionType.DATE_DIFF: lambda x, types: f"date_diff('{x[2]}', {x[0]}, {x[1]})",
    FunctionType.DATE_LITERAL: lambda x, types: f"date '{x}'",
    FunctionType.DATETIME_LITERAL: lambda x, types: f"datetime '{x}'",
    FunctionType.DAY_OF_WEEK: lambda x, types: f"dayofweek({x[0]})",
    # string
    FunctionType.CONTAINS: lambda x, types: (
        f"CONTAINS(LOWER({x[0]}), LOWER({x[1]}))"
    ),
    # regexp
    FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_MATCHES({x[0]},{x[1]})",
    FunctionType.REGEXP_EXTRACT: lambda x, types: generate_regex_extract(x),
}
|
|
114
|
+
|
|
115
|
+
# if an aggregate function is called on a source that is at the same grain as the aggregate
|
|
116
|
+
# we may return a static value
|
|
117
|
+
# Starts from FUNCTION_MAP and overrides the aggregates below with
# row-level expressions.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # The space after WHEN was previously missing, which fused the keyword
    # with the rendered argument (e.g. "CASE WHENcol IS NOT NULL ...").
    FunctionType.COUNT_DISTINCT: lambda args, types: (
        f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END"
    ),
    FunctionType.COUNT: lambda args, types: (
        f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END"
    ),
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
    FunctionType.MAX: lambda args, types: f"{args[0]}",
    FunctionType.MIN: lambda args, types: f"{args[0]}",
}
|
|
126
|
+
|
|
127
|
+
DATATYPE_MAP: dict[DataType, str] = {}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
DUCKDB_TEMPLATE = Template(
|
|
131
|
+
"""{%- if output %}
|
|
132
|
+
{{output}}
|
|
133
|
+
{% endif %}{%- if ctes %}
|
|
134
|
+
WITH {% if recursive%}RECURSIVE{% endif %}{% for cte in ctes %}
|
|
135
|
+
{{cte.name}} as (
|
|
136
|
+
{{cte.statement}}){% if not loop.last %},{% else %}
|
|
137
|
+
{% endif %}{% endfor %}{% endif %}
|
|
138
|
+
{%- if full_select -%}
|
|
139
|
+
{{full_select}}
|
|
140
|
+
{%- else -%}{%- if comment -%}
|
|
141
|
+
-- {{ comment }}
|
|
142
|
+
{%- endif %}SELECT
|
|
143
|
+
{%- for select in select_columns %}
|
|
144
|
+
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
|
|
145
|
+
{% if base %}FROM
|
|
146
|
+
{{ base }}{% endif %}{% if joins %}
|
|
147
|
+
{%- for join in joins %}
|
|
148
|
+
{{ join }}{% endfor %}{% endif %}
|
|
149
|
+
{%- if where %}
|
|
150
|
+
WHERE
|
|
151
|
+
{{ where }}
|
|
152
|
+
{% endif -%}{%- if group_by %}
|
|
153
|
+
GROUP BY {% for group in group_by %}
|
|
154
|
+
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
|
|
155
|
+
HAVING
|
|
156
|
+
{{ having }}
|
|
157
|
+
{% endif %}{%- if order_by %}
|
|
158
|
+
ORDER BY {% for order in order_by %}
|
|
159
|
+
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
160
|
+
{%- if limit is not none %}
|
|
161
|
+
LIMIT ({{ limit }}){% endif %}{% endif %}
|
|
162
|
+
"""
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class DuckDBDialect(BaseDialect):
    """DuckDB dialect.

    Layers this module's function, window and datatype maps on top of the
    ``BaseDialect`` ones and adds schema introspection helpers that query
    ``information_schema``.
    """

    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
    FUNCTION_GRAIN_MATCH_MAP = {
        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
        **FUNCTION_GRAIN_MATCH_MAP,
    }
    DATATYPE_MAP = {**BaseDialect.DATATYPE_MAP, **DATATYPE_MAP}
    QUOTE_CHARACTER = '"'
    SQL_TEMPLATE = DUCKDB_TEMPLATE
    UNNEST_MODE = UnnestMode.DIRECT
    NULL_WRAPPER = staticmethod(null_wrapper)

    @staticmethod
    def _sql_string_literal(value: str) -> str:
        """Quote ``value`` as a SQL string literal, doubling embedded single quotes."""
        return "'" + str(value).replace("'", "''") + "'"

    def get_table_schema(
        self, executor, table_name: str, schema: str | None = None
    ) -> list[tuple]:
        """Returns a list of tuples: (column_name, data_type, is_nullable, column_comment).

        Rows come from ``information_schema.columns`` for ``table_name``,
        optionally restricted to ``schema``, ordered by ordinal position.
        """
        # The filter values are inlined as string literals, so embedded quotes
        # are escaped to keep a name such as "o'brien" from breaking out of the
        # literal and altering the query.
        column_query = f"""
        SELECT
            column_name,
            data_type,
            is_nullable,
            column_comment
        FROM information_schema.columns
        WHERE table_name = {self._sql_string_literal(table_name)}
        """

        if schema:
            column_query += f" AND table_schema = {self._sql_string_literal(schema)}"

        column_query += " ORDER BY ordinal_position"

        return executor.execute_raw_sql(column_query).fetchall()

    def get_table_primary_keys(
        self, executor, table_name: str, schema: str | None = None
    ) -> list[str]:
        """Get primary key columns by joining key_column_usage with table_constraints.

        Returns the column names ordered by their position in the key, or an
        empty list when no primary key rows are found.
        """
        # Values are inlined as escaped string literals (see get_table_schema).
        pk_query = f"""
        SELECT kcu.column_name
        FROM information_schema.key_column_usage kcu
        JOIN information_schema.table_constraints tc
            ON kcu.constraint_name = tc.constraint_name
            AND kcu.table_name = tc.table_name
        WHERE kcu.table_name = {self._sql_string_literal(table_name)}
            AND tc.constraint_type = 'PRIMARY KEY'
        """

        if schema:
            pk_query += f" AND kcu.table_schema = {self._sql_string_literal(schema)}"

        pk_query += " ORDER BY kcu.ordinal_position"

        rows = executor.execute_raw_sql(pk_query).fetchall()
        return [row[0] for row in rows] if rows else []
|
trilogy/dialect/enums.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import TYPE_CHECKING, Callable, List, Optional
|
|
3
|
+
|
|
4
|
+
from trilogy.core.models.environment import Environment
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from trilogy import Executor
|
|
8
|
+
from trilogy.hooks.base_hook import BaseHook
|
|
9
|
+
|
|
10
|
+
from trilogy.constants import Rendering, logger
|
|
11
|
+
from trilogy.dialect.config import DialectConfig, DuckDBConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def default_factory(conf: DialectConfig, config_type):
    """Build a SQLAlchemy engine for ``conf``.

    Args:
        conf: dialect configuration providing ``connection_string()`` and
            ``create_connect_args()``.
        config_type: the configuration class ``conf`` is required to be an
            instance of.

    Returns:
        The engine created by ``sqlalchemy.create_engine`` using ``NullPool``.

    Raises:
        TypeError: if ``conf`` is not an instance of ``config_type``.
    """
    from sqlalchemy import create_engine
    from sqlalchemy.pool import NullPool

    # Validate before touching any global state. The message names the expected
    # class itself; type(config_type).__name__ would always render as "type".
    if not isinstance(conf, config_type):
        expected = getattr(config_type, "__name__", repr(config_type))
        raise TypeError(
            f"Invalid dialect configuration for type {expected}, is {type(conf)}"
        )

    # the DuckDB IdentifierPreparer uses a global connection that is not thread safe
    if isinstance(conf, DuckDBConfig):
        # we monkey patch to parent to avoid this
        from duckdb_engine import DuckDBIdentifierPreparer, PGIdentifierPreparer

        DuckDBIdentifierPreparer.__init__ = PGIdentifierPreparer.__init__  # type: ignore

    engine_args = {
        "future": True,
        "poolclass": NullPool,
    }
    connect_args = conf.create_connect_args()
    if connect_args:
        engine_args["connect_args"] = connect_args
    return create_engine(conf.connection_string(), **engine_args)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Dialects(Enum):
    """Supported SQL backends, with factories for their renderer, engine and executor."""

    BIGQUERY = "bigquery"
    SQL_SERVER = "sql_server"
    DUCK_DB = "duck_db"
    PRESTO = "presto"
    TRINO = "trino"
    POSTGRES = "postgres"
    SNOWFLAKE = "snowflake"
    DATAFRAME = "dataframe"

    @classmethod
    def _missing_(cls, value):
        """Accept ``"duckdb"`` as an alternate spelling of ``DUCK_DB``."""
        if value == "duckdb":
            return cls.DUCK_DB
        return super()._missing_(value)

    def default_renderer(self, conf=None, _engine_factory: Callable = default_factory):
        """Return the dialect generator for this dialect.

        ``conf`` and ``_engine_factory`` are accepted but not used.
        """
        from trilogy.render import get_dialect_generator

        return get_dialect_generator(self)

    def default_engine(self, conf=None, _engine_factory: Callable = default_factory):
        """Create the default engine for this dialect.

        Args:
            conf: dialect configuration. BigQuery, DuckDB and DataFrame build a
                default one when it is omitted; the other dialects pass it
                straight to the factory.
            _engine_factory: callable taking ``(conf, config_class)`` and
                returning the engine.

        Raises:
            NotImplementedError: for SQL Server.
            ImportError: for Postgres when ``psycopg2`` cannot be found.
            ValueError: for a dialect without a default engine.
        """
        if self == Dialects.BIGQUERY:
            from trilogy.dialect.config import BigQueryConfig

            if not conf:
                # Resolve application default credentials only when no
                # configuration was supplied, so an explicit config still
                # works where default credentials are unavailable.
                from google.auth import default
                from google.cloud import bigquery

                credentials, project = default()
                client = bigquery.Client(credentials=credentials, project=project)
                conf = BigQueryConfig(project=project, client=client)
            return _engine_factory(
                conf,
                BigQueryConfig,
            )
        elif self == Dialects.SQL_SERVER:
            raise NotImplementedError()
        elif self == Dialects.DUCK_DB:
            from trilogy.dialect.config import DuckDBConfig

            if not conf:
                conf = DuckDBConfig()
            return _engine_factory(conf, DuckDBConfig)
        elif self == Dialects.SNOWFLAKE:
            from trilogy.dialect.config import SnowflakeConfig

            return _engine_factory(conf, SnowflakeConfig)
        elif self == Dialects.POSTGRES:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning(
                "WARN: Using experimental postgres dialect. Most functionality will not work."
            )
            # `import importlib` alone does not guarantee that the
            # importlib.util submodule is loaded; import it explicitly.
            import importlib.util

            spec = importlib.util.find_spec("psycopg2")
            if spec is None:
                raise ImportError(
                    "postgres driver not installed. python -m pip install pypreql[postgres]"
                )
            from trilogy.dialect.config import PostgresConfig

            return _engine_factory(conf, PostgresConfig)
        elif self == Dialects.PRESTO:
            from trilogy.dialect.config import PrestoConfig

            return _engine_factory(conf, PrestoConfig)
        elif self == Dialects.TRINO:
            from trilogy.dialect.config import TrinoConfig

            return _engine_factory(conf, TrinoConfig)
        elif self == Dialects.DATAFRAME:
            from trilogy.dialect.config import DataFrameConfig
            from trilogy.dialect.dataframe import DataframeConnectionWrapper

            if not conf:
                conf = DataFrameConfig(dataframes={})

            base = _engine_factory(conf, DataFrameConfig)

            return DataframeConnectionWrapper(base, dataframes=conf.dataframes)
        else:
            raise ValueError(
                f"Unsupported dialect {self} for default engine creation; create one explicitly."
            )

    def default_executor(
        self,
        environment: Optional["Environment"] = None,
        hooks: List["BaseHook"] | None = None,
        conf: DialectConfig | None = None,
        rendering: Rendering | None = None,
        _engine_factory: Callable | None = None,
    ) -> "Executor":
        """Create an ``Executor`` bound to this dialect's default engine.

        Args:
            environment: environment to execute against; a new ``Environment``
                is created when none (or a falsy one) is given.
            hooks: hooks forwarded to the executor.
            conf: dialect configuration forwarded to ``default_engine``.
            rendering: rendering options forwarded to the executor.
            _engine_factory: optional engine factory override; when omitted,
                ``default_engine`` uses its own default factory.
        """
        from trilogy import Executor

        if _engine_factory is not None:
            engine = self.default_engine(conf=conf, _engine_factory=_engine_factory)
        else:
            engine = self.default_engine(conf=conf)

        return Executor(
            engine=engine,
            environment=environment or Environment(),
            dialect=self,
            rendering=rendering,
            hooks=hooks,
        )
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from trilogy.core.enums import DatasourceState, PublishAction
|
|
4
|
+
from trilogy.core.models.datasource import Datasource
|
|
5
|
+
from trilogy.core.models.environment import Environment
|
|
6
|
+
from trilogy.core.statements.author import (
|
|
7
|
+
ConceptDeclarationStatement,
|
|
8
|
+
ImportStatement,
|
|
9
|
+
MergeStatementV2,
|
|
10
|
+
)
|
|
11
|
+
from trilogy.core.statements.execute import (
|
|
12
|
+
ProcessedPublishStatement,
|
|
13
|
+
ProcessedShowStatement,
|
|
14
|
+
ProcessedStaticValueOutput,
|
|
15
|
+
ProcessedValidateStatement,
|
|
16
|
+
)
|
|
17
|
+
from trilogy.core.validation.common import ValidationTest
|
|
18
|
+
from trilogy.dialect.base import BaseDialect
|
|
19
|
+
from trilogy.dialect.results import MockResult, generate_result_set
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def handle_concept_declaration(query: ConceptDeclarationStatement) -> MockResult:
    """Describe the declared concept as a single-row result; nothing is executed."""
    declared = query.concept
    columns = ["address", "type", "purpose", "derivation"]
    row = {
        "address": declared.address,
        "type": declared.datatype.value,
        "purpose": declared.purpose.value,
        "derivation": declared.derivation.value,
    }
    return MockResult([row], columns)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def handle_datasource(query: Datasource) -> MockResult:
    """Report the datasource's name as a single-row result; nothing is executed."""
    columns = ["name"]
    rows = [{"name": query.name}]
    return MockResult(rows, columns)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def handle_import_statement(query: ImportStatement) -> MockResult:
    """Report the import's path and alias as a single-row result; nothing is executed."""
    columns = ["path", "alias"]
    row = {"path": query.path, "alias": query.alias}
    return MockResult([row], columns)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def handle_publish_statement(
    query: ProcessedPublishStatement, environment: Environment
) -> MockResult:
    """Apply a publish/unpublish statement to the environment's datasources.

    Each target is looked up in ``environment.datasources`` and its status is
    set according to ``query.action``. The result holds one ``published`` row
    per target.

    Raises:
        ValueError: if a target is not present in the environment. Targets
            processed before the missing one keep their updated status.
    """
    for target in query.targets:
        datasource = environment.datasources.get(target)
        if not datasource:
            raise ValueError(f"Datasource {target} not found in environment")
        datasource.status = (
            DatasourceState.UNPUBLISHED
            if query.action == PublishAction.UNPUBLISH
            else DatasourceState.PUBLISHED
        )

    rows = [{"published": target} for target in query.targets]
    return MockResult(rows, ["published"])
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def handle_merge_statement(
    query: MergeStatementV2, environment: Environment
) -> MockResult:
    """Handle merge statements by updating environment and returning result.

    Every source concept is merged into the target registered under its
    address in ``query.targets``. The single result row lists the
    comma-joined source and target addresses.
    """
    for concept in query.sources:
        environment.merge_concept(
            concept, query.targets[concept.address], modifiers=query.modifiers
        )

    # The header must name the same keys the row carries; it previously read
    # ["source", "target"] while the row used "sources"/"targets".
    columns = ["sources", "targets"]
    row = {
        "sources": ",".join([x.address for x in query.sources]),
        "targets": ",".join([x.address for x in query.targets.values()]),
    }
    return MockResult([row], columns)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def handle_processed_show_statement(
    query: ProcessedShowStatement, compiled_statements: list[str]
) -> MockResult:
    """Wrap the compiled statements in a result set using the query's output columns."""
    columns = query.output_columns
    return generate_result_set(columns, compiled_statements)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def raw_validation_to_result(
    raw: list[ValidationTest], generator: Optional[BaseDialect] = None
) -> Optional[MockResult]:
    """Turn validation tests into a tabular mock result.

    When a test has a raw query but no generated query and a ``generator`` is
    supplied, the query is compiled and stored back on the test. A compile
    failure is recorded as the query text rather than raised.
    """
    columns = ["check_type", "expected", "result", "ran", "query"]
    if not raw:
        return MockResult([], columns)

    rows = []
    for check in raw:
        if check.raw_query and generator and not check.generated_query:
            try:
                check.generated_query = generator.compile_statement(check.raw_query)
            except Exception as exc:
                # Best effort: surface the failure in the "query" column.
                check.generated_query = f"Error generating query: {exc}"
        rows.append(
            {
                "check_type": check.check_type.value,
                "expected": check.expected,
                "result": str(check.result) if check.result else None,
                "ran": check.ran,
                "query": check.generated_query or "",
            }
        )
    return MockResult(rows, columns)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def handle_processed_validate_statement(
    query: ProcessedValidateStatement, dialect: BaseDialect, validate_environment_func
) -> Optional[MockResult]:
    """Run the supplied validation callable and tabulate its findings.

    ``validate_environment_func`` is called with the statement's scope and
    targets; ``dialect`` is passed on as the generator for the result.
    """
    raw_results = validate_environment_func(query.scope, query.targets)
    return raw_validation_to_result(raw=raw_results, generator=dialect)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def handle_show_statement_outputs(
    statement: ProcessedShowStatement,
    compiled_statements: list[str],
    environment: Environment,
    dialect: BaseDialect,
) -> list[MockResult]:
    """Build one result per output value of a show statement, without execution.

    Raises:
        NotImplementedError: for an output value that is neither static nor a
            validate statement when there are no compiled statements.
    """
    results = []
    for value in statement.output_values:
        if isinstance(value, ProcessedStaticValueOutput):
            results.append(generate_result_set(statement.output_columns, value.values))
            continue

        # NOTE(review): non-empty compiled statements take precedence over the
        # validate branch below - confirm this ordering is intended.
        if compiled_statements:
            results.append(
                generate_result_set(statement.output_columns, compiled_statements)
            )
            continue

        if not isinstance(value, ProcessedValidateStatement):
            raise NotImplementedError(f"Cannot show type {type(value)} in show statement")

        from trilogy.core.validation.environment import validate_environment

        raw = validate_environment(environment, value.scope, value.targets)
        validation = raw_validation_to_result(raw, dialect)
        if validation:
            results.append(validation)
    return results
|