pytrilogy 0.3.149__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.149.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.149.dist-info/RECORD +207 -0
  6. pytrilogy-0.3.149.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2670 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +436 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +846 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1432 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +397 -0
  117. trilogy/dialect/enums.py +151 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/exceptions.py +26 -0
  130. trilogy/execution/state/file_state_store.py +0 -0
  131. trilogy/execution/state/sqllite_state_store.py +0 -0
  132. trilogy/execution/state/state_store.py +406 -0
  133. trilogy/executor.py +692 -0
  134. trilogy/hooks/__init__.py +4 -0
  135. trilogy/hooks/base_hook.py +40 -0
  136. trilogy/hooks/graph_hook.py +135 -0
  137. trilogy/hooks/query_debugger.py +166 -0
  138. trilogy/metadata/__init__.py +0 -0
  139. trilogy/parser.py +10 -0
  140. trilogy/parsing/README.md +21 -0
  141. trilogy/parsing/__init__.py +0 -0
  142. trilogy/parsing/common.py +1069 -0
  143. trilogy/parsing/config.py +5 -0
  144. trilogy/parsing/exceptions.py +8 -0
  145. trilogy/parsing/helpers.py +1 -0
  146. trilogy/parsing/parse_engine.py +2876 -0
  147. trilogy/parsing/render.py +775 -0
  148. trilogy/parsing/trilogy.lark +546 -0
  149. trilogy/py.typed +0 -0
  150. trilogy/render.py +45 -0
  151. trilogy/scripts/README.md +9 -0
  152. trilogy/scripts/__init__.py +0 -0
  153. trilogy/scripts/agent.py +41 -0
  154. trilogy/scripts/agent_info.py +306 -0
  155. trilogy/scripts/common.py +432 -0
  156. trilogy/scripts/dependency/Cargo.lock +617 -0
  157. trilogy/scripts/dependency/Cargo.toml +39 -0
  158. trilogy/scripts/dependency/README.md +131 -0
  159. trilogy/scripts/dependency/build.sh +25 -0
  160. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  161. trilogy/scripts/dependency/src/lib.rs +16 -0
  162. trilogy/scripts/dependency/src/main.rs +770 -0
  163. trilogy/scripts/dependency/src/parser.rs +435 -0
  164. trilogy/scripts/dependency/src/preql.pest +208 -0
  165. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  166. trilogy/scripts/dependency/src/resolver.rs +716 -0
  167. trilogy/scripts/dependency/tests/base.preql +3 -0
  168. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  169. trilogy/scripts/dependency/tests/customer.preql +6 -0
  170. trilogy/scripts/dependency/tests/main.preql +9 -0
  171. trilogy/scripts/dependency/tests/orders.preql +7 -0
  172. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  173. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  174. trilogy/scripts/dependency.py +323 -0
  175. trilogy/scripts/display.py +555 -0
  176. trilogy/scripts/environment.py +59 -0
  177. trilogy/scripts/fmt.py +32 -0
  178. trilogy/scripts/ingest.py +487 -0
  179. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  180. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  181. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  182. trilogy/scripts/ingest_helpers/typing.py +161 -0
  183. trilogy/scripts/init.py +105 -0
  184. trilogy/scripts/parallel_execution.py +762 -0
  185. trilogy/scripts/plan.py +189 -0
  186. trilogy/scripts/refresh.py +161 -0
  187. trilogy/scripts/run.py +79 -0
  188. trilogy/scripts/serve.py +202 -0
  189. trilogy/scripts/serve_helpers/__init__.py +41 -0
  190. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  191. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  192. trilogy/scripts/serve_helpers/models.py +38 -0
  193. trilogy/scripts/single_execution.py +131 -0
  194. trilogy/scripts/testing.py +143 -0
  195. trilogy/scripts/trilogy.py +75 -0
  196. trilogy/std/__init__.py +0 -0
  197. trilogy/std/color.preql +3 -0
  198. trilogy/std/date.preql +13 -0
  199. trilogy/std/display.preql +18 -0
  200. trilogy/std/geography.preql +22 -0
  201. trilogy/std/metric.preql +15 -0
  202. trilogy/std/money.preql +67 -0
  203. trilogy/std/net.preql +14 -0
  204. trilogy/std/ranking.preql +7 -0
  205. trilogy/std/report.preql +5 -0
  206. trilogy/std/semantic.preql +6 -0
  207. trilogy/utility.py +34 -0
@@ -0,0 +1,397 @@
1
+ import re
2
+ from os import environ
3
+ from pathlib import Path
4
+ from typing import Any, Callable, Mapping
5
+
6
+ from jinja2 import Template
7
+
8
+ from trilogy.core.enums import (
9
+ AddressType,
10
+ FunctionType,
11
+ Modifier,
12
+ UnnestMode,
13
+ WindowType,
14
+ )
15
+ from trilogy.core.models.core import DataType
16
+ from trilogy.core.models.datasource import Address
17
+ from trilogy.dialect.base import BaseDialect
18
+
19
# Window-function renderers defined by this module; empty here.
WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}

# Capture-group argument value that makes generate_regex_extract pick the
# group index itself (0 or 1) instead of using the supplied value.
SENTINAL_AUTO_CAPTURE_GROUP_VALUE = "-1"
22
+
23
+
24
def null_wrapper(
    lval: str,
    rval: str,
    modifiers: list[Modifier],
) -> str:
    """Render an equality comparison between two SQL expressions.

    When ``Modifier.NULLABLE`` is among ``modifiers`` the comparison uses
    ``is not distinct from``; otherwise it uses plain ``=``.
    """
    comparator = "is not distinct from" if Modifier.NULLABLE in modifiers else "="
    return f"{lval} {comparator} {rval}"
33
+
34
+
35
def generate_regex_extract(x: list[str]) -> str:
    """Render a ``REGEXP_EXTRACT`` call from ``[target, pattern, group]``.

    If the group argument equals the auto-capture sentinel, the group index
    is chosen from the pattern: 0 when it has no capture groups, otherwise 1.
    Any other group argument is passed through unchanged.
    """
    group = x[2]
    target, pattern = x[0], x[1]
    if str(group) != SENTINAL_AUTO_CAPTURE_GROUP_VALUE:
        return f"REGEXP_EXTRACT({target},{pattern},{group})"
    # Compile only to count capture groups; the pattern text is emitted as given.
    search = 1 if re.compile(pattern).groups != 0 else 0
    return f"REGEXP_EXTRACT({target},{pattern},{search})"
44
+
45
+
46
def render_sort(args, types):
    """Render a ``list_sort`` call.

    ``args[0]`` is the list expression. An optional ``args[1]`` holds the
    ordering text; it is split on its first space and each part is passed
    as a separate quoted argument. ``types`` is accepted but unused.
    """
    if len(args) == 1:
        return f"list_sort({args[0]})"
    direction, separator, remainder = args[1].partition(" ")
    if not separator:
        # No space present: only a single ordering keyword was given.
        return f"list_sort({args[0]}, '{direction}')"
    return f"list_sort({args[0]}, '{direction}', '{remainder}')"
54
+
55
+
56
def render_log(args):
    """Render a logarithm expression.

    Args:
        args: ``[value]`` or ``[value, base]`` as rendered SQL fragments.

    Returns:
        ``log(value)`` when no base is given or the base is numerically 10,
        otherwise the change-of-base form ``log(value)/log(base)``.

    Raises:
        ValueError: if ``args`` does not contain 1 or 2 items.
    """
    if len(args) == 1:
        return f"log({args[0]})"
    if len(args) != 2:
        raise ValueError("log function requires 1 or 2 arguments")

    value, base = args
    # The base may be a column reference or a decimal literal rather than an
    # integer literal; anything that is not numerically 10 uses the
    # change-of-base formula instead of failing to parse.
    try:
        is_base_ten = float(base) == 10
    except (TypeError, ValueError):
        is_base_ten = False

    if is_base_ten:
        return f"log({value})"
    # change of base formula
    return f"log({value})/log({base})"
67
+
68
+
69
def map_date_part_specifier(specifier: str) -> str:
    """Translate a date part name to the one used in the rendered SQL.

    Only ``day_of_week`` is rewritten (to ``dow``); every other specifier is
    returned unchanged.
    """
    aliases = {
        "day_of_week": "dow",
        # Add other mappings if needed
    }
    try:
        return aliases[specifier]
    except KeyError:
        return specifier
76
+
77
+
78
# Renderers for FunctionType values in this dialect. Each callable receives
# the rendered argument strings and a second ``types`` argument, and returns
# the SQL text for the call.
FUNCTION_MAP = {
    FunctionType.COUNT: lambda args, types: f"count({args[0]})",
    FunctionType.SUM: lambda args, types: f"sum({args[0]})",
    FunctionType.AVG: lambda args, types: f"avg({args[0]})",
    FunctionType.LENGTH: lambda args, types: f"length({args[0]})",
    FunctionType.LOG: lambda args, types: render_log(args),
    FunctionType.LIKE: lambda args, types: (
        f" CASE WHEN {args[0]} like {args[1]} THEN True ELSE False END"
    ),
    # NOTE: an earlier CONCAT(...) entry used to sit here but was shadowed by
    # the `||` based CONCAT entry further down (a duplicate dict key keeps
    # only the last value), so the dead entry has been dropped.
    FunctionType.SPLIT: lambda args, types: (
        f"STRING_SPLIT({','.join([f''' {str(a)} ''' for a in args])})"
    ),
    ## Duckdb indexes from 1, not 0
    FunctionType.INDEX_ACCESS: lambda args, types: (f"{args[0]}[{args[1]}]"),
    ## Duckdb uses list for array
    FunctionType.ARRAY_DISTINCT: lambda args, types: f"list_distinct({args[0]})",
    FunctionType.ARRAY_SUM: lambda args, types: f"list_sum({args[0]})",
    FunctionType.ARRAY_SORT: render_sort,
    FunctionType.ARRAY_TRANSFORM: lambda args, types: (
        f"list_transform({args[0]}, {args[1]} -> {args[2]})"
    ),
    FunctionType.ARRAY_AGG: lambda args, types: f"array_agg({args[0]})",
    # datetime is aliased,
    FunctionType.CURRENT_DATETIME: lambda x, types: "cast(get_current_timestamp() as datetime)",
    FunctionType.DATETIME: lambda x, types: f"cast({x[0]} as datetime)",
    FunctionType.TIMESTAMP: lambda x, types: f"cast({x[0]} as timestamp)",
    FunctionType.DATE: lambda x, types: f"cast({x[0]} as date)",
    FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc('{x[1]}', {x[0]})",
    FunctionType.DATE_ADD: lambda x, types: f"date_add({x[0]}, {x[2]} * INTERVAL 1 {x[1]})",
    FunctionType.DATE_SUB: lambda x, types: f"date_add({x[0]}, -{x[2]} * INTERVAL 1 {x[1]})",
    FunctionType.DATE_PART: lambda x, types: f"date_part('{map_date_part_specifier(x[1])}', {x[0]})",
    FunctionType.DATE_DIFF: lambda x, types: f"date_diff('{x[2]}', {x[0]}, {x[1]})",
    # The effective CONCAT renderer: joins the arguments with ||
    FunctionType.CONCAT: lambda x, types: f"({' || '.join(x)})",
    FunctionType.DATE_LITERAL: lambda x, types: f"date '{x}'",
    FunctionType.DATETIME_LITERAL: lambda x, types: f"datetime '{x}'",
    FunctionType.DAY_OF_WEEK: lambda x, types: f"dayofweek({x[0]})",
    # string
    FunctionType.CONTAINS: lambda x, types: f"CONTAINS(LOWER({x[0]}), LOWER({x[1]}))",
    # regexp
    FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_MATCHES({x[0]},{x[1]})",
    FunctionType.REGEXP_EXTRACT: lambda x, types: generate_regex_extract(x),
}
123
+
124
# if an aggregate function is called on a source that is at the same grain as the aggregate
# we may return a static value
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # A space is required after WHEN; without it the keyword fuses with the
    # rendered argument and produces invalid SQL.
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    # At matching grain these aggregates reduce to the argument itself.
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
    FunctionType.MAX: lambda args, types: f"{args[0]}",
    FunctionType.MIN: lambda args, types: f"{args[0]}",
}
135
+
136
# DataType overrides defined by this module; empty here.
DATATYPE_MAP: dict[DataType, str] = {}
137
+
138
+
139
def get_python_datasource_setup_sql(enabled: bool, is_windows: bool = False) -> str:
    """Return SQL to setup the uv_run macro for Python script datasources.
    Inspired by: https://sidequery.dev/blog/uv-run-duckdb

    Args:
        enabled: If True, installs extensions and creates working macro.
            If False, creates macro that throws a clear error.
        is_windows: If True, uses temp file workaround for shellfs pipe bug.

    Returns:
        The SQL text to execute. When ``enabled`` and ``is_windows`` are both
        true, an ``atexit`` hook is also registered to delete the temp file.
    """
    if not enabled:
        # Use a subquery that throws an error when evaluated
        # This ensures the error message is shown before column binding
        return """
CREATE OR REPLACE MACRO uv_run(script, args := '') AS TABLE
SELECT * FROM (
SELECT CASE
WHEN true THEN error('Python script datasources require enable_python_datasources=True in DuckDBConfig. '
|| 'Set this in your trilogy.conf under [engine.config] or pass DuckDBConfig(enable_python_datasources=True) to the executor.')
END as __error__
) WHERE __error__ IS NOT NULL;
"""

    if not is_windows:
        return """
INSTALL shellfs FROM community;
INSTALL arrow FROM community;
LOAD shellfs;
LOAD arrow;

CREATE OR REPLACE MACRO uv_run(script, args := '') AS TABLE
SELECT * FROM read_arrow('uv run --quiet ' || script || ' ' || args || ' |');
"""

    import atexit
    import os
    import tempfile

    # Windows workaround: shellfs has a bug with Arrow IPC pipes on Windows.
    # We use a temp file approach: run script to file, then read file.
    # The read_json forces the shell command to complete before read_arrow.
    # Using getvariable() defers file path resolution until execution.
    # Include PID in filename to avoid conflicts between parallel processes.
    # Use Path.resolve() to avoid 8.3 short names (e.g. RUNNER~1) on CI.
    temp_file = (
        str(Path(tempfile.gettempdir()).resolve()).replace("\\", "/")
        + f"/trilogy_uv_run_{os.getpid()}.arrow"
    )
    # The path is embedded in SQL string literals, so double any single quote
    # to keep it inside the literal.
    sql_temp_file = temp_file.replace("'", "''")

    def cleanup_temp_file() -> None:
        # Best effort: the file may never have been created.
        try:
            os.unlink(temp_file)
        except OSError:
            pass

    atexit.register(cleanup_temp_file)
    # The redirect target is wrapped in double quotes so a temp directory
    # containing spaces is still treated as a single path by the shell.
    return f"""
INSTALL shellfs FROM community;
INSTALL arrow FROM community;
LOAD shellfs;
LOAD arrow;

SET VARIABLE __trilogy_uv_temp_file = '{sql_temp_file}';

CREATE OR REPLACE MACRO uv_run(script, args := '') AS TABLE
WITH __build AS (
SELECT a.name
FROM read_json('uv run --quiet ' || script || ' ' || args || ' > "{sql_temp_file}" && echo {{"name": "done"}} |') AS a
LIMIT 1
)
SELECT * FROM read_arrow(getvariable('__trilogy_uv_temp_file'));
"""
211
+
212
+
213
def get_gcs_setup_sql(enabled: bool) -> str:
    """Return SQL to setup GCS extension with optional HMAC credentials.

    Args:
        enabled: If True, installs httpfs. If credentials are available,
            also creates a secret for authenticated access.
            If False, does nothing.

    Returns:
        The setup SQL, or an empty string when ``enabled`` is False.

    Environment variables (optional, required only for write access):
        GOOGLE_HMAC_KEY: GCS HMAC access key ID
        GOOGLE_HMAC_SECRET: GCS HMAC secret key
    """
    if not enabled:
        return ""

    key_id = environ.get("GOOGLE_HMAC_KEY")
    secret = environ.get("GOOGLE_HMAC_SECRET")

    # Always install httpfs for read access to public buckets
    base_sql = """
INSTALL httpfs;
LOAD httpfs;
"""

    if not (key_id and secret):
        return base_sql

    # Credentials are embedded in SQL string literals; double any single
    # quote so a value containing one cannot terminate the literal early.
    safe_key_id = key_id.replace("'", "''")
    safe_secret = secret.replace("'", "''")

    # If credentials are available, create a secret for authenticated access
    return (
        base_sql
        + f"""
CREATE OR REPLACE SECRET __trilogy_gcs_secret (
TYPE gcs,
KEY_ID '{safe_key_id}',
SECRET '{safe_secret}'
);
"""
    )
250
+
251
+
252
def check_gcs_write_credentials() -> None:
    """Ensure the environment holds the credentials needed to write to GCS.

    Both GOOGLE_HMAC_KEY and GOOGLE_HMAC_SECRET must be set to non-empty
    values. Call this before attempting to write to GCS.

    Raises:
        ValueError: if either variable is missing or empty.
    """
    required = ("GOOGLE_HMAC_KEY", "GOOGLE_HMAC_SECRET")
    if all(environ.get(name) for name in required):
        return
    raise ValueError(
        "Writing to GCS requires GOOGLE_HMAC_KEY and GOOGLE_HMAC_SECRET "
        "environment variables to be set"
    )
266
+
267
+
268
# Jinja2 template for a rendered statement. Optional sections, in order:
# `output`, a WITH clause built from `ctes` (RECURSIVE when `recursive` is
# set), then either `full_select` verbatim or a generated SELECT made of
# `comment`, `select_columns`, `base`, `joins`, `where`, `group_by`,
# `having`, `order_by` and `limit`.
# Whitespace-control markers ({%- ... -%}) shape the output layout, so edit
# line breaks inside the template with care.
DUCKDB_TEMPLATE = Template(
"""{%- if output %}
{{output}}
{% endif %}{%- if ctes %}
WITH {% if recursive%}RECURSIVE{% endif %}{% for cte in ctes %}
{{cte.name}} as (
{{cte.statement}}){% if not loop.last %},{% else %}
{% endif %}{% endfor %}{% endif %}
{%- if full_select -%}
{{full_select}}
{%- else -%}{%- if comment -%}
-- {{ comment }}
{%- endif %}SELECT
{%- for select in select_columns %}
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
{% if base %}FROM
{{ base }}{% endif %}{% if joins %}
{%- for join in joins %}
{{ join }}{% endfor %}{% endif %}
{%- if where %}
WHERE
{{ where }}
{% endif -%}{%- if group_by %}
GROUP BY {% for group in group_by %}
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
HAVING
{{ having }}
{% endif %}{%- if order_by %}
ORDER BY {% for order in order_by %}
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if limit is not none %}
LIMIT ({{ limit }}){% endif %}{% endif %}
"""
)
302
+
303
+
304
class DuckDBDialect(BaseDialect):
    """DuckDB dialect: BaseDialect with this module's maps and template layered on top.

    Also renders file-backed sources (CSV, TSV, Parquet, Python script, SQL
    file) and reads column and primary-key metadata from information_schema.
    """

    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
    FUNCTION_GRAIN_MATCH_MAP = {
        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
        **FUNCTION_GRAIN_MATCH_MAP,
    }
    DATATYPE_MAP = {**BaseDialect.DATATYPE_MAP, **DATATYPE_MAP}
    QUOTE_CHARACTER = '"'
    SQL_TEMPLATE = DUCKDB_TEMPLATE
    UNNEST_MODE = UnnestMode.DIRECT
    NULL_WRAPPER = staticmethod(null_wrapper)
    TABLE_NOT_FOUND_PATTERN = "Catalog Error: Table with name"
    HTTP_NOT_FOUND_PATTERN = "404 (Not Found)"

    @staticmethod
    def _quote_literal(value) -> str:
        """Return ``value`` as a single-quoted SQL string literal.

        Embedded single quotes are doubled so the value cannot end the
        literal early.
        """
        return "'" + str(value).replace("'", "''") + "'"

    def render_source(self, address: Address) -> str:
        """Render the FROM-clause expression for ``address``.

        CSV/TSV/Parquet addresses become reader function calls, Python
        scripts become a ``uv_run`` call, SQL addresses inline the file's
        contents as a subquery, and anything else is delegated to the base
        dialect.

        Raises:
            ValueError: for a Python script address when the dialect config
                is not a DuckDBConfig with enable_python_datasources set.
        """
        if address.type == AddressType.CSV:
            return f"read_csv({self._quote_literal(address.location)})"
        if address.type == AddressType.TSV:
            return f"read_csv({self._quote_literal(address.location)}, delim='\\t')"
        if address.type == AddressType.PARQUET:
            return f"read_parquet({self._quote_literal(address.location)})"
        if address.type == AddressType.PYTHON_SCRIPT:
            from trilogy.dialect.config import DuckDBConfig

            if not (
                isinstance(self.config, DuckDBConfig)
                and self.config.enable_python_datasources
            ):
                raise ValueError(
                    "Python script datasources require enable_python_datasources=True in DuckDBConfig. "
                    "Set this in your trilogy.conf under [engine.config] or pass "
                    "DuckDBConfig(enable_python_datasources=True) to the executor."
                )
            return f"uv_run({self._quote_literal(address.location)})"
        if address.type == AddressType.SQL:
            with open(address.location, "r") as f:
                sql_content = f.read().strip()
            # Wrap the file's query so it can be used as a subquery source.
            return f"({sql_content})"
        return super().render_source(address)

    def get_table_schema(
        self, executor, table_name: str, schema: str | None = None
    ) -> list[tuple]:
        """Returns a list of tuples: (column_name, data_type, is_nullable, column_comment)."""
        # Values are inlined as escaped literals because the query is sent
        # through execute_raw_sql as plain text.
        filters = [f"table_name = {self._quote_literal(table_name)}"]
        if schema:
            filters.append(f"table_schema = {self._quote_literal(schema)}")
        where_clause = " AND ".join(filters)

        column_query = f"""
SELECT
column_name,
data_type,
is_nullable,
column_comment
FROM information_schema.columns
WHERE {where_clause}
ORDER BY ordinal_position"""

        rows = executor.execute_raw_sql(column_query).fetchall()
        return rows

    def get_table_primary_keys(
        self, executor, table_name: str, schema: str | None = None
    ) -> list[str]:
        """Get primary key columns by joining key_column_usage with table_constraints."""
        filters = [
            f"kcu.table_name = {self._quote_literal(table_name)}",
            "tc.constraint_type = 'PRIMARY KEY'",
        ]
        if schema:
            filters.append(f"kcu.table_schema = {self._quote_literal(schema)}")
        where_clause = " AND ".join(filters)

        pk_query = f"""
SELECT kcu.column_name
FROM information_schema.key_column_usage kcu
JOIN information_schema.table_constraints tc
ON kcu.constraint_name = tc.constraint_name
AND kcu.table_name = tc.table_name
WHERE {where_clause}
ORDER BY kcu.ordinal_position"""

        rows = executor.execute_raw_sql(pk_query).fetchall()
        # An empty or missing result yields an empty list.
        return [row[0] for row in rows or []]
@@ -0,0 +1,151 @@
1
+ from enum import Enum
2
+ from typing import TYPE_CHECKING, Callable, List, Optional
3
+
4
+ from trilogy.core.models.environment import Environment
5
+
6
+ if TYPE_CHECKING:
7
+ from trilogy import Executor
8
+ from trilogy.hooks.base_hook import BaseHook
9
+
10
+ from trilogy.constants import Rendering, logger
11
+ from trilogy.dialect.config import DialectConfig, DuckDBConfig
12
+
13
+
14
def default_factory(conf: DialectConfig, config_type):
    """Create a SQLAlchemy engine for ``conf``.

    Args:
        conf: dialect configuration supplying the connection string and
            optional connect args.
        config_type: the configuration class ``conf`` must be an instance of.

    Returns:
        The engine from ``sqlalchemy.create_engine``, configured with
        ``NullPool`` and ``future=True``.

    Raises:
        TypeError: if ``conf`` is not an instance of ``config_type``.
    """
    from sqlalchemy import create_engine
    from sqlalchemy.pool import NullPool

    # Validate before any global patching so a bad call has no side effects.
    if not isinstance(conf, config_type):
        # config_type is itself a class; report its own name rather than the
        # name of its metaclass.
        expected = getattr(config_type, "__name__", config_type)
        raise TypeError(
            f"Invalid dialect configuration for type {expected}, is {type(conf)}"
        )

    engine_args: dict[str, object] = {
        "future": True,
        "poolclass": NullPool,
    }
    # the DuckDB IdentifierPreparer uses a global connection that is not thread safe
    if isinstance(conf, DuckDBConfig):
        # we monkey patch to parent to avoid this
        from duckdb_engine import DuckDBIdentifierPreparer, PGIdentifierPreparer

        DuckDBIdentifierPreparer.__init__ = PGIdentifierPreparer.__init__  # type: ignore
        engine_args["isolation_level"] = "AUTOCOMMIT"

    connect_args = conf.create_connect_args()
    if connect_args:
        engine_args["connect_args"] = connect_args
    return create_engine(conf.connection_string(), **engine_args)
38
+
39
+
40
class Dialects(Enum):
    """Supported SQL dialects, with helpers to build a default renderer, engine and executor."""

    BIGQUERY = "bigquery"
    SQL_SERVER = "sql_server"
    DUCK_DB = "duck_db"
    PRESTO = "presto"
    TRINO = "trino"
    POSTGRES = "postgres"
    SNOWFLAKE = "snowflake"
    DATAFRAME = "dataframe"

    @classmethod
    def _missing_(cls, value):
        # Accept "duckdb" as an alias for the canonical "duck_db" value.
        if value == "duckdb":
            return cls.DUCK_DB
        return super()._missing_(value)

    def default_renderer(self, conf=None, _engine_factory: Callable = default_factory):
        """Return the dialect generator for this dialect.

        ``conf`` and ``_engine_factory`` are accepted but not used.
        """
        from trilogy.render import get_dialect_generator

        return get_dialect_generator(self)

    def default_engine(self, conf=None, _engine_factory: Callable = default_factory):
        """Build an engine for this dialect.

        Args:
            conf: dialect configuration. BigQuery, DuckDB and DataFrame build
                a default one when omitted; the other dialects pass ``conf``
                straight to ``_engine_factory``.
            _engine_factory: callable taking ``(conf, config_type)`` and
                returning an engine.

        Raises:
            NotImplementedError: for SQL Server.
            ImportError: for Postgres when psycopg2 is not installed.
            ValueError: for a dialect with no default engine.
        """
        if self == Dialects.BIGQUERY:
            from trilogy.dialect.config import BigQueryConfig

            # Only resolve application-default credentials when the caller
            # did not provide a configuration of their own.
            if not conf:
                from google.auth import default
                from google.cloud import bigquery

                credentials, project = default()
                client = bigquery.Client(credentials=credentials, project=project)
                conf = BigQueryConfig(project=project, client=client)
            return _engine_factory(
                conf,
                BigQueryConfig,
            )
        elif self == Dialects.SQL_SERVER:
            raise NotImplementedError()
        elif self == Dialects.DUCK_DB:
            from trilogy.dialect.config import DuckDBConfig

            if not conf:
                conf = DuckDBConfig()
            return _engine_factory(conf, DuckDBConfig)
        elif self == Dialects.SNOWFLAKE:
            from trilogy.dialect.config import SnowflakeConfig

            return _engine_factory(conf, SnowflakeConfig)
        elif self == Dialects.POSTGRES:
            logger.warning(
                "WARN: Using experimental postgres dialect. Most functionality will not work."
            )
            # importlib.util is a submodule and must be imported explicitly;
            # a bare `import importlib` does not guarantee it is loaded.
            import importlib.util

            spec = importlib.util.find_spec("psycopg2")
            if spec is None:
                raise ImportError(
                    "postgres driver not installed. python -m pip install pytrilogy[postgres]"
                )
            from trilogy.dialect.config import PostgresConfig

            return _engine_factory(conf, PostgresConfig)
        elif self == Dialects.PRESTO:
            from trilogy.dialect.config import PrestoConfig

            return _engine_factory(conf, PrestoConfig)
        elif self == Dialects.TRINO:
            from trilogy.dialect.config import TrinoConfig

            return _engine_factory(conf, TrinoConfig)
        elif self == Dialects.DATAFRAME:
            from trilogy.dialect.config import DataFrameConfig
            from trilogy.dialect.dataframe import DataframeConnectionWrapper

            if not conf:
                conf = DataFrameConfig(dataframes={})

            base = _engine_factory(conf, DataFrameConfig)

            return DataframeConnectionWrapper(base, dataframes=conf.dataframes)
        else:
            raise ValueError(
                f"Unsupported dialect {self} for default engine creation; create one explicitly."
            )

    def default_executor(
        self,
        environment: Optional["Environment"] = None,
        hooks: List["BaseHook"] | None = None,
        conf: DialectConfig | None = None,
        rendering: Rendering | None = None,
        _engine_factory: Callable | None = None,
    ) -> "Executor":
        """Build an Executor for this dialect using its default engine.

        Args:
            environment: environment to run against; a new ``Environment()``
                is used when omitted or falsy.
            hooks: hooks passed through to the Executor.
            conf: dialect configuration, used for the engine and passed to
                the Executor as ``config``.
            rendering: rendering options passed through to the Executor.
            _engine_factory: optional engine factory override; when None the
                default factory of ``default_engine`` is used.
        """
        from trilogy import Executor

        # Forward the factory only when one was given, so default_engine
        # keeps its own default otherwise.
        if _engine_factory is not None:
            engine = self.default_engine(conf=conf, _engine_factory=_engine_factory)
        else:
            engine = self.default_engine(conf=conf)

        return Executor(
            engine=engine,
            environment=environment or Environment(),
            dialect=self,
            rendering=rendering,
            hooks=hooks,
            config=conf,
        )