pytrilogy 0.3.149__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.149.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.149.dist-info/RECORD +207 -0
  6. pytrilogy-0.3.149.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2670 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +436 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +846 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1432 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +397 -0
  117. trilogy/dialect/enums.py +151 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/exceptions.py +26 -0
  130. trilogy/execution/state/file_state_store.py +0 -0
  131. trilogy/execution/state/sqllite_state_store.py +0 -0
  132. trilogy/execution/state/state_store.py +406 -0
  133. trilogy/executor.py +692 -0
  134. trilogy/hooks/__init__.py +4 -0
  135. trilogy/hooks/base_hook.py +40 -0
  136. trilogy/hooks/graph_hook.py +135 -0
  137. trilogy/hooks/query_debugger.py +166 -0
  138. trilogy/metadata/__init__.py +0 -0
  139. trilogy/parser.py +10 -0
  140. trilogy/parsing/README.md +21 -0
  141. trilogy/parsing/__init__.py +0 -0
  142. trilogy/parsing/common.py +1069 -0
  143. trilogy/parsing/config.py +5 -0
  144. trilogy/parsing/exceptions.py +8 -0
  145. trilogy/parsing/helpers.py +1 -0
  146. trilogy/parsing/parse_engine.py +2876 -0
  147. trilogy/parsing/render.py +775 -0
  148. trilogy/parsing/trilogy.lark +546 -0
  149. trilogy/py.typed +0 -0
  150. trilogy/render.py +45 -0
  151. trilogy/scripts/README.md +9 -0
  152. trilogy/scripts/__init__.py +0 -0
  153. trilogy/scripts/agent.py +41 -0
  154. trilogy/scripts/agent_info.py +306 -0
  155. trilogy/scripts/common.py +432 -0
  156. trilogy/scripts/dependency/Cargo.lock +617 -0
  157. trilogy/scripts/dependency/Cargo.toml +39 -0
  158. trilogy/scripts/dependency/README.md +131 -0
  159. trilogy/scripts/dependency/build.sh +25 -0
  160. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  161. trilogy/scripts/dependency/src/lib.rs +16 -0
  162. trilogy/scripts/dependency/src/main.rs +770 -0
  163. trilogy/scripts/dependency/src/parser.rs +435 -0
  164. trilogy/scripts/dependency/src/preql.pest +208 -0
  165. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  166. trilogy/scripts/dependency/src/resolver.rs +716 -0
  167. trilogy/scripts/dependency/tests/base.preql +3 -0
  168. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  169. trilogy/scripts/dependency/tests/customer.preql +6 -0
  170. trilogy/scripts/dependency/tests/main.preql +9 -0
  171. trilogy/scripts/dependency/tests/orders.preql +7 -0
  172. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  173. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  174. trilogy/scripts/dependency.py +323 -0
  175. trilogy/scripts/display.py +555 -0
  176. trilogy/scripts/environment.py +59 -0
  177. trilogy/scripts/fmt.py +32 -0
  178. trilogy/scripts/ingest.py +487 -0
  179. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  180. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  181. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  182. trilogy/scripts/ingest_helpers/typing.py +161 -0
  183. trilogy/scripts/init.py +105 -0
  184. trilogy/scripts/parallel_execution.py +762 -0
  185. trilogy/scripts/plan.py +189 -0
  186. trilogy/scripts/refresh.py +161 -0
  187. trilogy/scripts/run.py +79 -0
  188. trilogy/scripts/serve.py +202 -0
  189. trilogy/scripts/serve_helpers/__init__.py +41 -0
  190. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  191. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  192. trilogy/scripts/serve_helpers/models.py +38 -0
  193. trilogy/scripts/single_execution.py +131 -0
  194. trilogy/scripts/testing.py +143 -0
  195. trilogy/scripts/trilogy.py +75 -0
  196. trilogy/std/__init__.py +0 -0
  197. trilogy/std/color.preql +3 -0
  198. trilogy/std/date.preql +13 -0
  199. trilogy/std/display.preql +18 -0
  200. trilogy/std/geography.preql +22 -0
  201. trilogy/std/metric.preql +15 -0
  202. trilogy/std/money.preql +67 -0
  203. trilogy/std/net.preql +14 -0
  204. trilogy/std/ranking.preql +7 -0
  205. trilogy/std/report.preql +5 -0
  206. trilogy/std/semantic.preql +6 -0
  207. trilogy/utility.py +34 -0
trilogy/executor.py ADDED
@@ -0,0 +1,692 @@
1
+ from functools import singledispatchmethod
2
+ from pathlib import Path
3
+ from typing import Any, Generator, List, Optional
4
+
5
+ from sqlalchemy import text
6
+
7
+ from trilogy.constants import MagicConstants, Rendering, logger
8
+ from trilogy.core.enums import (
9
+ AddressType,
10
+ CreateMode,
11
+ FunctionType,
12
+ Granularity,
13
+ IOType,
14
+ PersistMode,
15
+ ValidationScope,
16
+ )
17
+ from trilogy.core.models.author import Comment, Concept, Function
18
+ from trilogy.core.models.build import BuildFunction
19
+ from trilogy.core.models.core import ListWrapper, MapWrapper
20
+ from trilogy.core.models.datasource import Address, Datasource, UpdateKeys
21
+ from trilogy.core.models.environment import Environment
22
+ from trilogy.core.statements.author import (
23
+ STATEMENT_TYPES,
24
+ ConceptDeclarationStatement,
25
+ CopyStatement,
26
+ CreateStatement,
27
+ ImportStatement,
28
+ MergeStatementV2,
29
+ MockStatement,
30
+ MultiSelectStatement,
31
+ PersistStatement,
32
+ PublishStatement,
33
+ RawSQLStatement,
34
+ SelectStatement,
35
+ ShowStatement,
36
+ ValidateStatement,
37
+ )
38
+ from trilogy.core.statements.execute import (
39
+ PROCESSED_STATEMENT_TYPES,
40
+ ProcessedCopyStatement,
41
+ ProcessedCreateStatement,
42
+ ProcessedMockStatement,
43
+ ProcessedPublishStatement,
44
+ ProcessedQuery,
45
+ ProcessedQueryPersist,
46
+ ProcessedRawSQLStatement,
47
+ ProcessedShowStatement,
48
+ ProcessedValidateStatement,
49
+ )
50
+ from trilogy.core.validation.common import (
51
+ ValidationTest,
52
+ )
53
+ from trilogy.dialect.base import BaseDialect
54
+ from trilogy.dialect.config import DialectConfig
55
+ from trilogy.dialect.enums import Dialects
56
+ from trilogy.dialect.metadata import (
57
+ generate_result_set,
58
+ handle_concept_declaration,
59
+ handle_datasource,
60
+ handle_import_statement,
61
+ handle_merge_statement,
62
+ handle_processed_show_statement,
63
+ handle_processed_validate_statement,
64
+ handle_publish_statement,
65
+ handle_show_statement_outputs,
66
+ )
67
+ from trilogy.dialect.mock import handle_processed_mock_statement
68
+ from trilogy.engine import EngineConnection, ExecutionEngine, ResultProtocol
69
+ from trilogy.hooks.base_hook import BaseHook
70
+ from trilogy.parser import parse_text
71
+ from trilogy.render import get_dialect_generator
72
+
73
+
74
class Executor:
    """Parse, compile and run Trilogy statements against an execution engine.

    An executor ties together a dialect-specific SQL generator, an engine
    connection and an ``Environment``. Trilogy text is parsed against the
    environment, turned into processed statements by the generator and then
    executed through the connection.
    """

    def __init__(
        self,
        dialect: Dialects,
        engine: ExecutionEngine,
        environment: Optional[Environment] = None,
        rendering: Rendering | None = None,
        hooks: List[BaseHook] | None = None,
        config: DialectConfig | None = None,
    ):
        """Create the generator for ``dialect`` and open a connection.

        Args:
            dialect: Target SQL dialect; selects the generator.
            engine: Engine used to open the connection.
            environment: Environment to parse against; a new ``Environment``
                is created when this is falsy.
            rendering: Rendering options forwarded to the generator factory.
            hooks: Hooks forwarded to every ``generate_queries`` call.
            config: Dialect configuration; also consulted for the DuckDB
                setup steps.
        """
        self.dialect: Dialects = dialect
        self.engine = engine
        self.environment = environment or Environment()
        self.logger = logger
        self.hooks = hooks
        self.config = config
        self.generator: BaseDialect = get_dialect_generator(
            self.dialect, rendering, config
        )
        self.connection = self.connect()
        # TODO: make generic
        if self.dialect == Dialects.DATAFRAME:
            self.engine.setup(self.environment, self.connection)
        # Setup DuckDB extensions
        if self.dialect == Dialects.DUCK_DB:
            self._setup_duckdb_python_datasources()
            self._setup_duckdb_gcs()

    def connect(self) -> EngineConnection:
        """Open a connection on the engine, store it and mark as connected."""
        self.connection = self.engine.connect()
        self.connected = True
        return self.connection

    def _setup_duckdb_python_datasources(self) -> None:
        """Setup DuckDB macro for Python script datasources."""
        import sys

        from trilogy.dialect.config import DuckDBConfig
        from trilogy.dialect.duckdb import get_python_datasource_setup_sql

        enabled = (
            isinstance(self.config, DuckDBConfig)
            and self.config.enable_python_datasources
        )
        is_windows = sys.platform == "win32"
        # The setup SQL is executed whether or not the feature is enabled;
        # the flag is passed through to the SQL builder.
        self.execute_raw_sql(get_python_datasource_setup_sql(enabled, is_windows))
        self.connection.commit()

    def _setup_duckdb_gcs(self) -> None:
        """Setup DuckDB GCS extension with application default credentials."""
        from trilogy.dialect.config import DuckDBConfig
        from trilogy.dialect.duckdb import get_gcs_setup_sql

        enabled = isinstance(self.config, DuckDBConfig) and self.config.enable_gcs
        if not enabled:
            return
        sql = get_gcs_setup_sql(enabled)
        if sql:
            self.execute_raw_sql(sql)
            self.connection.commit()

    def close(self):
        """Dispose of the engine and mark the executor as disconnected."""
        self.engine.dispose(close=True)
        if self.dialect == Dialects.DUCK_DB:
            import gc

            gc.collect()
        self.connected = False

    def update_datasource(
        self, datasource: Datasource, keys: UpdateKeys | None = None
    ) -> None:
        """Update a datasource with optional filtering based on update keys.

        Args:
            datasource: The datasource to update
            keys: Optional UpdateKeys specifying incremental filters
        """
        where = keys.to_where_clause(self.environment) if keys else None
        # Skip CREATE for file-backed datasources (parquet, csv, etc.) - the file is the source
        is_file_backed = (
            isinstance(datasource.address, Address) and datasource.address.is_file
        )
        if not is_file_backed:
            create_stmt = CreateStatement(
                scope=ValidationScope.DATASOURCES,
                create_mode=CreateMode.CREATE_IF_NOT_EXISTS,
                targets=[datasource.name],
            )
            self.execute_statement(create_stmt)
        select_stmt = datasource.create_update_statement(
            self.environment, where, line_no=None
        )
        statement = PersistStatement(
            datasource=datasource,
            select=select_stmt,
        )
        self.execute_statement(statement)

    def execute_statement(
        self,
        statement: PROCESSED_STATEMENT_TYPES | STATEMENT_TYPES,
    ) -> Optional[ResultProtocol]:
        """Execute one authored or processed statement.

        Authored statements are first passed through the generator; only the
        first generated statement is executed. Returns ``None`` when nothing
        is generated or the result is not a processed statement type.
        """
        if isinstance(statement, STATEMENT_TYPES):
            generate = self.generator.generate_queries(
                self.environment, [statement], hooks=self.hooks  # type: ignore[list-item]
            )
            if not generate:
                return None
            statement = generate[0]

        if not isinstance(statement, PROCESSED_STATEMENT_TYPES):
            return None

        return self.execute_query(statement)

    @singledispatchmethod
    def execute_query(self, query) -> ResultProtocol | None:
        """Execute ``query``, dispatching on its type.

        Raises:
            NotImplementedError: No handler is registered for the type.
        """
        raise NotImplementedError("Cannot execute type {}".format(type(query)))

    @execute_query.register
    def _(self, query: Comment) -> ResultProtocol | None:
        """Comments have nothing to execute."""
        return None

    @execute_query.register
    def _(self, query: ConceptDeclarationStatement) -> ResultProtocol | None:
        """Delegate to ``handle_concept_declaration``."""
        return handle_concept_declaration(query)

    @execute_query.register
    def _(self, query: Datasource) -> ResultProtocol | None:
        """Delegate to ``handle_datasource``."""
        return handle_datasource(query)

    @execute_query.register
    def _(self, query: str) -> ResultProtocol | None:
        """Run Trilogy text and return the last result, if any."""
        results = self.execute_text(query)
        if results:
            return results[-1]
        return None

    @execute_query.register
    def _(self, query: SelectStatement) -> ResultProtocol | None:
        """Generate the processed form of a select and execute it."""
        sql = self.generator.generate_queries(
            self.environment, [query], hooks=self.hooks
        )
        return self.execute_query(sql[0])

    @execute_query.register
    def _(self, query: PersistStatement) -> ResultProtocol | None:
        """Generate the processed form of a persist and execute it."""
        sql = self.generator.generate_queries(
            self.environment, [query], hooks=self.hooks
        )
        return self.execute_query(sql[0])

    @execute_query.register
    def _(self, query: RawSQLStatement) -> ResultProtocol | None:
        """Execute the statement's SQL text directly."""
        return self.execute_raw_sql(query.text)

    @execute_query.register
    def _(self, query: ShowStatement) -> ResultProtocol | None:
        """Generate the processed form of a show statement and execute it."""
        sql = self.generator.generate_queries(
            self.environment, [query], hooks=self.hooks
        )
        return self.execute_query(sql[0])

    @execute_query.register
    def _(self, query: ProcessedShowStatement) -> ResultProtocol | None:
        """Compile the query-like output values and hand them to the show handler."""
        return handle_processed_show_statement(
            query,
            [
                self.generator.compile_statement(x)
                for x in query.output_values
                if isinstance(x, (ProcessedQuery, ProcessedQueryPersist))
            ],
        )

    @execute_query.register
    def _(self, query: ProcessedValidateStatement) -> ResultProtocol | None:
        """Delegate to ``handle_processed_validate_statement``."""
        return handle_processed_validate_statement(
            query, self.generator, self.validate_environment
        )

    @execute_query.register
    def _(self, query: ProcessedMockStatement) -> ResultProtocol | None:
        """Delegate to ``handle_processed_mock_statement``."""
        return handle_processed_mock_statement(query, self.environment, self)

    @execute_query.register
    def _(self, query: ProcessedCreateStatement) -> ResultProtocol | None:
        """Compile the create statement and run the resulting SQL."""
        sql = self.generator.compile_statement(query)
        return self.execute_raw_sql(sql)

    @execute_query.register
    def _(self, query: ProcessedPublishStatement) -> ResultProtocol | None:
        """Delegate to ``handle_publish_statement``."""
        return handle_publish_statement(query, self.environment)

    @execute_query.register
    def _(self, query: ImportStatement) -> ResultProtocol | None:
        """Delegate to ``handle_import_statement``."""
        return handle_import_statement(query)

    @execute_query.register
    def _(self, query: MergeStatementV2) -> ResultProtocol | None:
        """Delegate to ``handle_merge_statement``."""
        return handle_merge_statement(query, self.environment)

    @execute_query.register
    def _(self, query: ProcessedRawSQLStatement) -> ResultProtocol | None:
        """Execute the statement's SQL text directly."""
        return self.execute_raw_sql(query.text)

    @execute_query.register
    def _(self, query: ProcessedQuery) -> ResultProtocol | None:
        """Compile the query and run it, binding parameters from its local concepts."""
        sql = self.generator.compile_statement(query)
        return self.execute_raw_sql(sql, local_concepts=query.local_concepts)

    def _address_type_to_io_type(self, addr_type: AddressType) -> IOType:
        """Map a file address type to the matching IO type.

        Raises:
            NotImplementedError: ``addr_type`` is neither PARQUET nor CSV.
        """
        if addr_type == AddressType.PARQUET:
            return IOType.PARQUET
        elif addr_type == AddressType.CSV:
            return IOType.CSV
        raise NotImplementedError(f"File persist not supported for type {addr_type}")

    @execute_query.register
    def _(self, query: ProcessedQueryPersist) -> ResultProtocol | None:
        """Persist a query to its target.

        File targets are rewritten into a ``ProcessedCopyStatement`` and this
        handler then returns ``None``; other targets run the compiled SQL and
        return its result. In both cases the datasource is added to the
        environment when the persist mode is OVERWRITE.
        """
        # Check if target is a file - convert to CopyStatement
        addr = query.output_to.address
        if addr.is_file:
            io_type = self._address_type_to_io_type(addr.type)
            # Build column alias mapping from datasource columns
            column_aliases: dict[str, str] = {}
            for col in query.datasource.columns:
                if col.is_concrete and isinstance(col.alias, str):
                    column_aliases[col.concept.address] = col.alias
            copy_statement = ProcessedCopyStatement(
                output_columns=query.output_columns,
                ctes=query.ctes,
                base=query.base,
                hidden_columns=query.hidden_columns,
                limit=query.limit,
                order_by=query.order_by,
                local_concepts=query.local_concepts,
                locally_derived=query.locally_derived,
                target=addr.location,
                target_type=io_type,
                column_aliases=column_aliases,
            )
            self.execute_query(copy_statement)
            if query.persist_mode == PersistMode.OVERWRITE:
                self.environment.add_datasource(query.datasource)
            return None

        sql = self.generator.compile_statement(query)
        output = self.execute_raw_sql(sql, local_concepts=query.local_concepts)

        if query.persist_mode == PersistMode.OVERWRITE:
            self.environment.add_datasource(query.datasource)
        return output

    def _build_aliased_copy_sql(self, query: ProcessedCopyStatement) -> str:
        """Build SQL with column aliases for file output.

        Returns the compiled statement unchanged when no aliases are set;
        otherwise wraps it in an outer SELECT that renames aliased columns.
        """
        base_sql = self.generator.compile_statement(query)
        if not query.column_aliases:
            return base_sql
        quote = self.generator.QUOTE_CHARACTER
        alias_clauses = []
        for col in query.output_columns:
            target_name = query.column_aliases.get(col.address)
            if target_name:
                alias_clauses.append(
                    f"{quote}{col.safe_address}{quote} as {quote}{target_name}{quote}"
                )
            else:
                alias_clauses.append(f"{quote}{col.safe_address}{quote}")
        select_clause = ", ".join(alias_clauses)
        return f"SELECT {select_clause} FROM ({base_sql}) as _copy_source"

    @execute_query.register
    def _(self, query: ProcessedCopyStatement) -> ResultProtocol | None:
        """Write query output to a file via COPY (DuckDB only).

        Raises:
            NotImplementedError: The dialect is not DuckDB, or the target
                type is neither PARQUET nor CSV.
        """
        sql = self._build_aliased_copy_sql(query)
        if self.dialect == Dialects.DUCK_DB:
            # Check for GCS write credentials if target is a GCS path
            if query.target.startswith(("gcs://", "gs://")):
                from trilogy.dialect.duckdb import check_gcs_write_credentials

                check_gcs_write_credentials()

            if query.target_type == IOType.PARQUET:
                copy_sql = f"COPY ({sql}) TO '{query.target}' (FORMAT PARQUET)"
            elif query.target_type == IOType.CSV:
                copy_sql = f"COPY ({sql}) TO '{query.target}' (FORMAT CSV, HEADER)"
            else:
                raise NotImplementedError(f"Unsupported IO Type {query.target_type}")
            self.execute_raw_sql(copy_sql, local_concepts=query.local_concepts)
        else:
            raise NotImplementedError(
                f"COPY statement not supported for dialect {self.dialect}"
            )
        return generate_result_set(
            query.output_columns,
            [self.generator.compile_statement(query)],
        )

    @singledispatchmethod
    def generate_sql(self, command) -> list[str]:
        """Return the compiled SQL strings for ``command``, dispatching on type.

        Raises:
            NotImplementedError: No handler is registered for the type.
        """
        raise NotImplementedError(
            "Cannot generate sql for type {}".format(type(command))
        )

    @generate_sql.register  # type: ignore
    def _(self, command: ProcessedQuery) -> list[str]:
        """Compile a processed query into a single SQL string."""
        return [self.generator.compile_statement(command)]

    @generate_sql.register
    def _(self, command: ProcessedShowStatement) -> list[str]:
        """Compile every query-like output value of a show statement."""
        return [
            self.generator.compile_statement(statement)
            for statement in command.output_values
            if isinstance(statement, (ProcessedQuery, ProcessedQueryPersist))
        ]

    @generate_sql.register  # type: ignore
    def _(self, command: MultiSelectStatement) -> list[str]:
        """Generate and compile the queries for a multi-select statement."""
        sql = self.generator.generate_queries(
            self.environment, [command], hooks=self.hooks
        )
        return [self.generator.compile_statement(statement) for statement in sql]

    @generate_sql.register
    def _(self, command: SelectStatement) -> list[str]:
        """Generate and compile the queries for a select statement."""
        sql = self.generator.generate_queries(
            self.environment, [command], hooks=self.hooks
        )
        return [self.generator.compile_statement(statement) for statement in sql]

    @generate_sql.register
    def _(self, command: ProcessedCreateStatement) -> list[str]:
        """Compile a processed create statement into a single SQL string."""
        return [self.generator.compile_statement(command)]

    @generate_sql.register
    def _(self, command: ProcessedPublishStatement) -> list[str]:
        """Compile a processed publish statement into a single SQL string."""
        return [self.generator.compile_statement(command)]

    @generate_sql.register
    def _(self, command: str) -> list[str]:
        """Parse Trilogy text and compile its select/persist/multi-select statements."""
        _, parsed = parse_text(command, self.environment)
        generatable = [
            x
            for x in parsed
            if isinstance(x, (SelectStatement, PersistStatement, MultiSelectStatement))
        ]
        sql = self.generator.generate_queries(
            self.environment, generatable, hooks=self.hooks
        )
        return [
            self.generator.compile_statement(statement)
            for statement in sql
            if not isinstance(statement, ProcessedShowStatement)
        ]

    def parse_file(
        self, file: str | Path, persist: bool = False
    ) -> list[PROCESSED_STATEMENT_TYPES]:
        """Eagerly parse a file; see ``parse_file_generator``."""
        return list(self.parse_file_generator(file, persist=persist))

    def parse_file_generator(
        self, file: str | Path, persist: bool = False
    ) -> Generator[
        PROCESSED_STATEMENT_TYPES,
        None,
        None,
    ]:
        """Read a file and return a generator over its processed statements.

        The path is tried as given, then relative to the environment's
        working path. The file that was opened is passed on as the parse
        root.

        Raises:
            FileNotFoundError: Neither candidate could be opened; the error
                from the first candidate is re-raised.
        """
        requested = Path(file)
        candidates = [requested, self.environment.working_path / requested]
        first_error: FileNotFoundError | None = None
        for candidate in candidates:
            # Only opening the file can raise FileNotFoundError; building the
            # generator below does not run any of its body.
            try:
                with open(candidate, "r") as f:
                    command = f.read()
            except FileNotFoundError as e:
                if first_error is None:
                    first_error = e
                continue
            return self.parse_text_generator(command, persist=persist, root=candidate)
        if first_error:
            raise first_error
        raise FileNotFoundError(f"File {requested} not found")

    def parse_text(
        self, command: str, persist: bool = False, root: Path | None = None
    ) -> List[PROCESSED_STATEMENT_TYPES]:
        """Eagerly parse text; see ``parse_text_generator``."""
        return list(self.parse_text_generator(command, persist=persist, root=root))

    def parse_text_generator(
        self, command: str, persist: bool = False, root: Path | None = None
    ) -> Generator[
        PROCESSED_STATEMENT_TYPES,
        None,
        None,
    ]:
        """Process a preql text command.

        Statements are generated one at a time, in source order. When
        ``persist`` is true, the datasource of each processed persist is
        added to the environment after that statement has been yielded and
        the generator is resumed.
        """
        _, parsed = parse_text(command, self.environment, root=root)
        generatable = [
            x
            for x in parsed
            if isinstance(
                x,
                (
                    SelectStatement,
                    PersistStatement,
                    MultiSelectStatement,
                    ShowStatement,
                    RawSQLStatement,
                    CopyStatement,
                    ValidateStatement,
                    CreateStatement,
                    PublishStatement,
                    MockStatement,
                ),
            )
        ]
        for parsed_statement in generatable:
            processed = self.generator.generate_queries(
                self.environment, [parsed_statement], hooks=self.hooks
            )[0]

            yield processed

            if persist and isinstance(processed, ProcessedQueryPersist):
                self.environment.add_datasource(processed.datasource)

    def _atom_to_value(self, val: Any) -> Any:
        """Translate the NULL magic constant to ``None``; pass anything else through."""
        if val == MagicConstants.NULL:
            return None
        return val

    def _concept_to_value(
        self,
        concept: Concept,
        local_concepts: dict[str, Concept] | None = None,
    ) -> Any:
        """Resolve a single-row concept to a Python value for parameter binding.

        Constant concepts are read from their lineage argument; anything else
        is resolved by running a one-row select for the concept.
        ``local_concepts`` is accepted but not consulted by this method.

        Raises:
            SyntaxError: The concept's granularity is not SINGLE_ROW.
        """
        if concept.granularity != Granularity.SINGLE_ROW:
            raise SyntaxError(
                f"Cannot bind non-singleton concept {concept.address} ({concept.granularity}) to a parameter."
            )
        # TODO: to get rid of function here - need to figure out why it's getting passed in
        if (
            isinstance(concept.lineage, (BuildFunction, Function))
            and concept.lineage.operator == FunctionType.CONSTANT
        ):
            rval = concept.lineage.arguments[0]
            if isinstance(rval, ListWrapper):
                return [self._atom_to_value(x) for x in rval]
            if isinstance(rval, MapWrapper):
                # duckdb expects maps in this format as variables
                if self.dialect == Dialects.DUCK_DB:
                    return {
                        "key": [self._atom_to_value(x) for x in rval],
                        "value": [self._atom_to_value(rval[x]) for x in rval],
                    }
                return {k: self._atom_to_value(v) for k, v in rval.items()}
            # if isinstance(rval, ConceptRef):
            #     return self._concept_to_value(self.environment.concepts[rval.address], local_concepts=local_concepts)
            return rval
        else:
            results = self.execute_query(f"select {concept.name} limit 1;")
            if results:
                fetcher = results.fetchone()
                if fetcher:
                    return fetcher[0]
            return None

    def _hydrate_param(
        self, param: str, local_concepts: dict[str, Concept] | None = None
    ) -> Any:
        """Find the concept named by a SQL bind parameter and resolve its value.

        Environment concepts are searched first, matching on either
        ``safe_address`` or ``address``; ``local_concepts`` is only searched
        when the environment has no match.

        Raises:
            SyntaxError: No concept matches ``param``.
        """
        matched = [
            v
            for v in self.environment.concepts.values()
            if v.safe_address == param or v.address == param
        ]
        if local_concepts and not matched:
            matched = [
                v
                for v in local_concepts.values()
                if v.safe_address == param or v.address == param
            ]
        if not matched:
            raise SyntaxError(f"No concept found for parameter {param};")

        concept: Concept = matched.pop()
        return self._concept_to_value(concept, local_concepts=local_concepts)

    def execute_raw_sql(
        self,
        command: str | Path,
        variables: dict | None = None,
        local_concepts: dict[str, Concept] | None = None,
    ) -> ResultProtocol:
        """Run a command against the raw underlying execution engine.

        Args:
            command: SQL text, or a ``Path`` to a file containing it.
            variables: Bind parameters to use as given. When empty or
                omitted, any bind parameters found in the SQL are resolved
                from concepts via ``_hydrate_param``.
            local_concepts: Extra concepts available for that resolution.
        """
        final_params = None
        if isinstance(command, Path):
            with open(command, "r") as f:
                command = f.read()
        # Build the text clause once and reuse it for both parameter
        # discovery and execution.
        q = text(command)
        if variables:
            final_params = variables
        else:
            params = q.compile().params
            if params:
                final_params = {
                    x: self._hydrate_param(x, local_concepts=local_concepts)
                    for x in params
                }

        if final_params:
            output = self.connection.execute(q, final_params)
        else:
            output = self.connection.execute(q)
        # self.connection.commit()
        return output

    def execute_text(
        self, command: str, non_interactive: bool = False
    ) -> List[ResultProtocol]:
        """Run a trilogy query expressed as text.

        Reconnects first if the executor has been closed. Show and validate
        statements are always handled. When ``non_interactive`` is true, any
        other statement is skipped unless it is a copy, persist, raw SQL or
        publish statement.

        Returns:
            The truthy results of the executed statements, in order.
        """
        if not self.connected:
            self.connect()

        output: list[ResultProtocol] = []
        for statement in self.parse_text_generator(command):
            if isinstance(statement, ProcessedShowStatement):
                results = handle_show_statement_outputs(
                    statement,
                    [
                        self.generator.compile_statement(x)
                        for x in statement.output_values
                        if isinstance(x, (ProcessedQuery, ProcessedQueryPersist))
                    ],
                    self.environment,
                    self.generator,
                )
                output.extend(results)
                continue
            elif isinstance(statement, ProcessedValidateStatement):
                validate_result = handle_processed_validate_statement(
                    statement, self.generator, self.validate_environment
                )
                if validate_result:
                    output.append(validate_result)
                continue
            if non_interactive:
                if not isinstance(
                    statement,
                    (
                        ProcessedCopyStatement,
                        ProcessedQueryPersist,
                        ProcessedValidateStatement,
                        ProcessedRawSQLStatement,
                        ProcessedPublishStatement,
                    ),
                ):
                    continue
            result = self.execute_statement(statement)
            if result:
                output.append(result)
        return output

    def execute_file(
        self, file: str | Path, non_interactive: bool = False
    ) -> List[ResultProtocol]:
        """Execute the contents of a file.

        The path is tried as given, then relative to the environment's
        working path. Files with a ``.sql`` suffix are run as raw SQL; any
        other file is run as Trilogy text.

        Raises:
            FileNotFoundError: Neither candidate path exists.
        """
        requested = Path(file)
        candidates = [requested, self.environment.working_path / requested]
        for candidate in candidates:
            if not candidate.exists():
                continue
            with open(candidate, "r") as f:
                command = f.read()
            if candidate.suffix == ".sql":
                return [self.execute_raw_sql(command)]
            return self.execute_text(command, non_interactive=non_interactive)
        searched = ", ".join(str(candidate) for candidate in candidates)
        raise FileNotFoundError(f"File {requested} not found; searched: {searched}")

    def validate_environment(
        self,
        scope: ValidationScope = ValidationScope.ALL,
        targets: Optional[list[str]] = None,
        generate_only: bool = False,
    ) -> list[ValidationTest]:
        """Validate the environment and return the validation tests.

        Args:
            scope: Which parts of the environment to validate.
            targets: Optional names to restrict validation to.
            generate_only: When true, no executor is passed to the
                validator (``exec=None``); otherwise this executor is.
        """
        from trilogy.core.validation.environment import validate_environment

        return validate_environment(
            self.environment, scope, targets, exec=None if generate_only else self
        )
trilogy/hooks/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from trilogy.hooks.graph_hook import GraphHook
2
+ from trilogy.hooks.query_debugger import DebuggingHook
3
+
4
+ __all__ = ["DebuggingHook", "GraphHook"]