pytrilogy 0.3.149__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.149.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.149.dist-info/RECORD +207 -0
  6. pytrilogy-0.3.149.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2670 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +436 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +846 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1432 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +397 -0
  117. trilogy/dialect/enums.py +151 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/exceptions.py +26 -0
  130. trilogy/execution/state/file_state_store.py +0 -0
  131. trilogy/execution/state/sqllite_state_store.py +0 -0
  132. trilogy/execution/state/state_store.py +406 -0
  133. trilogy/executor.py +692 -0
  134. trilogy/hooks/__init__.py +4 -0
  135. trilogy/hooks/base_hook.py +40 -0
  136. trilogy/hooks/graph_hook.py +135 -0
  137. trilogy/hooks/query_debugger.py +166 -0
  138. trilogy/metadata/__init__.py +0 -0
  139. trilogy/parser.py +10 -0
  140. trilogy/parsing/README.md +21 -0
  141. trilogy/parsing/__init__.py +0 -0
  142. trilogy/parsing/common.py +1069 -0
  143. trilogy/parsing/config.py +5 -0
  144. trilogy/parsing/exceptions.py +8 -0
  145. trilogy/parsing/helpers.py +1 -0
  146. trilogy/parsing/parse_engine.py +2876 -0
  147. trilogy/parsing/render.py +775 -0
  148. trilogy/parsing/trilogy.lark +546 -0
  149. trilogy/py.typed +0 -0
  150. trilogy/render.py +45 -0
  151. trilogy/scripts/README.md +9 -0
  152. trilogy/scripts/__init__.py +0 -0
  153. trilogy/scripts/agent.py +41 -0
  154. trilogy/scripts/agent_info.py +306 -0
  155. trilogy/scripts/common.py +432 -0
  156. trilogy/scripts/dependency/Cargo.lock +617 -0
  157. trilogy/scripts/dependency/Cargo.toml +39 -0
  158. trilogy/scripts/dependency/README.md +131 -0
  159. trilogy/scripts/dependency/build.sh +25 -0
  160. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  161. trilogy/scripts/dependency/src/lib.rs +16 -0
  162. trilogy/scripts/dependency/src/main.rs +770 -0
  163. trilogy/scripts/dependency/src/parser.rs +435 -0
  164. trilogy/scripts/dependency/src/preql.pest +208 -0
  165. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  166. trilogy/scripts/dependency/src/resolver.rs +716 -0
  167. trilogy/scripts/dependency/tests/base.preql +3 -0
  168. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  169. trilogy/scripts/dependency/tests/customer.preql +6 -0
  170. trilogy/scripts/dependency/tests/main.preql +9 -0
  171. trilogy/scripts/dependency/tests/orders.preql +7 -0
  172. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  173. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  174. trilogy/scripts/dependency.py +323 -0
  175. trilogy/scripts/display.py +555 -0
  176. trilogy/scripts/environment.py +59 -0
  177. trilogy/scripts/fmt.py +32 -0
  178. trilogy/scripts/ingest.py +487 -0
  179. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  180. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  181. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  182. trilogy/scripts/ingest_helpers/typing.py +161 -0
  183. trilogy/scripts/init.py +105 -0
  184. trilogy/scripts/parallel_execution.py +762 -0
  185. trilogy/scripts/plan.py +189 -0
  186. trilogy/scripts/refresh.py +161 -0
  187. trilogy/scripts/run.py +79 -0
  188. trilogy/scripts/serve.py +202 -0
  189. trilogy/scripts/serve_helpers/__init__.py +41 -0
  190. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  191. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  192. trilogy/scripts/serve_helpers/models.py +38 -0
  193. trilogy/scripts/single_execution.py +131 -0
  194. trilogy/scripts/testing.py +143 -0
  195. trilogy/scripts/trilogy.py +75 -0
  196. trilogy/std/__init__.py +0 -0
  197. trilogy/std/color.preql +3 -0
  198. trilogy/std/date.preql +13 -0
  199. trilogy/std/display.preql +18 -0
  200. trilogy/std/geography.preql +22 -0
  201. trilogy/std/metric.preql +15 -0
  202. trilogy/std/money.preql +67 -0
  203. trilogy/std/net.preql +14 -0
  204. trilogy/std/ranking.preql +7 -0
  205. trilogy/std/report.preql +5 -0
  206. trilogy/std/semantic.preql +6 -0
  207. trilogy/utility.py +34 -0
trilogy/dialect/bigquery.py
@@ -0,0 +1,314 @@
+ import uuid
+ from typing import Any, Callable, Dict, Mapping, Optional
+
+ from jinja2 import Template
+
+ from trilogy.core.enums import (
+     ComparisonOperator,
+     FunctionType,
+     UnnestMode,
+     WindowType,
+ )
+ from trilogy.core.models.core import (
+     DataType,
+ )
+ from trilogy.core.models.execute import CTE, CompiledCTE, UnionCTE
+ from trilogy.core.statements.execute import ProcessedQueryPersist
+ from trilogy.dialect.base import BaseDialect, safe_quote
+
+ WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
+
+
+ def transform_date_part(part: str) -> str:
+     part_upper = part.upper()
+     if part_upper == "DAY_OF_WEEK":
+         return "DAYOFWEEK"
+     return part_upper
+
+
+ def handle_length(args, types: list[DataType] | None = None) -> str:
+     arg = args[0]
+     if types and types[0].data_type == DataType.ARRAY:
+         return f"ARRAY_LENGTH({arg})"
+     return f"LENGTH({arg})"
+
+
+ FUNCTION_MAP = {
+     FunctionType.COUNT: lambda x, types: f"count({x[0]})",
+     FunctionType.SUM: lambda x, types: f"sum({x[0]})",
+     FunctionType.LENGTH: lambda x, types: handle_length(x, types),
+     FunctionType.AVG: lambda x, types: f"avg({x[0]})",
+     FunctionType.LIKE: lambda x, types: (
+         f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
+     ),
+     FunctionType.IS_NULL: lambda x, types: f"{x[0]} IS NULL",
+     FunctionType.MINUTE: lambda x, types: f"EXTRACT(MINUTE from {x[0]})",
+     FunctionType.SECOND: lambda x, types: f"EXTRACT(SECOND from {x[0]})",
+     FunctionType.HOUR: lambda x, types: f"EXTRACT(HOUR from {x[0]})",
+     FunctionType.DAY_OF_WEEK: lambda x, types: f"EXTRACT(DAYOFWEEK from {x[0]})-1",  # BigQuery's DAYOFWEEK returns 1 for Sunday
+     FunctionType.DAY: lambda x, types: f"EXTRACT(DAY from {x[0]})",
+     FunctionType.YEAR: lambda x, types: f"EXTRACT(YEAR from {x[0]})",
+     FunctionType.MONTH: lambda x, types: f"EXTRACT(MONTH from {x[0]})",
+     FunctionType.WEEK: lambda x, types: f"EXTRACT(WEEK from {x[0]})",
+     FunctionType.QUARTER: lambda x, types: f"EXTRACT(QUARTER from {x[0]})",
+     # math
+     FunctionType.POWER: lambda x, types: f"POWER({x[0]}, {x[1]})",
+     FunctionType.DIVIDE: lambda x, types: f"COALESCE(SAFE_DIVIDE({x[0]},{x[1]}),0)",
+     FunctionType.DATE_ADD: lambda x, types: f"DATE_ADD({x[0]}, INTERVAL {x[2]} {x[1]})",
+     FunctionType.DATE_SUB: lambda x, types: f"DATE_SUB({x[0]}, INTERVAL {x[2]} {x[1]})",
+     FunctionType.DATE_PART: lambda x, types: f"EXTRACT({transform_date_part(x[1])} FROM {x[0]})",
+     FunctionType.MONTH_NAME: lambda x, types: f"FORMAT_DATE('%B', {x[0]})",
+     FunctionType.DAY_NAME: lambda x, types: f"FORMAT_DATE('%A', {x[0]})",
+     # string
+     FunctionType.CONTAINS: lambda x, types: f"CONTAINS_SUBSTR({x[0]}, {x[1]})",
+     FunctionType.RANDOM: lambda x, types: f"FLOOR(RAND()*{x[0]})",
+     FunctionType.ARRAY_SUM: lambda x, types: f"(select sum(x) from unnest({x[0]}) as x)",
+     FunctionType.ARRAY_DISTINCT: lambda x, types: f"ARRAY(SELECT DISTINCT element FROM UNNEST({x[0]}) AS element)",
+     FunctionType.ARRAY_SORT: lambda x, types: f"ARRAY(SELECT element FROM UNNEST({x[0]}) AS element ORDER BY element)",
+     # aggregate
+     FunctionType.BOOL_AND: lambda x, types: f"LOGICAL_AND({x[0]})",
+     FunctionType.BOOL_OR: lambda x, types: f"LOGICAL_OR({x[0]})",
+ }
+
+ FUNCTION_GRAIN_MATCH_MAP = {
+     **FUNCTION_MAP,
+     FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
+     FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
+     FunctionType.SUM: lambda args, types: f"{args[0]}",
+     FunctionType.AVG: lambda args, types: f"{args[0]}",
+ }
+
+ DATATYPE_MAP: dict[DataType, str] = {
+     DataType.STRING: "STRING",
+     DataType.INTEGER: "INT64",
+     DataType.FLOAT: "FLOAT64",
+     DataType.BOOL: "BOOL",
+     DataType.NUMERIC: "NUMERIC",
+     DataType.MAP: "MAP",
+     DataType.DATE: "DATE",
+     DataType.DATETIME: "DATETIME",
+     DataType.TIMESTAMP: "TIMESTAMP",
+ }
+
+
+ BQ_SQL_TEMPLATE = Template(
+     """{%- if output %}
+ {{output}}
+ {% endif %}{%- if ctes %}
+ WITH {% if recursive%}RECURSIVE{% endif %}{% for cte in ctes %}
+ {{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% else%}
+ {% endif %}{% endfor %}{% endif %}
+ {%- if full_select -%}
+ {{full_select}}
+ {%- else -%}
+ SELECT
+ {%- for select in select_columns %}
+ {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
+ {% if base %}FROM
+ {{ base }}{% endif %}{% if joins %}{% for join in joins %}
+ {{ join }}{% endfor %}{% endif %}
+ {% if where %}WHERE
+ {{ where }}
+ {% endif %}
+ {%- if group_by %}GROUP BY {% for group in group_by %}
+ {{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
+ HAVING
+ \t{{ having }}{% endif %}
+ {%- if order_by %}
+ ORDER BY {% for order in order_by %}
+ {{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+ {%- if limit is not none %}
+ LIMIT {{ limit }}{% endif %}{% endif %}
+ """
+ )
+
+
+ BQ_CREATE_TABLE_SQL_TEMPLATE = Template(
+     """
+ CREATE {% if create_mode == "create_or_replace" %}OR REPLACE TABLE{% elif create_mode == "create_if_not_exists" %}TABLE IF NOT EXISTS{% else %}TABLE{% endif %} {{ name}} (
+ {%- for column in columns %}
+ `{{ column.name }}` {{ type_map[column.name] }}{% if column.description %} OPTIONS(description='{{ column.description }}'){% endif %}{% if not loop.last %},{% endif %}
+ {%- endfor %}
+ )
+ {%- if partition_by %}
+ PARTITION BY {{ partition_by }}
+ {%- endif %}
+ {%- if cluster_by %}
+ CLUSTER BY {{ cluster_by | join(', ') }}
+ {%- endif %}
+ {%- if table_description %}
+ OPTIONS(
+ description='{{ table_description }}'
+ )
+ {%- endif %};
+ """.strip()
+ )
+
+ PARTITIONED_INSERT_TEMPLATE = Template(
+     """
+ -- Step 1: materialize results
+ CREATE TEMP TABLE {{ tmp_table }} AS SELECT * FROM {{ target_table }} limit 0;
+
+ INSERT INTO {{ tmp_table }}
+ {{ final_select }}
+ ;
+
+ -- Step 2: extract distinct partitions and generate dynamic statements
+ BEGIN
+ DECLARE partition_values ARRAY<{{ partition_type }}>;
+ DECLARE current_partition {{ partition_type }};
+ DECLARE i INT64 DEFAULT 0;
+
+ -- Get all distinct partition values
+ SET partition_values = (
+ SELECT ARRAY_AGG(DISTINCT {{ partition_key[0] }})
+ FROM {{ tmp_table }}
+ );
+
+ -- Loop through each partition value
+ WHILE i < ARRAY_LENGTH(partition_values) DO
+ SET current_partition = partition_values[OFFSET(i)];
+
+ -- Delete existing records for this partition
+ EXECUTE IMMEDIATE FORMAT(
+ 'DELETE FROM {{ target_table }} WHERE {{ partition_key[0] }} = "%t"',
+ current_partition
+ );
+
+ -- Insert new records for this partition
+ EXECUTE IMMEDIATE FORMAT(
+ 'INSERT INTO {{ target_table }} SELECT * FROM {{ tmp_table }} WHERE {{ partition_key[0] }} = "%t"',
+ current_partition
+ );
+
+ SET i = i + 1;
+ END WHILE;
+ END;
+ """
+ )
+
+ MAX_IDENTIFIER_LENGTH = 50
+
+
+ def parse_bigquery_table_name(
+     table_name: str, schema: str | None = None
+ ) -> tuple[str, str | None]:
+     """Parse BigQuery table names supporting project.dataset.table format."""
+     if "." in table_name and not schema:
+         parts = table_name.split(".")
+         if len(parts) == 2:
+             schema = parts[0]
+             table_name = parts[1]
+         elif len(parts) == 3:
+             # project.dataset.table format
+             schema = f"{parts[0]}.{parts[1]}"
+             table_name = parts[2]
+     return table_name, schema
+
+
+ class BigqueryDialect(BaseDialect):
+     WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
+     FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
+     FUNCTION_GRAIN_MATCH_MAP = {
+         **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
+         **FUNCTION_GRAIN_MATCH_MAP,
+     }
+     QUOTE_CHARACTER = "`"
+     SQL_TEMPLATE = BQ_SQL_TEMPLATE
+     CREATE_TABLE_SQL_TEMPLATE = BQ_CREATE_TABLE_SQL_TEMPLATE
+     UNNEST_MODE = UnnestMode.CROSS_JOIN_UNNEST
+     DATATYPE_MAP = DATATYPE_MAP
+
+     def hash_column_value(self, column_name: str) -> str:
+         return f"FARM_FINGERPRINT(CAST({safe_quote(column_name, self.QUOTE_CHARACTER)} AS STRING))"
+
+     def aggregate_checksum(self, hash_expr: str) -> str:
+         return f"BIT_XOR({hash_expr})"
+
+     def get_table_schema(
+         self, executor, table_name: str, schema: str | None = None
+     ) -> list[tuple]:
+         """BigQuery uses dataset instead of schema and supports project.dataset.table format."""
+         table_name, schema = parse_bigquery_table_name(table_name, schema)
+
+         column_query = f"""
+ SELECT
+ column_name,
+ data_type,
+ is_nullable,
+ '' as column_comment
+ FROM `{schema}.INFORMATION_SCHEMA.COLUMNS`
+ WHERE table_name = '{table_name}'
+ ORDER BY ordinal_position
+ """
+
+         rows = executor.execute_raw_sql(column_query).fetchall()
+         return rows
+
+     def get_table_primary_keys(
+         self, executor, table_name: str, schema: str | None = None
+     ) -> list[str]:
+         """BigQuery doesn't enforce primary keys; rely on data-driven grain detection."""
+         table_name, schema = parse_bigquery_table_name(table_name, schema)
+
+         pk_query = f"""
+ SELECT column_name
+ FROM `{schema}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE`
+ WHERE table_name = '{table_name}'
+ AND constraint_name LIKE '%PRIMARY%'
+ """
+
+         rows = executor.execute_raw_sql(pk_query).fetchall()
+         return [row[0] for row in rows]
+
+     def render_array_unnest(
+         self,
+         left,
+         right,
+         operator: ComparisonOperator,
+         cte: CTE | UnionCTE | None = None,
+         cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
+         raise_invalid: bool = False,
+     ):
+         return f"{self.render_expr(left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {operator.value} unnest({self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)})"
+
+     def generate_partitioned_insert(
+         self,
+         query: ProcessedQueryPersist,
+         recursive: bool,
+         compiled_ctes: list[CompiledCTE],
+     ) -> str:
+         tmp_table = f"tmp__{uuid.uuid4().hex}"
+         final_select = compiled_ctes[-1].statement
+         ctes = compiled_ctes[:-1]
+
+         if not query.partition_by:
+             raise ValueError("partition_by must be set for partitioned inserts.")
+
+         partition_key = query.partition_by
+         target_table = safe_quote(
+             query.output_to.address.location, self.QUOTE_CHARACTER
+         )
+
+         # render intermediate CTEs
+         ctes_sql = ""
+         if ctes:
+             rendered = []
+             for c in ctes:
+                 rendered.append(f"{c.name} AS ({c.statement})")
+             ctes_sql = "WITH " + ",\n".join(rendered)
+
+         # create temp table first
+         full_select_with_ctes = (
+             final_select if not ctes_sql else f"{ctes_sql}\n{final_select}"
+         )
+
+         sql_script = PARTITIONED_INSERT_TEMPLATE.render(
+             tmp_table=tmp_table,
+             final_select=full_select_with_ctes,
+             partition_key=partition_key,
+             target_table=target_table,
+             partition_type=self.DATATYPE_MAP[query.partition_types[0]],
+         )
+
+         return sql_script
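
A quick sketch of how parse_bigquery_table_name above resolves the three naming forms BigQuery allows. The table names are illustrative; the expected tuples follow directly from the function body in this diff.

# Sketch: expected outputs of parse_bigquery_table_name (illustrative names).
from trilogy.dialect.bigquery import parse_bigquery_table_name

assert parse_bigquery_table_name("orders") == ("orders", None)
assert parse_bigquery_table_name("sales.orders") == ("orders", "sales")
# project.dataset.table: project and dataset collapse into the schema slot
assert parse_bigquery_table_name("my-proj.sales.orders") == ("orders", "my-proj.sales")
# an explicit schema suppresses splitting entirely
assert parse_bigquery_table_name("sales.orders", schema="other") == ("sales.orders", "other")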
trilogy/dialect/common.py
@@ -0,0 +1,147 @@
+ from typing import Callable
+
+ from trilogy.core.constants import UNNEST_NAME
+ from trilogy.core.enums import Modifier, UnnestMode
+ from trilogy.core.models.build import (
+     BuildComparison,
+     BuildConcept,
+     BuildConditional,
+     BuildFunction,
+     BuildParamaterizedConceptReference,
+     BuildParenthetical,
+ )
+ from trilogy.core.models.execute import (
+     CTE,
+     InstantiatedUnnestJoin,
+     Join,
+     UnionCTE,
+ )
+
+
+ def render_unnest(
+     unnest_mode: UnnestMode,
+     quote_character: str,
+     concept: BuildConcept | BuildParamaterizedConceptReference | BuildFunction,
+     render_func: Callable[
+         [BuildConcept | BuildParamaterizedConceptReference | BuildFunction, CTE], str
+     ],
+     cte: CTE,
+ ):
+     if not isinstance(concept, (BuildConcept, BuildParamaterizedConceptReference)):
+         address = UNNEST_NAME
+     else:
+         address = concept.safe_address
+     if unnest_mode == UnnestMode.CROSS_JOIN:
+         return f"{render_func(concept, cte)} as {quote_character}{address}{quote_character}"
+     elif unnest_mode == UnnestMode.CROSS_JOIN_UNNEST:
+         return f"unnest({render_func(concept, cte)}) as {quote_character}{address}{quote_character}"
+     elif unnest_mode == UnnestMode.PRESTO:
+         return f"unnest({render_func(concept, cte)}) as t({quote_character}{UNNEST_NAME}{quote_character})"
+     elif unnest_mode == UnnestMode.CROSS_JOIN_ALIAS:
+         return f"{render_func(concept, cte)} as unnest_wrapper ({quote_character}{address}{quote_character})"
+     elif unnest_mode == UnnestMode.SNOWFLAKE:
+         # if we don't actually have a join, we're directly unnesting a concept, and we can skip the flatten
+         if not cte.render_from_clause:
+             return f"{render_func(concept, cte)} as unnest_wrapper ( unnest1, unnest2, unnest3, unnest4, {quote_character}{cte.join_derived_concepts[0].safe_address}{quote_character})"
+         # otherwise, flatten the concept for the join
+         return f"flatten({render_func(concept, cte)}) as unnest_wrapper ( unnest1, unnest2, unnest3, unnest4, {quote_character}{cte.join_derived_concepts[0].safe_address}{quote_character})"
+     return f"{render_func(concept, cte)} as {quote_character}{address}{quote_character}"
+
+
+ def render_join_concept(
+     name: str,
+     quote_character: str,
+     cte: CTE | UnionCTE,
+     concept: BuildConcept,
+     render_expr,
+     inlined_ctes: set[str],
+     use_map: dict[str, set[str]],
+ ):
+     if cte.name in inlined_ctes:
+         base = render_expr(concept, cte)
+         return base
+     use_map[name].add(concept.address)
+     return f"{quote_character}{name}{quote_character}.{quote_character}{concept.safe_address}{quote_character}"
+
+
+ def render_join(
+     join: Join | InstantiatedUnnestJoin,
+     quote_character: str,
+     render_expr_func: Callable[
+         [
+             BuildConcept
+             | BuildParamaterizedConceptReference
+             | BuildFunction
+             | BuildConditional
+             | BuildComparison
+             | BuildParenthetical,
+             CTE,
+         ],
+         str,
+     ],
+     cte: CTE,
+     use_map: dict[str, set[str]],
+     null_wrapper: Callable[[str, str, list[Modifier]], str],
+     unnest_mode: UnnestMode = UnnestMode.CROSS_APPLY,
+ ) -> str | None:
+     # {% for key in join.joinkeys %}{{ key.inner }} = {{ key.outer}}{% endfor %}
+     if isinstance(join, InstantiatedUnnestJoin):
+         if unnest_mode == UnnestMode.DIRECT:
+             return None
+         if not cte:
+             raise ValueError("must provide a cte to build an unnest join")
+         if unnest_mode in (
+             UnnestMode.CROSS_JOIN,
+             UnnestMode.CROSS_JOIN_UNNEST,
+             UnnestMode.CROSS_JOIN_ALIAS,
+             UnnestMode.PRESTO,
+         ):
+             return f"CROSS JOIN {render_unnest(unnest_mode, quote_character, join.object_to_unnest, render_expr_func, cte)}"
+         if unnest_mode == UnnestMode.SNOWFLAKE:
+             return f"LEFT JOIN LATERAL {render_unnest(unnest_mode, quote_character, join.object_to_unnest, render_expr_func, cte)}"
+         return f"FULL JOIN {render_unnest(unnest_mode, quote_character, join.object_to_unnest, render_expr_func, cte)}"
+     # left_name = join.left_name
+     right_name = join.right_name
+     join.quote = quote_character
+     # if cte.quote_address.get(join.right_name, False):
+     #     join.quote = quote_character
+     right_base = join.right_ref
+     base_joinkeys = []
+     if join.joinkey_pairs:
+         base_joinkeys.extend(
+             [
+                 null_wrapper(
+                     render_join_concept(
+                         join.get_name(pair.cte),
+                         quote_character,
+                         pair.cte,
+                         pair.left,
+                         render_expr_func,
+                         join.inlined_ctes,
+                         use_map=use_map,
+                     ),
+                     render_join_concept(
+                         right_name,
+                         quote_character,
+                         join.right_cte,
+                         pair.right,
+                         render_expr_func,
+                         join.inlined_ctes,
+                         use_map=use_map,
+                     ),
+                     pair.modifiers
+                     + (pair.left.modifiers or [])
+                     + (pair.right.modifiers or [])
+                     + (join.modifiers or []),
+                 )
+                 for pair in join.joinkey_pairs
+             ]
+         )
+     if not base_joinkeys:
+         base_joinkeys = ["1=1"]
+
+     joinkeys = " AND ".join(sorted(base_joinkeys))
+     base = f"{join.jointype.value.upper()} JOIN {right_base} on {joinkeys}"
+     if join.condition:
+         base = f"{base} and {render_expr_func(join.condition, cte)}"
+     return base
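
A minimal stand-alone sketch of the non-Snowflake branches of render_unnest above, for readers comparing how each dialect aliases an unnested value. This mimic is illustrative only: the real function takes BuildConcept/CTE objects plus a render callback, which are replaced here by a pre-rendered expression string and its target alias, and UNNEST_NAME is assumed to render as "unnest".

# Hypothetical mini-version of render_unnest; `expr` stands in for
# render_func(concept, cte) and `address` for concept.safe_address.
def sketch_render_unnest(mode: str, expr: str, address: str, q: str = "`") -> str:
    if mode == "CROSS_JOIN":
        return f"{expr} as {q}{address}{q}"
    if mode == "CROSS_JOIN_UNNEST":  # BigQuery; see UNNEST_MODE in bigquery.py above
        return f"unnest({expr}) as {q}{address}{q}"
    if mode == "PRESTO":
        return f"unnest({expr}) as t({q}unnest{q})"  # assumes UNNEST_NAME == "unnest"
    if mode == "CROSS_JOIN_ALIAS":
        return f"{expr} as unnest_wrapper ({q}{address}{q})"
    return f"{expr} as {q}{address}{q}"  # fallback, as in the original

print(sketch_render_unnest("CROSS_JOIN_UNNEST", "`orders`.`tags`", "tag"))
# unnest(`orders`.`tags`) as `tag`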
trilogy/dialect/config.py
@@ -0,0 +1,159 @@
+ from typing import TYPE_CHECKING, Any
+
+ if TYPE_CHECKING:
+     try:
+         from pandas import DataFrame
+     except ImportError:
+         DataFrame = Any
+
+
+ class DialectConfig:
+     def __init__(self):
+         pass
+
+     def connection_string(self) -> str:
+         raise NotImplementedError
+
+     def create_connect_args(self) -> dict:
+         return {}
+
+     def merge_config(self, other: "DialectConfig") -> "DialectConfig":
+         for key, value in other.__dict__.items():
+             if value is not None:
+                 setattr(self, key, value)
+         return self
+
+
+ class BigQueryConfig(DialectConfig):
+     def __init__(self, project: str | None = None, client: Any | None = None):
+         self.project = project
+         self.client = client
+
+     def connection_string(self) -> str:
+         return f"bigquery://{self.project}?user_supplied_client=True"
+
+     def create_connect_args(self) -> dict:
+         if not self.client:
+             from google.auth import default
+             from google.cloud import bigquery
+
+             credentials, project = default()
+             self.client = bigquery.Client(credentials=credentials, project=project)
+             self.project = project
+
+         return {"client": self.client}
+
+
+ class DuckDBConfig(DialectConfig):
+     def __init__(
+         self,
+         path: str | None = None,
+         enable_python_datasources: bool | None = None,
+         enable_gcs: bool | None = None,
+     ):
+         self.path = path
+         self._enable_python_datasources = enable_python_datasources
+         self._enable_gcs = enable_gcs
+         self.guid = id(self)
+
+     @property
+     def enable_python_datasources(self) -> bool:
+         return self._enable_python_datasources or False
+
+     @property
+     def enable_gcs(self) -> bool:
+         return self._enable_gcs or False
+
+     def connection_string(self) -> str:
+         if not self.path:
+             return "duckdb:///:memory:"
+         return f"duckdb:///{self.path}"
+
+
+ class PostgresConfig(DialectConfig):
+     def __init__(
+         self, host: str, port: int, username: str, password: str, database: str
+     ):
+         self.host = host
+         self.port = port
+         self.username = username
+         self.password = password
+         self.database = database
+
+     def connection_string(self) -> str:
+         return f"postgresql://{self.username}:{self.password}@{self.host}:{self.port}"
+
+
+ class SQLServerConfig(DialectConfig):
+     def __init__(
+         self, host: str, port: int, username: str, password: str, database: str
+     ):
+         self.host = host
+         self.port = port
+         self.username = username
+         self.password = password
+         self.database = database
+
+     def connection_string(self) -> str:
+         return f"sqlserver://{self.username}:{self.password}@{self.host}:{self.port}"
+
+
+ class SnowflakeConfig(DialectConfig):
+     def __init__(
+         self,
+         account: str,
+         username: str,
+         password: str,
+         database: str | None = None,
+         schema: str | None = None,
+     ):
+         self.account = account
+         self.username = username
+         self.password = password
+         self.database = database
+         self.schema = schema
+         if self.schema and not self.database:
+             raise ValueError("Setting snowflake schema also requires setting database")
+
+     def connection_string(self) -> str:
+         if self.schema:
+             return f"snowflake://{self.username}:{self.password}@{self.account}/{self.database}/{self.schema}"
+         if self.database:
+             return f"snowflake://{self.username}:{self.password}@{self.account}/{self.database}"
+         return f"snowflake://{self.username}:{self.password}@{self.account}"
+
+
+ class PrestoConfig(DialectConfig):
+     def __init__(
+         self,
+         host: str,
+         port: int,
+         username: str,
+         password: str,
+         catalog: str,
+         schema: str | None = None,
+     ):
+         self.host = host
+         self.port = port
+         self.username = username
+         self.password = password
+         self.catalog = catalog
+         self.schema = schema
+
+     def connection_string(self) -> str:
+         if self.schema:
+             return f"presto://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}/{self.schema}"
+         return f"presto://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}"
+
+
+ class TrinoConfig(PrestoConfig):
+     def connection_string(self) -> str:
+         if self.schema:
+             return f"trino://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}/{self.schema}"
+         return f"trino://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}"
+
+
+ class DataFrameConfig(DuckDBConfig):
+     def __init__(self, dataframes: dict[str, "DataFrame"]):
+         super().__init__()
+         self.dataframes = dataframes
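
The config classes above reduce to SQLAlchemy-style connection URLs. A short sketch of the strings they produce, with illustrative credentials; the outputs follow directly from the connection_string implementations in this diff.

# Sketch: URL strings produced by the configs above (values are illustrative).
from trilogy.dialect.config import DuckDBConfig, SnowflakeConfig

print(DuckDBConfig().connection_string())                  # duckdb:///:memory:
print(DuckDBConfig(path="local.db").connection_string())   # duckdb:///local.db
print(
    SnowflakeConfig(
        account="acme-xy123", username="u", password="p",
        database="ANALYTICS", schema="PUBLIC",
    ).connection_string()
)  # snowflake://u:p@acme-xy123/ANALYTICS/PUBLIC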
trilogy/dialect/dataframe.py
@@ -0,0 +1,50 @@
+ from typing import TYPE_CHECKING, Any
+
+ from sqlalchemy import text
+
+ from trilogy.core.models.environment import Environment
+ from trilogy.dialect.duckdb import DuckDBDialect
+ from trilogy.engine import ExecutionEngine
+
+ if TYPE_CHECKING:
+     try:
+         from pandas import DataFrame
+     except ImportError:
+         DataFrame = Any
+
+
+ class DataframeDialect(DuckDBDialect):
+     pass
+
+
+ class DataframeConnectionWrapper(ExecutionEngine):
+     def __init__(self, engine: ExecutionEngine, dataframes: dict[str, "DataFrame"]):
+         self.engine = engine
+         self.dataframes = dataframes
+         self.connection = None
+
+     def setup(self, env: Environment, connection):
+         self._register_dataframes(env, connection)
+
+     def _register_dataframes(self, env: Environment, connection):
+         for ds in env.datasources.values():
+             if ds.safe_address in self.dataframes:
+                 connection.execute(
+                     text("register(:name, :df)"),
+                     {"name": ds.safe_address, "df": self.dataframes[ds.safe_address]},
+                 )
+             else:
+                 raise ValueError(
+                     f"Dataframe {ds.safe_address} not found in dataframes on connection config, have {self.dataframes.keys()}"
+                 )
+         pass
+
+     def add_dataframe(self, name: str, df: "DataFrame", connection, env: Environment):
+         self.dataframes[name] = df
+         self._register_dataframes(env, connection)
+
+     def connect(self) -> Any:
+         return self.engine.connect()
+
+     def dispose(self, close=True):
+         return super().dispose(close)
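
DataframeConnectionWrapper registers each supplied pandas DataFrame against the DuckDB connection under the datasource's safe_address, failing fast when a datasource has no matching frame. A hedged usage sketch follows; `engine`, `env`, and `connection` stand for objects wired up elsewhere in the package and are deliberately left as comments here rather than constructed.

# Sketch only: `engine`, `env`, and `connection` are assumed to exist
# (built by the executor machinery elsewhere in this package).
import pandas as pd

from trilogy.dialect.dataframe import DataframeConnectionWrapper

frames = {"orders": pd.DataFrame({"id": [1, 2], "amount": [10.0, 20.5]})}
# wrapper = DataframeConnectionWrapper(engine, dataframes=frames)
# wrapper.setup(env, connection)  # registers "orders" if a datasource with that
#                                 # safe_address exists; raises ValueError for any
#                                 # datasource without a matching frame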