pytrilogy 0.3.142__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff shows the content of publicly available package versions as released to one of the supported public registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those registries.
Files changed (200)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-313-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.142.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.142.dist-info/RECORD +200 -0
  6. pytrilogy-0.3.142.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +16 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2669 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +501 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +751 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1392 -0
  112. trilogy/dialect/bigquery.py +308 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +231 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +769 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +9 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/agent.py +41 -0
  148. trilogy/scripts/agent_info.py +303 -0
  149. trilogy/scripts/common.py +355 -0
  150. trilogy/scripts/dependency/Cargo.lock +617 -0
  151. trilogy/scripts/dependency/Cargo.toml +39 -0
  152. trilogy/scripts/dependency/README.md +131 -0
  153. trilogy/scripts/dependency/build.sh +25 -0
  154. trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
  155. trilogy/scripts/dependency/src/lib.rs +16 -0
  156. trilogy/scripts/dependency/src/main.rs +770 -0
  157. trilogy/scripts/dependency/src/parser.rs +435 -0
  158. trilogy/scripts/dependency/src/preql.pest +208 -0
  159. trilogy/scripts/dependency/src/python_bindings.rs +303 -0
  160. trilogy/scripts/dependency/src/resolver.rs +716 -0
  161. trilogy/scripts/dependency/tests/base.preql +3 -0
  162. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  163. trilogy/scripts/dependency/tests/customer.preql +6 -0
  164. trilogy/scripts/dependency/tests/main.preql +9 -0
  165. trilogy/scripts/dependency/tests/orders.preql +7 -0
  166. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  167. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  168. trilogy/scripts/dependency.py +323 -0
  169. trilogy/scripts/display.py +512 -0
  170. trilogy/scripts/environment.py +46 -0
  171. trilogy/scripts/fmt.py +32 -0
  172. trilogy/scripts/ingest.py +471 -0
  173. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  174. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  175. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  176. trilogy/scripts/ingest_helpers/typing.py +161 -0
  177. trilogy/scripts/init.py +105 -0
  178. trilogy/scripts/parallel_execution.py +713 -0
  179. trilogy/scripts/plan.py +189 -0
  180. trilogy/scripts/run.py +63 -0
  181. trilogy/scripts/serve.py +140 -0
  182. trilogy/scripts/serve_helpers/__init__.py +41 -0
  183. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  184. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  185. trilogy/scripts/serve_helpers/models.py +38 -0
  186. trilogy/scripts/single_execution.py +131 -0
  187. trilogy/scripts/testing.py +119 -0
  188. trilogy/scripts/trilogy.py +68 -0
  189. trilogy/std/__init__.py +0 -0
  190. trilogy/std/color.preql +3 -0
  191. trilogy/std/date.preql +13 -0
  192. trilogy/std/display.preql +18 -0
  193. trilogy/std/geography.preql +22 -0
  194. trilogy/std/metric.preql +15 -0
  195. trilogy/std/money.preql +67 -0
  196. trilogy/std/net.preql +14 -0
  197. trilogy/std/ranking.preql +7 -0
  198. trilogy/std/report.preql +5 -0
  199. trilogy/std/semantic.preql +6 -0
  200. trilogy/utility.py +34 -0
trilogy/dialect/bigquery.py
@@ -0,0 +1,308 @@
+ import uuid
+ from typing import Any, Callable, Dict, Mapping, Optional
+
+ from jinja2 import Template
+
+ from trilogy.core.enums import (
+     ComparisonOperator,
+     FunctionType,
+     UnnestMode,
+     WindowType,
+ )
+ from trilogy.core.models.core import (
+     DataType,
+ )
+ from trilogy.core.models.execute import CTE, CompiledCTE, UnionCTE
+ from trilogy.core.statements.execute import ProcessedQueryPersist
+ from trilogy.dialect.base import BaseDialect, safe_quote
+
+ WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
+
+
+ def transform_date_part(part: str) -> str:
+     part_upper = part.upper()
+     if part_upper == "DAY_OF_WEEK":
+         return "DAYOFWEEK"
+     return part_upper
+
+
+ def handle_length(args, types: list[DataType] | None = None) -> str:
+     arg = args[0]
+     if types and types[0].data_type == DataType.ARRAY:
+         return f"ARRAY_LENGTH({arg})"
+     return f"LENGTH({arg})"
+
+
+ FUNCTION_MAP = {
+     FunctionType.COUNT: lambda x, types: f"count({x[0]})",
+     FunctionType.SUM: lambda x, types: f"sum({x[0]})",
+     FunctionType.LENGTH: lambda x, types: handle_length(x, types),
+     FunctionType.AVG: lambda x, types: f"avg({x[0]})",
+     FunctionType.LIKE: lambda x, types: (
+         f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
+     ),
+     FunctionType.IS_NULL: lambda x, types: f"{x[0]} IS NULL",
+     FunctionType.MINUTE: lambda x, types: f"EXTRACT(MINUTE from {x[0]})",
+     FunctionType.SECOND: lambda x, types: f"EXTRACT(SECOND from {x[0]})",
+     FunctionType.HOUR: lambda x, types: f"EXTRACT(HOUR from {x[0]})",
+     FunctionType.DAY_OF_WEEK: lambda x, types: f"EXTRACT(DAYOFWEEK from {x[0]})-1",  # BigQuery's DAYOFWEEK returns 1 for Sunday
+     FunctionType.DAY: lambda x, types: f"EXTRACT(DAY from {x[0]})",
+     FunctionType.YEAR: lambda x, types: f"EXTRACT(YEAR from {x[0]})",
+     FunctionType.MONTH: lambda x, types: f"EXTRACT(MONTH from {x[0]})",
+     FunctionType.WEEK: lambda x, types: f"EXTRACT(WEEK from {x[0]})",
+     FunctionType.QUARTER: lambda x, types: f"EXTRACT(QUARTER from {x[0]})",
+     # math
+     FunctionType.POWER: lambda x, types: f"POWER({x[0]}, {x[1]})",
+     FunctionType.DIVIDE: lambda x, types: f"COALESCE(SAFE_DIVIDE({x[0]},{x[1]}),0)",
+     FunctionType.DATE_ADD: lambda x, types: f"DATE_ADD({x[0]}, INTERVAL {x[2]} {x[1]})",
+     FunctionType.DATE_SUB: lambda x, types: f"DATE_SUB({x[0]}, INTERVAL {x[2]} {x[1]})",
+     FunctionType.DATE_PART: lambda x, types: f"EXTRACT({transform_date_part(x[1])} FROM {x[0]})",
+     FunctionType.MONTH_NAME: lambda x, types: f"FORMAT_DATE('%B', {x[0]})",
+     FunctionType.DAY_NAME: lambda x, types: f"FORMAT_DATE('%A', {x[0]})",
+     # string
+     FunctionType.CONTAINS: lambda x, types: f"CONTAINS_SUBSTR({x[0]}, {x[1]})",
+     FunctionType.RANDOM: lambda x, types: f"FLOOR(RAND()*{x[0]})",
+     FunctionType.ARRAY_SUM: lambda x, types: f"(select sum(x) from unnest({x[0]}) as x)",
+     FunctionType.ARRAY_DISTINCT: lambda x, types: f"ARRAY(SELECT DISTINCT element FROM UNNEST({x[0]}) AS element)",
+     FunctionType.ARRAY_SORT: lambda x, types: f"ARRAY(SELECT element FROM UNNEST({x[0]}) AS element ORDER BY element)",
+     # aggregate
+     FunctionType.BOOL_AND: lambda x, types: f"LOGICAL_AND({x[0]})",
+     FunctionType.BOOL_OR: lambda x, types: f"LOGICAL_OR({x[0]})",
+ }
+
+ FUNCTION_GRAIN_MATCH_MAP = {
+     **FUNCTION_MAP,
+     FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
+     FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
+     FunctionType.SUM: lambda args, types: f"{args[0]}",
+     FunctionType.AVG: lambda args, types: f"{args[0]}",
+ }
+
+ DATATYPE_MAP: dict[DataType, str] = {
+     DataType.STRING: "STRING",
+     DataType.INTEGER: "INT64",
+     DataType.FLOAT: "FLOAT64",
+     DataType.BOOL: "BOOL",
+     DataType.NUMERIC: "NUMERIC",
+     DataType.MAP: "MAP",
+     DataType.DATE: "DATE",
+     DataType.DATETIME: "DATETIME",
+     DataType.TIMESTAMP: "TIMESTAMP",
+ }
+
+
+ BQ_SQL_TEMPLATE = Template(
+     """{%- if output %}
+ {{output}}
+ {% endif %}{%- if ctes %}
+ WITH {% if recursive%}RECURSIVE{% endif %}{% for cte in ctes %}
+ {{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% else%}
+ {% endif %}{% endfor %}{% endif %}
+ {%- if full_select -%}
+ {{full_select}}
+ {%- else -%}
+ SELECT
+ {%- for select in select_columns %}
+ {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
+ {% if base %}FROM
+ {{ base }}{% endif %}{% if joins %}{% for join in joins %}
+ {{ join }}{% endfor %}{% endif %}
+ {% if where %}WHERE
+ {{ where }}
+ {% endif %}
+ {%- if group_by %}GROUP BY {% for group in group_by %}
+ {{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
+ HAVING
+ \t{{ having }}{% endif %}
+ {%- if order_by %}
+ ORDER BY {% for order in order_by %}
+ {{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+ {%- if limit is not none %}
+ LIMIT {{ limit }}{% endif %}{% endif %}
+ """
+ )
+
+
+ BQ_CREATE_TABLE_SQL_TEMPLATE = Template(
+     """
+ CREATE {% if create_mode == "create_or_replace" %}OR REPLACE TABLE{% elif create_mode == "create_if_not_exists" %}TABLE IF NOT EXISTS{% else %}TABLE{% endif %} {{ name}} (
+ {%- for column in columns %}
+ `{{ column.name }}` {{ type_map[column.name] }}{% if column.description %} OPTIONS(description='{{ column.description }}'){% endif %}{% if not loop.last %},{% endif %}
+ {%- endfor %}
+ )
+ {%- if partition_by %}
+ PARTITION BY {{ partition_by }}
+ {%- endif %}
+ {%- if cluster_by %}
+ CLUSTER BY {{ cluster_by | join(', ') }}
+ {%- endif %}
+ {%- if table_description %}
+ OPTIONS(
+ description='{{ table_description }}'
+ )
+ {%- endif %};
+ """.strip()
+ )
+
+ PARTITIONED_INSERT_TEMPLATE = Template(
+     """
+ -- Step 1: materialize results
+ CREATE TEMP TABLE {{ tmp_table }} AS SELECT * FROM {{ target_table }} limit 0;
+
+ INSERT INTO {{ tmp_table }}
+ {{ final_select }}
+ ;
+
+ -- Step 2: extract distinct partitions and generate dynamic statements
+ BEGIN
+ DECLARE partition_values ARRAY<{{ partition_type }}>;
+ DECLARE current_partition {{ partition_type }};
+ DECLARE i INT64 DEFAULT 0;
+
+ -- Get all distinct partition values
+ SET partition_values = (
+ SELECT ARRAY_AGG(DISTINCT {{ partition_key[0] }})
+ FROM {{ tmp_table }}
+ );
+
+ -- Loop through each partition value
+ WHILE i < ARRAY_LENGTH(partition_values) DO
+ SET current_partition = partition_values[OFFSET(i)];
+
+ -- Delete existing records for this partition
+ EXECUTE IMMEDIATE FORMAT(
+ 'DELETE FROM {{ target_table }} WHERE {{ partition_key[0] }} = "%t"',
+ current_partition
+ );
+
+ -- Insert new records for this partition
+ EXECUTE IMMEDIATE FORMAT(
+ 'INSERT INTO {{ target_table }} SELECT * FROM {{ tmp_table }} WHERE {{ partition_key[0] }} = "%t"',
+ current_partition
+ );
+
+ SET i = i + 1;
+ END WHILE;
+ END;
+ """
+ )
+
+ MAX_IDENTIFIER_LENGTH = 50
+
+
+ def parse_bigquery_table_name(
+     table_name: str, schema: str | None = None
+ ) -> tuple[str, str | None]:
+     """Parse BigQuery table names supporting project.dataset.table format."""
+     if "." in table_name and not schema:
+         parts = table_name.split(".")
+         if len(parts) == 2:
+             schema = parts[0]
+             table_name = parts[1]
+         elif len(parts) == 3:
+             # project.dataset.table format
+             schema = f"{parts[0]}.{parts[1]}"
+             table_name = parts[2]
+     return table_name, schema
+
+
+ class BigqueryDialect(BaseDialect):
+     WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
+     FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
+     FUNCTION_GRAIN_MATCH_MAP = {
+         **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
+         **FUNCTION_GRAIN_MATCH_MAP,
+     }
+     QUOTE_CHARACTER = "`"
+     SQL_TEMPLATE = BQ_SQL_TEMPLATE
+     CREATE_TABLE_SQL_TEMPLATE = BQ_CREATE_TABLE_SQL_TEMPLATE
+     UNNEST_MODE = UnnestMode.CROSS_JOIN_UNNEST
+     DATATYPE_MAP = DATATYPE_MAP
+
+     def get_table_schema(
+         self, executor, table_name: str, schema: str | None = None
+     ) -> list[tuple]:
+         """BigQuery uses dataset instead of schema and supports project.dataset.table format."""
+         table_name, schema = parse_bigquery_table_name(table_name, schema)
+
+         column_query = f"""
+         SELECT
+             column_name,
+             data_type,
+             is_nullable,
+             '' as column_comment
+         FROM `{schema}.INFORMATION_SCHEMA.COLUMNS`
+         WHERE table_name = '{table_name}'
+         ORDER BY ordinal_position
+         """
+
+         rows = executor.execute_raw_sql(column_query).fetchall()
+         return rows
+
+     def get_table_primary_keys(
+         self, executor, table_name: str, schema: str | None = None
+     ) -> list[str]:
+         """BigQuery doesn't enforce primary keys; rely on data-driven grain detection."""
+         table_name, schema = parse_bigquery_table_name(table_name, schema)
+
+         pk_query = f"""
+         SELECT column_name
+         FROM `{schema}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE`
+         WHERE table_name = '{table_name}'
+         AND constraint_name LIKE '%PRIMARY%'
+         """
+
+         rows = executor.execute_raw_sql(pk_query).fetchall()
+         return [row[0] for row in rows]
+
+     def render_array_unnest(
+         self,
+         left,
+         right,
+         operator: ComparisonOperator,
+         cte: CTE | UnionCTE | None = None,
+         cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
+         raise_invalid: bool = False,
+     ):
+         return f"{self.render_expr(left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {operator.value} unnest({self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)})"
+
+     def generate_partitioned_insert(
+         self,
+         query: ProcessedQueryPersist,
+         recursive: bool,
+         compiled_ctes: list[CompiledCTE],
+     ) -> str:
+         tmp_table = f"tmp__{uuid.uuid4().hex}"
+         final_select = compiled_ctes[-1].statement
+         ctes = compiled_ctes[:-1]
+
+         if not query.partition_by:
+             raise ValueError("partition_by must be set for partitioned inserts.")
+
+         partition_key = query.partition_by
+         target_table = safe_quote(
+             query.output_to.address.location, self.QUOTE_CHARACTER
+         )
+
+         # render intermediate CTEs
+         ctes_sql = ""
+         if ctes:
+             rendered = []
+             for c in ctes:
+                 rendered.append(f"{c.name} AS ({c.statement})")
+             ctes_sql = "WITH " + ",\n".join(rendered)
+
+         # create temp table first
+         full_select_with_ctes = (
+             final_select if not ctes_sql else f"{ctes_sql}\n{final_select}"
+         )
+
+         sql_script = PARTITIONED_INSERT_TEMPLATE.render(
+             tmp_table=tmp_table,
+             final_select=full_select_with_ctes,
+             partition_key=partition_key,
+             target_table=target_table,
+             partition_type=self.DATATYPE_MAP[query.partition_types[0]],
+         )
+
+         return sql_script
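A quick check of the table-name helper above, as a hedged usage sketch (it assumes the module is importable as trilogy.dialect.bigquery, matching the file list): two-part names split into dataset and table, three-part names fold project and dataset into the schema slot, and an explicit schema bypasses parsing.

    from trilogy.dialect.bigquery import parse_bigquery_table_name  # assumed import path

    assert parse_bigquery_table_name("orders") == ("orders", None)
    assert parse_bigquery_table_name("sales.orders") == ("orders", "sales")
    # project.dataset.table: project and dataset collapse into the schema slot
    assert parse_bigquery_table_name("my-project.sales.orders") == ("orders", "my-project.sales")
    # an explicit schema short-circuits the split entirely
    assert parse_bigquery_table_name("sales.orders", schema="other") == ("sales.orders", "other")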
trilogy/dialect/common.py
@@ -0,0 +1,147 @@
+ from typing import Callable
+
+ from trilogy.core.constants import UNNEST_NAME
+ from trilogy.core.enums import Modifier, UnnestMode
+ from trilogy.core.models.build import (
+     BuildComparison,
+     BuildConcept,
+     BuildConditional,
+     BuildFunction,
+     BuildParamaterizedConceptReference,
+     BuildParenthetical,
+ )
+ from trilogy.core.models.execute import (
+     CTE,
+     InstantiatedUnnestJoin,
+     Join,
+     UnionCTE,
+ )
+
+
+ def render_unnest(
+     unnest_mode: UnnestMode,
+     quote_character: str,
+     concept: BuildConcept | BuildParamaterizedConceptReference | BuildFunction,
+     render_func: Callable[
+         [BuildConcept | BuildParamaterizedConceptReference | BuildFunction, CTE], str
+     ],
+     cte: CTE,
+ ):
+     if not isinstance(concept, (BuildConcept, BuildParamaterizedConceptReference)):
+         address = UNNEST_NAME
+     else:
+         address = concept.safe_address
+     if unnest_mode == UnnestMode.CROSS_JOIN:
+         return f"{render_func(concept, cte)} as {quote_character}{address}{quote_character}"
+     elif unnest_mode == UnnestMode.CROSS_JOIN_UNNEST:
+         return f"unnest({render_func(concept, cte)}) as {quote_character}{address}{quote_character}"
+     elif unnest_mode == UnnestMode.PRESTO:
+         return f"unnest({render_func(concept, cte)}) as t({quote_character}{UNNEST_NAME}{quote_character})"
+     elif unnest_mode == UnnestMode.CROSS_JOIN_ALIAS:
+         return f"{render_func(concept, cte)} as unnest_wrapper ({quote_character}{address}{quote_character})"
+     elif unnest_mode == UnnestMode.SNOWFLAKE:
+         # if we don't actually have a join, we're directly unnesting a concept, and we can skip the flatten
+         if not cte.render_from_clause:
+             return f"{render_func(concept, cte)} as unnest_wrapper ( unnest1, unnest2, unnest3, unnest4, {quote_character}{cte.join_derived_concepts[0].safe_address}{quote_character})"
+         # otherwise, flatten the concept for the join
+         return f"flatten({render_func(concept, cte)}) as unnest_wrapper ( unnest1, unnest2, unnest3, unnest4, {quote_character}{cte.join_derived_concepts[0].safe_address}{quote_character})"
+     return f"{render_func(concept, cte)} as {quote_character}{address}{quote_character}"
+
+
+ def render_join_concept(
+     name: str,
+     quote_character: str,
+     cte: CTE | UnionCTE,
+     concept: BuildConcept,
+     render_expr,
+     inlined_ctes: set[str],
+     use_map: dict[str, set[str]],
+ ):
+     if cte.name in inlined_ctes:
+         base = render_expr(concept, cte)
+         return base
+     use_map[name].add(concept.address)
+     return f"{quote_character}{name}{quote_character}.{quote_character}{concept.safe_address}{quote_character}"
+
+
+ def render_join(
+     join: Join | InstantiatedUnnestJoin,
+     quote_character: str,
+     render_expr_func: Callable[
+         [
+             BuildConcept
+             | BuildParamaterizedConceptReference
+             | BuildFunction
+             | BuildConditional
+             | BuildComparison
+             | BuildParenthetical,
+             CTE,
+         ],
+         str,
+     ],
+     cte: CTE,
+     use_map: dict[str, set[str]],
+     null_wrapper: Callable[[str, str, list[Modifier]], str],
+     unnest_mode: UnnestMode = UnnestMode.CROSS_APPLY,
+ ) -> str | None:
+     # {% for key in join.joinkeys %}{{ key.inner }} = {{ key.outer}}{% endfor %}
+     if isinstance(join, InstantiatedUnnestJoin):
+         if unnest_mode == UnnestMode.DIRECT:
+             return None
+         if not cte:
+             raise ValueError("must provide a CTE to build an unnest join")
+         if unnest_mode in (
+             UnnestMode.CROSS_JOIN,
+             UnnestMode.CROSS_JOIN_UNNEST,
+             UnnestMode.CROSS_JOIN_ALIAS,
+             UnnestMode.PRESTO,
+         ):
+             return f"CROSS JOIN {render_unnest(unnest_mode, quote_character, join.object_to_unnest, render_expr_func, cte)}"
+         if unnest_mode == UnnestMode.SNOWFLAKE:
+             return f"LEFT JOIN LATERAL {render_unnest(unnest_mode, quote_character, join.object_to_unnest, render_expr_func, cte)}"
+         return f"FULL JOIN {render_unnest(unnest_mode, quote_character, join.object_to_unnest, render_expr_func, cte)}"
+     # left_name = join.left_name
+     right_name = join.right_name
+     join.quote = quote_character
+     # if cte.quote_address.get(join.right_name, False):
+     #     join.quote = quote_character
+     right_base = join.right_ref
+     base_joinkeys = []
+     if join.joinkey_pairs:
+         base_joinkeys.extend(
+             [
+                 null_wrapper(
+                     render_join_concept(
+                         join.get_name(pair.cte),
+                         quote_character,
+                         pair.cte,
+                         pair.left,
+                         render_expr_func,
+                         join.inlined_ctes,
+                         use_map=use_map,
+                     ),
+                     render_join_concept(
+                         right_name,
+                         quote_character,
+                         join.right_cte,
+                         pair.right,
+                         render_expr_func,
+                         join.inlined_ctes,
+                         use_map=use_map,
+                     ),
+                     pair.modifiers
+                     + (pair.left.modifiers or [])
+                     + (pair.right.modifiers or [])
+                     + (join.modifiers or []),
+                 )
+                 for pair in join.joinkey_pairs
+             ]
+         )
+     if not base_joinkeys:
+         base_joinkeys = ["1=1"]
+
+     joinkeys = " AND ".join(sorted(base_joinkeys))
+     base = f"{join.jointype.value.upper()} JOIN {right_base} on {joinkeys}"
+     if join.condition:
+         base = f"{base} and {render_expr_func(join.condition, cte)}"
+     return base
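For orientation, a sketch of what render_unnest above emits in the common modes, assuming a concept whose rendered expression is my_cte.tags, a safe_address of tags, a backtick quote character, and UNNEST_NAME == "unnest" (that constant's value is an assumption here, not confirmed by this diff):

    # illustrative expected outputs only; not executable without real Build* objects
    EXPECTED = {
        "CROSS_JOIN": "my_cte.tags as `tags`",
        "CROSS_JOIN_UNNEST": "unnest(my_cte.tags) as `tags`",
        "PRESTO": "unnest(my_cte.tags) as t(`unnest`)",
        "CROSS_JOIN_ALIAS": "my_cte.tags as unnest_wrapper (`tags`)",
    }

render_join then prefixes the rendered fragment with CROSS JOIN for these modes, LEFT JOIN LATERAL on Snowflake, and FULL JOIN otherwise.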
trilogy/dialect/config.py
@@ -0,0 +1,144 @@
+ from typing import TYPE_CHECKING, Any
+
+ if TYPE_CHECKING:
+     try:
+         from pandas import DataFrame
+     except ImportError:
+         DataFrame = Any
+
+
+ class DialectConfig:
+     def __init__(self):
+         pass
+
+     def connection_string(self) -> str:
+         raise NotImplementedError
+
+     def create_connect_args(self) -> dict:
+         return {}
+
+     def merge_config(self, other: "DialectConfig") -> "DialectConfig":
+         for key, value in other.__dict__.items():
+             if value is not None:
+                 setattr(self, key, value)
+         return self
+
+
+ class BigQueryConfig(DialectConfig):
+     def __init__(self, project: str | None = None, client: Any | None = None):
+         self.project = project
+         self.client = client
+
+     def connection_string(self) -> str:
+         return f"bigquery://{self.project}?user_supplied_client=True"
+
+     def create_connect_args(self) -> dict:
+         if not self.client:
+             from google.auth import default
+             from google.cloud import bigquery
+
+             credentials, project = default()
+             self.client = bigquery.Client(credentials=credentials, project=project)
+             self.project = project
+
+         return {"client": self.client}
+
+
+ class DuckDBConfig(DialectConfig):
+     def __init__(self, path: str | None = None):
+         self.path = path
+         self.guid = id(self)
+
+     def connection_string(self) -> str:
+         if not self.path:
+             return "duckdb:///:memory:"
+         return f"duckdb:///{self.path}"
+
+
+ class PostgresConfig(DialectConfig):
+     def __init__(
+         self, host: str, port: int, username: str, password: str, database: str
+     ):
+         self.host = host
+         self.port = port
+         self.username = username
+         self.password = password
+         self.database = database
+
+     def connection_string(self) -> str:
+         return f"postgresql://{self.username}:{self.password}@{self.host}:{self.port}"
+
+
+ class SQLServerConfig(DialectConfig):
+     def __init__(
+         self, host: str, port: int, username: str, password: str, database: str
+     ):
+         self.host = host
+         self.port = port
+         self.username = username
+         self.password = password
+         self.database = database
+
+     def connection_string(self) -> str:
+         return f"sqlserver://{self.username}:{self.password}@{self.host}:{self.port}"
+
+
+ class SnowflakeConfig(DialectConfig):
+     def __init__(
+         self,
+         account: str,
+         username: str,
+         password: str,
+         database: str | None = None,
+         schema: str | None = None,
+     ):
+         self.account = account
+         self.username = username
+         self.password = password
+         self.database = database
+         self.schema = schema
+         if self.schema and not self.database:
+             raise ValueError("Setting snowflake schema also requires setting database")
+
+     def connection_string(self) -> str:
+         if self.schema:
+             return f"snowflake://{self.username}:{self.password}@{self.account}/{self.database}/{self.schema}"
+         if self.database:
+             return f"snowflake://{self.username}:{self.password}@{self.account}/{self.database}"
+         return f"snowflake://{self.username}:{self.password}@{self.account}"
+
+
+ class PrestoConfig(DialectConfig):
+     def __init__(
+         self,
+         host: str,
+         port: int,
+         username: str,
+         password: str,
+         catalog: str,
+         schema: str | None = None,
+     ):
+         self.host = host
+         self.port = port
+         self.username = username
+         self.password = password
+         self.catalog = catalog
+         self.schema = schema
+
+     def connection_string(self) -> str:
+         if self.schema:
+             return f"presto://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}/{self.schema}"
+         return f"presto://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}"
+
+
+ class TrinoConfig(PrestoConfig):
+     def connection_string(self) -> str:
+         if self.schema:
+             return f"trino://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}/{self.schema}"
+         return f"trino://{self.username}:{self.password}@{self.host}:{self.port}/{self.catalog}"
+
+
+ class DataFrameConfig(DuckDBConfig):
+     def __init__(self, dataframes: dict[str, "DataFrame"]):
+         super().__init__()
+         self.dataframes = dataframes
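A minimal sketch exercising the config classes above (it assumes the module path trilogy.dialect.config from the file list; all credentials are placeholders):

    from trilogy.dialect.config import DuckDBConfig, PostgresConfig, SnowflakeConfig

    assert DuckDBConfig().connection_string() == "duckdb:///:memory:"
    assert DuckDBConfig(path="local.db").connection_string() == "duckdb:///local.db"

    pg = PostgresConfig(host="localhost", port=5432, username="u", password="pw", database="analytics")
    # note: as written above, the database attribute is not appended to the URL
    assert pg.connection_string() == "postgresql://u:pw@localhost:5432"

    try:
        SnowflakeConfig(account="acct", username="u", password="pw", schema="public")
    except ValueError:
        pass  # schema without database is rejected, per the check above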
trilogy/dialect/dataframe.py
@@ -0,0 +1,50 @@
+ from typing import TYPE_CHECKING, Any
+
+ from sqlalchemy import text
+
+ from trilogy.core.models.environment import Environment
+ from trilogy.dialect.duckdb import DuckDBDialect
+ from trilogy.engine import ExecutionEngine
+
+ if TYPE_CHECKING:
+     try:
+         from pandas import DataFrame
+     except ImportError:
+         DataFrame = Any
+
+
+ class DataframeDialect(DuckDBDialect):
+     pass
+
+
+ class DataframeConnectionWrapper(ExecutionEngine):
+     def __init__(self, engine: ExecutionEngine, dataframes: dict[str, "DataFrame"]):
+         self.engine = engine
+         self.dataframes = dataframes
+         self.connection = None
+
+     def setup(self, env: Environment, connection):
+         self._register_dataframes(env, connection)
+
+     def _register_dataframes(self, env: Environment, connection):
+         for ds in env.datasources.values():
+             if ds.safe_address in self.dataframes:
+                 connection.execute(
+                     text("register(:name, :df)"),
+                     {"name": ds.safe_address, "df": self.dataframes[ds.safe_address]},
+                 )
+             else:
+                 raise ValueError(
+                     f"Dataframe {ds.safe_address} not found in dataframes on connection config, have {self.dataframes.keys()}"
+                 )
+         pass
+
+     def add_dataframe(self, name: str, df: "DataFrame", connection, env: Environment):
+         self.dataframes[name] = df
+         self._register_dataframes(env, connection)
+
+     def connect(self) -> Any:
+         return self.engine.connect()
+
+     def dispose(self, close=True):
+         return super().dispose(close)
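The register(:name, :df) statement above is the duckdb-engine idiom for exposing a pandas DataFrame as a queryable table. A standalone sketch of that mechanism, with an illustrative table name and frame (not tied to the wrapper's Environment plumbing):

    import pandas as pd
    from sqlalchemy import create_engine, text

    engine = create_engine("duckdb:///:memory:")
    df = pd.DataFrame({"id": [1, 2], "amount": [10.0, 12.5]})
    with engine.connect() as conn:
        # same statement shape DataframeConnectionWrapper issues per datasource
        conn.execute(text("register(:name, :df)"), {"name": "orders", "df": df})
        total = conn.execute(text("select sum(amount) from orders")).scalar()
        print(total)  # 22.5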