pytrilogy 0.3.149__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.149.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.149.dist-info/RECORD +207 -0
  6. pytrilogy-0.3.149.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2670 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +436 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +846 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1432 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +397 -0
  117. trilogy/dialect/enums.py +151 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/exceptions.py +26 -0
  130. trilogy/execution/state/file_state_store.py +0 -0
  131. trilogy/execution/state/sqllite_state_store.py +0 -0
  132. trilogy/execution/state/state_store.py +406 -0
  133. trilogy/executor.py +692 -0
  134. trilogy/hooks/__init__.py +4 -0
  135. trilogy/hooks/base_hook.py +40 -0
  136. trilogy/hooks/graph_hook.py +135 -0
  137. trilogy/hooks/query_debugger.py +166 -0
  138. trilogy/metadata/__init__.py +0 -0
  139. trilogy/parser.py +10 -0
  140. trilogy/parsing/README.md +21 -0
  141. trilogy/parsing/__init__.py +0 -0
  142. trilogy/parsing/common.py +1069 -0
  143. trilogy/parsing/config.py +5 -0
  144. trilogy/parsing/exceptions.py +8 -0
  145. trilogy/parsing/helpers.py +1 -0
  146. trilogy/parsing/parse_engine.py +2876 -0
  147. trilogy/parsing/render.py +775 -0
  148. trilogy/parsing/trilogy.lark +546 -0
  149. trilogy/py.typed +0 -0
  150. trilogy/render.py +45 -0
  151. trilogy/scripts/README.md +9 -0
  152. trilogy/scripts/__init__.py +0 -0
  153. trilogy/scripts/agent.py +41 -0
  154. trilogy/scripts/agent_info.py +306 -0
  155. trilogy/scripts/common.py +432 -0
  156. trilogy/scripts/dependency/Cargo.lock +617 -0
  157. trilogy/scripts/dependency/Cargo.toml +39 -0
  158. trilogy/scripts/dependency/README.md +131 -0
  159. trilogy/scripts/dependency/build.sh +25 -0
  160. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  161. trilogy/scripts/dependency/src/lib.rs +16 -0
  162. trilogy/scripts/dependency/src/main.rs +770 -0
  163. trilogy/scripts/dependency/src/parser.rs +435 -0
  164. trilogy/scripts/dependency/src/preql.pest +208 -0
  165. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  166. trilogy/scripts/dependency/src/resolver.rs +716 -0
  167. trilogy/scripts/dependency/tests/base.preql +3 -0
  168. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  169. trilogy/scripts/dependency/tests/customer.preql +6 -0
  170. trilogy/scripts/dependency/tests/main.preql +9 -0
  171. trilogy/scripts/dependency/tests/orders.preql +7 -0
  172. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  173. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  174. trilogy/scripts/dependency.py +323 -0
  175. trilogy/scripts/display.py +555 -0
  176. trilogy/scripts/environment.py +59 -0
  177. trilogy/scripts/fmt.py +32 -0
  178. trilogy/scripts/ingest.py +487 -0
  179. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  180. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  181. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  182. trilogy/scripts/ingest_helpers/typing.py +161 -0
  183. trilogy/scripts/init.py +105 -0
  184. trilogy/scripts/parallel_execution.py +762 -0
  185. trilogy/scripts/plan.py +189 -0
  186. trilogy/scripts/refresh.py +161 -0
  187. trilogy/scripts/run.py +79 -0
  188. trilogy/scripts/serve.py +202 -0
  189. trilogy/scripts/serve_helpers/__init__.py +41 -0
  190. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  191. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  192. trilogy/scripts/serve_helpers/models.py +38 -0
  193. trilogy/scripts/single_execution.py +131 -0
  194. trilogy/scripts/testing.py +143 -0
  195. trilogy/scripts/trilogy.py +75 -0
  196. trilogy/std/__init__.py +0 -0
  197. trilogy/std/color.preql +3 -0
  198. trilogy/std/date.preql +13 -0
  199. trilogy/std/display.preql +18 -0
  200. trilogy/std/geography.preql +22 -0
  201. trilogy/std/metric.preql +15 -0
  202. trilogy/std/money.preql +67 -0
  203. trilogy/std/net.preql +14 -0
  204. trilogy/std/ranking.preql +7 -0
  205. trilogy/std/report.preql +5 -0
  206. trilogy/std/semantic.preql +6 -0
  207. trilogy/utility.py +34 -0
@@ -0,0 +1,1432 @@
1
+ from collections import defaultdict
2
+ from datetime import date, datetime
3
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union
4
+
5
+ if TYPE_CHECKING:
6
+ from trilogy.dialect.config import DialectConfig
7
+
8
+ from jinja2 import Template
9
+
10
+ from trilogy.constants import (
11
+ CONFIG,
12
+ DEFAULT_NAMESPACE,
13
+ MagicConstants,
14
+ Rendering,
15
+ logger,
16
+ )
17
+ from trilogy.core.constants import UNNEST_NAME
18
+ from trilogy.core.enums import (
19
+ AddressType,
20
+ ComparisonOperator,
21
+ CreateMode,
22
+ DatePart,
23
+ FunctionType,
24
+ GroupMode,
25
+ Modifier,
26
+ Ordering,
27
+ PersistMode,
28
+ ShowCategory,
29
+ UnnestMode,
30
+ WindowType,
31
+ )
32
+ from trilogy.core.internal import DEFAULT_CONCEPTS
33
+ from trilogy.core.models.author import ArgBinding, arg_to_datatype
34
+ from trilogy.core.models.build import (
35
+ BuildAggregateWrapper,
36
+ BuildCaseElse,
37
+ BuildCaseWhen,
38
+ BuildComparison,
39
+ BuildConcept,
40
+ BuildConditional,
41
+ BuildFilterItem,
42
+ BuildFunction,
43
+ BuildMultiSelectLineage,
44
+ BuildOrderItem,
45
+ BuildParamaterizedConceptReference,
46
+ BuildParenthetical,
47
+ BuildRowsetItem,
48
+ BuildSubselectComparison,
49
+ BuildWindowItem,
50
+ )
51
+ from trilogy.core.models.core import (
52
+ ArrayType,
53
+ DataType,
54
+ ListWrapper,
55
+ MapType,
56
+ MapWrapper,
57
+ NumericType,
58
+ StructType,
59
+ TraitDataType,
60
+ TupleWrapper,
61
+ )
62
+ from trilogy.core.models.datasource import Address, Datasource, RawColumnExpr
63
+ from trilogy.core.models.environment import Environment
64
+ from trilogy.core.models.execute import CTE, CompiledCTE, RecursiveCTE, UnionCTE
65
+ from trilogy.core.processing.utility import (
66
+ decompose_condition,
67
+ is_scalar_condition,
68
+ sort_select_output,
69
+ )
70
+ from trilogy.core.query_processor import process_copy, process_persist, process_query
71
+ from trilogy.core.statements.author import (
72
+ ConceptDeclarationStatement,
73
+ CopyStatement,
74
+ CreateStatement,
75
+ FunctionDeclaration,
76
+ ImportStatement,
77
+ MergeStatementV2,
78
+ MockStatement,
79
+ MultiSelectStatement,
80
+ PersistStatement,
81
+ PublishStatement,
82
+ RawSQLStatement,
83
+ RowsetDerivationStatement,
84
+ SelectStatement,
85
+ ShowStatement,
86
+ ValidateStatement,
87
+ )
88
+ from trilogy.core.statements.execute import (
89
+ PROCESSED_STATEMENT_TYPES,
90
+ ProcessedCopyStatement,
91
+ ProcessedCreateStatement,
92
+ ProcessedMockStatement,
93
+ ProcessedPublishStatement,
94
+ ProcessedQuery,
95
+ ProcessedQueryPersist,
96
+ ProcessedRawSQLStatement,
97
+ ProcessedShowStatement,
98
+ ProcessedStaticValueOutput,
99
+ ProcessedValidateStatement,
100
+ )
101
+ from trilogy.core.table_processor import (
102
+ CreateTableInfo,
103
+ datasource_to_create_table_info,
104
+ process_create_statement,
105
+ )
106
+ from trilogy.core.utility import safe_quote
107
+ from trilogy.dialect.common import render_join, render_unnest
108
+ from trilogy.hooks.base_hook import BaseHook
109
+
110
+
111
def null_wrapper(lval: str, rval: str, modifiers: list[Modifier]) -> str:
    """Render an equality comparison between two SQL expressions.

    When ``Modifier.NULLABLE`` is present in ``modifiers`` the comparison is
    widened so that it also holds when both sides are null; otherwise a plain
    ``=`` comparison is returned.
    """
    equality = f"{lval} = {rval}"
    if Modifier.NULLABLE not in modifiers:
        return equality
    both_null = f"{lval} is null and {rval} is null"
    return f"({equality} or ({both_null}))"
116
+
117
+
118
+ # Prefix placed at the start of the debug log messages emitted by this module.
+ LOGGER_PREFIX = "[RENDERING]"
119
+
120
+ # Tuples of lineage/expression classes, in the form isinstance() accepts.
+ # Each currently holds a single Build* class.
+ WINDOW_ITEMS = (BuildWindowItem,)
121
+ FILTER_ITEMS = (BuildFilterItem,)
122
+ AGGREGATE_ITEMS = (BuildAggregateWrapper,)
123
+ FUNCTION_ITEMS = (BuildFunction,)
124
+ PARENTHETICAL_ITEMS = (BuildParenthetical,)
125
+ CASE_WHEN_ITEMS = (BuildCaseWhen,)
126
+ CASE_ELSE_ITEMS = (BuildCaseElse,)
127
+ SUBSELECT_COMPARISON_ITEMS = (BuildSubselectComparison,)
128
+ COMPARISON_ITEMS = (BuildComparison,)
129
+ CONDITIONAL_ITEMS = (BuildConditional,)
130
+
131
+
132
def INVALID_REFERENCE_STRING(x: Any, callsite: str = ""):
    """Return the marker text used in place of a reference that could not be rendered.

    The marker has the form ``INVALID_REFERENCE_BUG_<callsite><<x>>``: the
    ``callsite`` label follows the fixed prefix and the offending value ``x``
    is wrapped in angle brackets.
    """
    # A raising variant is present in the source only as commented-out code:
    # if CONFIG.validate_missing:
    #     raise SyntaxError(f"INVALID_REFERENCE_BUG_{callsite}<{x}>")
    marker = f"INVALID_REFERENCE_BUG_{callsite}"
    return f"{marker}<{x}>"
137
+
138
+
139
+ def window_factory(string: str, include_concept: bool = False) -> Callable:
140
+ def render_window(
141
+ concept: str, window: str, sort: str, offset: int | None = None
142
+ ) -> str:
143
+ if not include_concept:
144
+ concept = ""
145
+ if offset is not None:
146
+ base = f"{string}({concept}, {offset})"
147
+ else:
148
+ base = f"{string}({concept})"
149
+ if window and sort:
150
+ return f"{base} over (partition by {window} order by {sort} )"
151
+ elif window:
152
+ return f"{base} over (partition by {window})"
153
+ elif sort:
154
+ return f"{base} over (order by {sort} )"
155
+ else:
156
+ return f"{base} over ()"
157
+
158
+ return render_window
159
+
160
+
161
# Lookup from window operation to the callable that renders it as SQL.
# Each row gives the WindowType, the SQL function name, and whether the
# windowed concept is passed to that function as an argument.
WINDOW_FUNCTION_MAP = {
    window_type: window_factory(sql_name, include_concept=takes_concept)
    for window_type, sql_name, takes_concept in (
        (WindowType.LAG, "lag", True),
        (WindowType.LEAD, "lead", True),
        (WindowType.RANK, "rank", False),
        (WindowType.ROW_NUMBER, "row_number", False),
        (WindowType.SUM, "sum", True),
        (WindowType.COUNT, "count", True),
        (WindowType.AVG, "avg", True),
    )
}
170
+
171
+ # SQL type name emitted for each primitive DataType.
+ DATATYPE_MAP: dict[DataType, str] = {
172
+ DataType.STRING: "string",
173
+ DataType.INTEGER: "int",
174
+ DataType.FLOAT: "float",
175
+ DataType.BOOL: "bool",
176
+ DataType.NUMERIC: "numeric",
177
+ DataType.MAP: "map",
178
+ DataType.DATE: "date",
179
+ DataType.DATETIME: "datetime",
180
+ DataType.ARRAY: "list",
181
+ }
182
+
183
+ # Formatters for types that wrap another type: the ARRAY entry appends "[]"
+ # to the value it is given (presumably the rendered element type - confirm).
+ COMPLEX_DATATYPE_MAP = {
184
+ DataType.ARRAY: lambda x: f"{x}[]",
185
+ }
186
+
187
+
188
def render_case(args):
    """Wrap rendered CASE branches in ``CASE ... END``.

    ``args`` is an iterable of strings; each one, and the closing ``END``,
    is placed on its own tab-indented line after ``CASE``.
    """
    branches = "\n\t".join(args)
    return f"CASE\n\t{branches}\n\tEND"
190
+
191
+
192
def struct_arg(args):
    """Pair up a flat argument sequence into ``"<second>: <first>"`` strings.

    Items are consumed two at a time; within each pair the second item is
    written before the colon and the first after it. A trailing unpaired
    item is ignored.
    """
    firsts = args[::2]
    seconds = args[1::2]
    return [f"{second}: {first}" for first, second in zip(firsts, seconds)]
194
+
195
+
196
def hash_from_args(val, hash_type):
    """Render a hash function call over ``val``.

    ``hash_type`` has its first and last characters removed before use, and
    is then matched case-insensitively against md5, sha1, sha256 and sha512.

    Raises:
        ValueError: if the stripped ``hash_type`` is not one of those names.
    """
    algorithm = hash_type[1:-1]
    function_name = algorithm.lower()
    if function_name not in ("md5", "sha1", "sha256", "sha512"):
        raise ValueError(f"Unsupported hash type: {algorithm}")
    return f"{function_name}({val})"
208
+
209
+
210
+ # Maps each FunctionType to a callable ``(x, types) -> str`` that renders the
+ # function as SQL text. ``x`` is the sequence of already-rendered arguments
+ # (several entries hand it straight to ``str.join``); none of the renderers
+ # in this map read ``types``.
+ FUNCTION_MAP = {
210
+ # generic types
211
+ FunctionType.ALIAS: lambda x, types: f"{x[0]}",
212
+ FunctionType.GROUP: lambda x, types: f"{x[0]}",
213
+ FunctionType.CONSTANT: lambda x, types: f"{x[0]}",
214
+ FunctionType.TYPED_CONSTANT: lambda x, types: f"{x[0]}",
215
+ FunctionType.COALESCE: lambda x, types: f"coalesce({','.join(x)})",
216
+ FunctionType.NULLIF: lambda x, types: f"nullif({x[0]},{x[1]})",
217
+ FunctionType.CAST: lambda x, types: f"cast({x[0]} as {x[1]})",
218
+ FunctionType.CASE: lambda x, types: render_case(x),
219
+ FunctionType.SPLIT: lambda x, types: f"split({x[0]}, {x[1]})",
220
+ FunctionType.IS_NULL: lambda x, types: f"{x[0]} is null",
221
+ FunctionType.BOOL: lambda x, types: f"CASE WHEN {x[0]} THEN TRUE ELSE FALSE END",
222
+ FunctionType.PARENTHETICAL: lambda x, types: f"({x[0]})",
223
+ # Complex
224
+ FunctionType.INDEX_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
225
+ FunctionType.MAP_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
226
+ FunctionType.UNNEST: lambda x, types: f"unnest({x[0]})",
227
+ FunctionType.DATE_SPINE: lambda x, types: f"""unnest(
228
+ generate_series(
229
+ {x[0]},
230
+ {x[1]},
231
+ INTERVAL '1 day'
232
+ )
233
+ )""",
234
+ FunctionType.RECURSE_EDGE: lambda x, types: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END",
235
+ FunctionType.ATTR_ACCESS: lambda x, types: f"""{x[0]}.{x[1].replace("'", "")}""",
236
+ FunctionType.STRUCT: lambda x, types: f"{{{', '.join(struct_arg(x))}}}",
237
+ FunctionType.ARRAY: lambda x, types: f"[{', '.join(x)}]",
238
+ # NOTE(review): the two literal renderers below interpolate the whole
+ # argument sequence ``x`` rather than ``x[0]`` - confirm this is intended.
+ FunctionType.DATE_LITERAL: lambda x, types: f"date '{x}'",
239
+ FunctionType.DATETIME_LITERAL: lambda x, types: f"datetime '{x}'",
240
+ # MAP
241
+ FunctionType.MAP_KEYS: lambda x, types: f"map_keys({x[0]})",
242
+ FunctionType.MAP_VALUES: lambda x, types: f"map_values({x[0]})",
243
+ # ARRAY
244
+ FunctionType.GENERATE_ARRAY: lambda x, types: f"generate_series({x[0]}, {x[1]}, {x[2]})",
245
+ FunctionType.ARRAY_SUM: lambda x, types: f"array_sum({x[0]})",
246
+ FunctionType.ARRAY_DISTINCT: lambda x, types: f"array_distinct({x[0]})",
247
+ FunctionType.ARRAY_SORT: lambda x, types: f"array_sort({x[0]})",
248
+ FunctionType.ARRAY_TRANSFORM: lambda args, types: (
249
+ f"array_transform({args[0]}, {args[1]} -> {args[2]})"
250
+ ),
251
+ FunctionType.ARRAY_TO_STRING: lambda args, types: (
252
+ f"array_to_string({args[0]}, {args[1]})"
253
+ ),
254
+ FunctionType.ARRAY_FILTER: lambda args, types: (
255
+ f"array_filter({args[0]}, {args[1]} -> {args[2]})"
256
+ ),
257
+ # math
258
+ FunctionType.ADD: lambda x, types: " + ".join(x),
259
+ FunctionType.ABS: lambda x, types: f"abs({x[0]})",
260
+ FunctionType.SUBTRACT: lambda x, types: " - ".join(x),
261
+ FunctionType.DIVIDE: lambda x, types: " / ".join(x),
262
+ FunctionType.MULTIPLY: lambda x, types: " * ".join(x),
263
+ FunctionType.ROUND: lambda x, types: f"round({x[0]},{x[1]})",
264
+ FunctionType.FLOOR: lambda x, types: f"floor({x[0]})",
265
+ FunctionType.CEIL: lambda x, types: f"ceil({x[0]})",
266
+ FunctionType.MOD: lambda x, types: f"{x[0]} % {x[1]}",
267
+ FunctionType.POWER: lambda x, types: f"{x[0]} ** {x[1]}",
268
+ FunctionType.SQRT: lambda x, types: f"sqrt({x[0]})",
269
+ FunctionType.RANDOM: lambda x, types: "random()",
270
+ # NOTE(review): ``x[1] == 10`` compares against an int; if x[1] arrives as
+ # rendered text (as the join-based entries imply) the single-argument form
+ # is never chosen - confirm before relying on it.
+ FunctionType.LOG: lambda x, types: (
271
+ f"log({x[0]})" if x[1] == 10 else f"log({x[0]}, {x[1]})"
272
+ ),
273
+ # aggregate types
274
+ FunctionType.COUNT_DISTINCT: lambda x, types: f"count(distinct {x[0]})",
275
+ FunctionType.COUNT: lambda x, types: f"count({x[0]})",
276
+ FunctionType.SUM: lambda x, types: f"sum({x[0]})",
277
+ FunctionType.ARRAY_AGG: lambda x, types: f"array_agg({x[0]})",
278
+ FunctionType.LENGTH: lambda x, types: f"length({x[0]})",
279
+ FunctionType.AVG: lambda x, types: f"avg({x[0]})",
280
+ FunctionType.MAX: lambda x, types: f"max({x[0]})",
281
+ FunctionType.MIN: lambda x, types: f"min({x[0]})",
282
+ FunctionType.ANY: lambda x, types: f"any_value({x[0]})",
283
+ FunctionType.BOOL_OR: lambda x, types: f"bool_or({x[0]})",
284
+ FunctionType.BOOL_AND: lambda x, types: f"bool_and({x[0]})",
285
+ # string types
286
+ FunctionType.LIKE: lambda x, types: f" {x[0]} like {x[1]} ",
287
+ FunctionType.UPPER: lambda x, types: f"UPPER({x[0]}) ",
288
+ FunctionType.LOWER: lambda x, types: f"LOWER({x[0]}) ",
289
+ FunctionType.SUBSTRING: lambda x, types: f"SUBSTRING({x[0]},{x[1]},{x[2]})",
290
+ FunctionType.STRPOS: lambda x, types: f"STRPOS({x[0]},{x[1]})",
291
+ FunctionType.CONTAINS: lambda x, types: f"CONTAINS({x[0]},{x[1]})",
292
+ FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_CONTAINS({x[0]},{x[1]})",
293
+ FunctionType.REGEXP_EXTRACT: lambda x, types: f"REGEXP_EXTRACT({x[0]},{x[1]})",
294
+ FunctionType.REGEXP_REPLACE: lambda x, types: f"REGEXP_REPLACE({x[0]},{x[1]}, {x[2]})",
295
+ FunctionType.TRIM: lambda x, types: f"TRIM({x[0]})",
296
+ FunctionType.REPLACE: lambda x, types: f"REPLACE({x[0]},{x[1]},{x[2]})",
297
+ FunctionType.HASH: lambda x, types: hash_from_args(x[0], x[1]),
298
+ # FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
299
+ # date types
300
+ FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc({x[0]},{x[1]})",
301
+ FunctionType.DATE_PART: lambda x, types: f"date_part({x[0]},{x[1]})",
302
+ FunctionType.DATE_ADD: lambda x, types: f"date_add({x[0]},{x[1]}, {x[2]})",
303
+ FunctionType.DATE_SUB: lambda x, types: f"date_sub({x[0]},{x[1]}, {x[2]})",
304
+ FunctionType.DATE_DIFF: lambda x, types: f"date_diff({x[0]},{x[1]}, {x[2]})",
305
+ FunctionType.DATE: lambda x, types: f"date({x[0]})",
306
+ FunctionType.DATETIME: lambda x, types: f"datetime({x[0]})",
307
+ FunctionType.TIMESTAMP: lambda x, types: f"timestamp({x[0]})",
308
+ FunctionType.SECOND: lambda x, types: f"second({x[0]})",
309
+ FunctionType.MINUTE: lambda x, types: f"minute({x[0]})",
310
+ FunctionType.HOUR: lambda x, types: f"hour({x[0]})",
311
+ FunctionType.DAY: lambda x, types: f"day({x[0]})",
312
+ FunctionType.DAY_NAME: lambda x, types: f"dayname({x[0]})",
313
+ FunctionType.DAY_OF_WEEK: lambda x, types: f"day_of_week({x[0]})",
314
+ FunctionType.WEEK: lambda x, types: f"week({x[0]})",
315
+ FunctionType.MONTH: lambda x, types: f"month({x[0]})",
316
+ FunctionType.MONTH_NAME: lambda x, types: f"monthname({x[0]})",
317
+ FunctionType.QUARTER: lambda x, types: f"quarter({x[0]})",
318
+ FunctionType.YEAR: lambda x, types: f"year({x[0]})",
319
+ # string types
320
+ FunctionType.CONCAT: lambda x, types: f"concat({','.join(x)})",
321
+ # constant types
322
+ FunctionType.CURRENT_DATE: lambda x, types: "current_date()",
323
+ FunctionType.CURRENT_DATETIME: lambda x, types: "current_datetime()",
324
+ }
326
+
327
# Copy of FUNCTION_MAP in which the listed aggregate renderers emit no
# aggregate call: COUNT and COUNT_DISTINCT become a per-row 1/0 null check,
# while SUM, AVG, MAX, MIN and ANY return their first argument unchanged.
# NOTE(review): presumably used when rows are already at the aggregate's
# grain - confirm against the callers.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # The space after WHEN matters: without it the keyword fuses with any
    # argument that does not begin with a quote character.
    FunctionType.COUNT_DISTINCT: lambda args, types: (
        f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END"
    ),
    FunctionType.COUNT: lambda args, types: (
        f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END"
    ),
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
    FunctionType.MAX: lambda args, types: f"{args[0]}",
    FunctionType.MIN: lambda args, types: f"{args[0]}",
    FunctionType.ANY: lambda args, types: f"{args[0]}",
}
337
+
338
+
339
+ # Jinja2 template for a full query. Variables read by the template: ctes (each
+ # with .name and .statement), recursive, full_select, limit, select_columns,
+ # base, joins, where, group_by, having and order_by.
+ # When full_select is set it is emitted in place of the SELECT body; otherwise
+ # a non-none limit is rendered as "TOP <limit>" directly after SELECT.
+ GENERIC_SQL_TEMPLATE: Template = Template(
339
+ """{%- if ctes %}
340
+ WITH {% if recursive%} RECURSIVE {% endif %}{% for cte in ctes %}
341
+ {{cte.name}} as (
342
+ {{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
343
+ {%- if full_select -%}
344
+ {{full_select}}
345
+ {% else -%}
346
+ SELECT
347
+ {%- if limit is not none %}
348
+ TOP {{ limit }}{% endif %}
349
+ {%- for select in select_columns %}
350
+ \t{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
351
+ {% if base %}FROM
352
+ \t{{ base }}{% endif %}{% if joins %}{% for join in joins %}
353
+ \t{{ join }}{% endfor %}{% endif %}{% if where %}
354
+ WHERE
355
+ \t{{ where }}{% endif %}{%- if group_by %}
356
+ GROUP BY {% for group in group_by %}
357
+ \t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
358
+ HAVING
359
+ \t{{ having }}{% endif %}{%- if order_by %}
360
+ ORDER BY{% for order in order_by %}
361
+ \t{{ order }}{% if not loop.last %},{% endif %}{% endfor %}
362
+ {% endif %}{% endif %}
363
+ """
364
+ )
366
+
367
+
368
+ # Jinja2 template for a CREATE TABLE statement. Variables read: create_mode
+ # ("create_or_replace", "create_if_not_exists", or anything else for a plain
+ # CREATE TABLE), name, columns (each with .name and .comment), type_map (keyed
+ # by column name) and partition_keys. The template text is stripped before use.
+ # NOTE(review): column.comment is interpolated verbatim between single quotes;
+ # a comment containing a quote would break the statement unless the caller
+ # escapes it - confirm.
+ CREATE_TABLE_SQL_TEMPLATE = Template(
369
+ """
370
+ CREATE {% if create_mode == "create_or_replace" %}OR REPLACE TABLE{% elif create_mode == "create_if_not_exists" %}TABLE IF NOT EXISTS{% else %}TABLE{% endif %} {{ name }} (
371
+ {%- for column in columns %}
372
+ {{ column.name }} {{ type_map[column.name] }}{% if column.comment %} COMMENT '{{ column.comment }}'{% endif %}{% if not loop.last %},{% endif %}
373
+ {%- endfor %}
374
+ )
375
+ {%- if partition_keys %}
376
+ PARTITIONED BY (
377
+ {%- for partition_key in partition_keys %}
378
+ {{ partition_key }}{% if not loop.last %},{% endif %}
379
+ {%- endfor %}
380
+ )
381
+ {%- endif %};
382
+ """.strip()
383
+ )
384
+
385
+
386
def safe_get_cte_value(
    coalesce: Callable,
    cte: CTE | UnionCTE,
    c: BuildConcept,
    quote_char: str,
    render_expr: Callable,
    use_map: dict[str, set[str]],
) -> Optional[str]:
    """Render a reference to concept ``c`` from the sources recorded on ``cte``.

    The entry for ``c.address`` in ``cte.source_map`` decides the result:

    * missing or empty: ``None`` is returned;
    * a single source (a string or a one-element list): a quoted
      ``source.column`` reference, unless the alias for a one-element list is
      a function, in which case that function is rendered via ``render_expr``;
    * several sources: the sorted quoted references are passed to
      ``coalesce`` together with an empty list.

    Every source that ends up referenced by name has ``c.address`` added to
    its set in ``use_map``.
    """
    address = c.address
    sources = cte.source_map.get(address, None)
    if not sources:
        return None

    def qualified(source, column) -> str:
        # <quoted source>.<quoted column>
        return f"{quote_char}{source}{quote_char}.{safe_quote(column, quote_char)}"

    if isinstance(sources, str):
        column = cte.get_alias(c, sources)
        use_map[sources].add(c.address)
        return qualified(sources, column)

    if isinstance(sources, list) and len(sources) == 1:
        only_source = sources[0]
        column = cte.get_alias(c, only_source)
        if isinstance(column, FUNCTION_ITEMS):
            # The alias is itself a function: render it instead of quoting it.
            return f"{render_expr(column, cte=cte, raise_invalid=True)}"
        use_map[only_source].add(c.address)
        return qualified(only_source, column)

    for source in sources:
        use_map[source].add(c.address)
    references = [qualified(source, cte.get_alias(c, source)) for source in sources]
    references.sort()
    return coalesce(references, [])
421
+
422
+
423
+ class BaseDialect:
424
+ WINDOW_FUNCTION_MAP = WINDOW_FUNCTION_MAP
425
+ FUNCTION_MAP = FUNCTION_MAP
426
+ FUNCTION_GRAIN_MATCH_MAP = FUNCTION_GRAIN_MATCH_MAP
427
+ QUOTE_CHARACTER = "`"
428
+ SQL_TEMPLATE = GENERIC_SQL_TEMPLATE
429
+ CREATE_TABLE_SQL_TEMPLATE = CREATE_TABLE_SQL_TEMPLATE
430
+ DATATYPE_MAP = DATATYPE_MAP
431
+ COMPLEX_DATATYPE_MAP = COMPLEX_DATATYPE_MAP
432
+ UNNEST_MODE = UnnestMode.CROSS_APPLY
433
+ GROUP_MODE = GroupMode.AUTO
434
+ EXPLAIN_KEYWORD = "EXPLAIN"
435
+ NULL_WRAPPER = staticmethod(null_wrapper)
436
+ ALIAS_ORDER_REFERENCING_ALLOWED = True
437
+ TABLE_NOT_FOUND_PATTERN: str | None = None # Dialect-specific pattern to match
438
+ HTTP_NOT_FOUND_PATTERN: str | None = None # Pattern for HTTP 404 errors (e.g., GCS)
439
+
440
def __init__(
    self,
    rendering: Rendering | None = None,
    config: "DialectConfig | None" = None,
):
    """Set up the dialect's rendering options, config and usage map.

    Args:
        rendering: rendering options; when falsy, ``CONFIG.rendering`` is
            used instead.
        config: optional dialect configuration, stored as given.
    """
    self.rendering = rendering if rendering else CONFIG.rendering
    self.config = config
    # Starts empty; looking up an unseen key yields a fresh empty set.
    self.used_map: dict[str, set[str]] = defaultdict(set)
448
+
449
def render_source(self, address: Address) -> str:
    """Render the FROM-clause text for ``address``.

    Query addresses are wrapped in parentheses. File addresses of SQL type
    are read from disk as UTF-8 and wrapped in parentheses. Any other
    non-file address has its location quoted via ``self.safe_quote``.

    Raises:
        NotImplementedError: for a file address whose type is not SQL.
    """
    kind = address.type
    if kind == AddressType.QUERY:
        return f"({address.location})"
    if not address.is_file:
        return self.safe_quote(address.location)
    if kind == AddressType.SQL:
        with open(address.location, "r", encoding="utf-8") as handle:
            sql_text = handle.read()
        return f"({sql_text})"
    raise NotImplementedError(
        f"File source type {address.type} not supported by this dialect"
    )
460
+
461
+ def get_table_schema(
462
+ self, executor, table_name: str, schema: str | None = None
463
+ ) -> list[tuple]:
464
+ raise NotImplementedError
465
+
466
+ def get_table_primary_keys(
467
+ self, executor, table_name: str, schema: str | None = None
468
+ ) -> list[str]:
469
+ raise NotImplementedError
470
+
471
+ def get_table_sample(
472
+ self,
473
+ executor,
474
+ table_name: str,
475
+ schema: str | None = None,
476
+ sample_size: int = 10000,
477
+ ) -> list[tuple]:
478
+ if schema:
479
+ qualified_name = f"{schema}.{table_name}"
480
+ else:
481
+ qualified_name = table_name
482
+
483
+ sample_query = (
484
+ f"SELECT * FROM {self.safe_quote(qualified_name)} LIMIT {sample_size}"
485
+ )
486
+ rows = executor.execute_raw_sql(sample_query).fetchall()
487
+ return rows
488
+
489
+ def get_table_last_modified(
490
+ self, executor, table_name: str, schema: str | None = None
491
+ ) -> str | None:
492
+ from datetime import datetime, timezone
493
+
494
+ return datetime.now(timezone.utc).isoformat()
495
+
496
def hash_column_value(self, column_name: str) -> str:
    """Return SQL that md5-hashes a column after casting it to VARCHAR.

    The column name is quoted with ``self.safe_quote`` before use.
    """
    quoted_column = self.safe_quote(column_name)
    return f"md5(CAST({quoted_column} AS VARCHAR))"
498
+
499
def aggregate_checksum(self, hash_expr: str) -> str:
    """Return SQL that combines ``hash(<hash_expr>)`` across rows with BIT_XOR."""
    hashed = f"hash({hash_expr})"
    return f"BIT_XOR({hashed})"
501
+
502
def render_order_item(
    self,
    order_item: BuildOrderItem,
    cte: CTE | UnionCTE,
) -> str:
    """Render one ORDER BY entry as ``<expression> <direction>``.

    The output alias is used only when the ordered expression is a concept
    that is an output column of ``cte``, the dialect allows ordering by
    alias, and the concept has no entry in ``cte.source_map`` (so it is not
    read from a source). In every other case the expression is rendered
    through ``self.render_expr``.
    """
    target = order_item.expr
    use_output_alias = (
        isinstance(target, BuildConcept)
        and target.address in cte.output_columns
        and self.ALIAS_ORDER_REFERENCING_ALLOWED
        and not cte.source_map.get(target.address, [])
    )
    if use_output_alias:
        return f"{self.QUOTE_CHARACTER}{target.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"
    return f"{self.render_expr(target, cte=cte)} {order_item.order.value}"
520
+
521
def render_concept_sql(
    self,
    c: BuildConcept,
    cte: CTE | UnionCTE,
    alias: bool = True,
    raise_invalid: bool = False,
) -> str:
    """Render concept ``c`` as SQL within ``cte``, optionally with its alias.

    If ``c`` has pseudonyms, ``c`` itself and then each pseudonym that
    ``cte.get_concept`` resolves are tried in turn with strict rendering
    (``raise_invalid=True``); a ``ValueError`` moves on to the next candidate
    and the first non-empty result wins. If nothing was produced, ``c`` is
    rendered once more using the caller's ``raise_invalid`` setting.

    With ``alias`` true, `` as <quoted safe_address of c>`` is appended.
    """
    rendered = None
    if c.pseudonyms:
        resolved = [cte.get_concept(name) for name in c.pseudonyms]
        candidates = [concept for concept in resolved if concept]
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] pseudonym candidates are {[x.address for x in candidates]}"
        )
        for candidate in [c, *candidates]:
            try:
                logger.debug(
                    f"{LOGGER_PREFIX} [{c.address}] Attempting rendering w/ candidate {candidate.address}"
                )
                rendered = self._render_concept_sql(
                    candidate,
                    cte,
                    raise_invalid=True,
                )
            except ValueError:
                # This candidate cannot be rendered here; try the next one.
                continue
            if rendered:
                break

    if not rendered:
        rendered = self._render_concept_sql(
            c,
            cte,
            raise_invalid=raise_invalid,
        )

    if not alias:
        return rendered
    quote = self.QUOTE_CHARACTER
    return f"{rendered} as {quote}{c.safe_address}{quote}"
557
+
558
def _render_concept_sql(
    self,
    c: BuildConcept,
    cte: CTE | UnionCTE,
    raise_invalid: bool = False,
) -> str:
    """Render the unaliased SQL expression for concept ``c`` within ``cte``.

    Two paths exist:

    * the concept has lineage and ``cte.source_map`` holds no sources for
      it: the expression is derived here, dispatching on the lineage type
      (window, filter, rowset item, multi-select, comparison, aggregate,
      union, constant parameter, or generic function);
    * otherwise the concept is looked up from the CTE's alias / source map
      and the parent source is recorded in ``self.used_map``.

    Args:
        c: concept to render.
        cte: CTE providing ``source_map``, aliases and grouping state.
        raise_invalid: when true, a missing source reference raises
            ``ValueError`` instead of yielding an invalid-reference marker.

    Returns:
        The rendered SQL fragment (no ``as`` alias).

    Raises:
        ValueError: if ``raise_invalid`` is set and no source is found, or
            if a multi-select concept is listed as a calculated derivation
            but no derive item carries its address.
    """
    # only recurse while it's in sources of the current cte
    logger.debug(
        f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
    )

    # check if it's not inherited AND no pseudonyms are inherited
    if c.lineage and cte.source_map.get(c.address, []) == []:
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
        )
        if isinstance(c.lineage, WINDOW_ITEMS):
            rendered_order_components = [
                f"{self.render_expr(x.expr, cte, raise_invalid=raise_invalid)} {x.order.value}"
                for x in c.lineage.order_by
            ]
            rendered_over_components = [
                self.render_concept_sql(
                    x, cte, alias=False, raise_invalid=raise_invalid
                )
                for x in c.lineage.over
            ]

            rval = self.WINDOW_FUNCTION_MAP[c.lineage.type](
                concept=self.render_concept_sql(
                    c.lineage.content,
                    cte=cte,
                    alias=False,
                    raise_invalid=raise_invalid,
                ),
                window=",".join(rendered_over_components),
                sort=",".join(rendered_order_components),
                offset=c.lineage.index,
            )
        elif isinstance(c.lineage, FILTER_ITEMS):
            # for cases when we've optimized this: the CTE already applies
            # the same condition, so the content can be rendered directly
            if cte.condition == c.lineage.where.conditional:
                rval = self.render_expr(
                    c.lineage.content, cte=cte, raise_invalid=raise_invalid
                )
            else:
                rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_expr(c.lineage.content, cte=cte, raise_invalid=raise_invalid)} ELSE NULL END"
        elif isinstance(c.lineage, BuildRowsetItem):
            rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, BuildMultiSelectLineage):
            if c.address in c.lineage.calculated_derivations:
                assert c.lineage.derive is not None
                for x in c.lineage.derive.items:
                    if x.address == c.address:
                        rval = self.render_expr(
                            x.expr,
                            cte=cte,
                            raise_invalid=raise_invalid,
                        )
                        break
                else:
                    # previously this fell through with ``rval`` unbound and
                    # surfaced as an UnboundLocalError at the return
                    raise ValueError(
                        f"No derivation found for calculated multiselect concept {c.address}"
                    )
            else:
                rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, BuildComparison):
            rval = f"{self.render_expr(c.lineage.left, cte=cte, raise_invalid=raise_invalid)} {c.lineage.operator.value} {self.render_expr(c.lineage.right, cte=cte, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, AGGREGATE_ITEMS):
            args = [
                self.render_expr(v, cte)  # , alias=False)
                for v in c.lineage.function.arguments
            ]
            if cte.group_to_grain:
                rval = self.FUNCTION_MAP[c.lineage.function.operator](args, [])
            else:
                logger.debug(
                    f"{LOGGER_PREFIX} [{c.address}] ignoring aggregate, already at"
                    " target grain"
                )
                rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.function.operator](args, [])}"
        elif (
            isinstance(c.lineage, FUNCTION_ITEMS)
            and c.lineage.operator == FunctionType.UNION
        ):
            local_matched = [
                x
                for x in c.lineage.arguments
                if isinstance(x, BuildConcept) and x.address in cte.output_columns
            ]
            # if we're sorting by the output of the union
            if not local_matched:
                rval = c.safe_address
            else:
                rval = self.render_expr(local_matched[0], cte)
        elif (
            isinstance(c.lineage, FUNCTION_ITEMS)
            and c.lineage.operator == FunctionType.CONSTANT
            and self.rendering.parameters is True
            and c.datatype.data_type != DataType.MAP
        ):
            # constants are emitted as bind parameters when enabled
            rval = f":{c.safe_address}"
        else:
            args = []
            types = []
            for arg in c.lineage.arguments:
                if (
                    isinstance(arg, BuildConcept)
                    and arg.lineage
                    and isinstance(arg.lineage, FUNCTION_ITEMS)
                    and arg.lineage.operator
                    in (
                        FunctionType.ADD,
                        FunctionType.SUBTRACT,
                        FunctionType.DIVIDE,
                        FunctionType.MULTIPLY,
                    )
                ):
                    # wrap nested arithmetic so operator precedence survives
                    args.append(
                        self.render_expr(
                            BuildParenthetical(content=arg),
                            cte=cte,
                            raise_invalid=raise_invalid,
                        )
                    )
                else:
                    args.append(
                        self.render_expr(arg, cte=cte, raise_invalid=raise_invalid)
                    )
                types.append(arg_to_datatype(arg))

            if cte.group_to_grain:
                rval = f"{self.FUNCTION_MAP[c.lineage.operator](args, types)}"
            else:
                rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.operator](args, types)}"
    else:
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] Rendering basic lookup from {cte.source_map.get(c.address,None)}"
        )

        raw_content = cte.get_alias(c)
        parent = cte.source_map.get(c.address, None)
        if parent:
            # record that the first parent source supplies this concept
            self.used_map[parent[0]].add(c.address)
        if isinstance(raw_content, RawColumnExpr):
            rval = raw_content.text
        elif isinstance(raw_content, FUNCTION_ITEMS):
            rval = self.render_expr(
                raw_content, cte=cte, raise_invalid=raise_invalid
            )
        else:
            rval = safe_get_cte_value(
                self.FUNCTION_MAP[FunctionType.COALESCE],
                cte,
                c,
                self.QUOTE_CHARACTER,
                self.render_expr,
                self.used_map,
            )
            if not rval:
                # unions won't have a specific source mapped; just use a generic column reference
                # we shouldn't ever have an expression at this point, so will be safe
                if isinstance(cte, UnionCTE):
                    rval = c.safe_address
                else:
                    if raise_invalid:
                        # name the missing concept; ``rval`` is empty here so
                        # interpolating it (as before) only printed "None"
                        raise ValueError(
                            f"Invalid reference string found in query: missing source reference to {c.address}, this should never occur. Please report this issue."
                        )
                    rval = INVALID_REFERENCE_STRING(
                        f"Missing source reference to {c.address}"
                    )
    return rval
728
+
729
def render_array_unnest(
    self,
    left,
    right,
    operator: ComparisonOperator,
    cte: CTE | UnionCTE | None = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
):
    """Render ``left <operator> right`` for a comparison against an array parameter.

    Both operands are rendered through ``render_expr`` with the same CTE
    context and joined by the operator's ``value`` with single spaces.
    """
    lhs = self.render_expr(
        left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
    )
    symbol = operator.value
    rhs = self.render_expr(
        right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
    )
    return f"{lhs} {symbol} {rhs}"
739
+
740
def render_expr(
    self,
    e: Union[
        BuildConcept,
        BuildFunction,
        BuildConditional,
        BuildAggregateWrapper,
        BuildComparison,
        BuildCaseWhen,
        BuildCaseElse,
        BuildSubselectComparison,
        BuildWindowItem,
        BuildFilterItem,
        BuildParenthetical,
        BuildParamaterizedConceptReference,
        BuildMultiSelectLineage,
        BuildRowsetItem,
        str,
        int,
        list,
        bool,
        float,
        date,
        datetime,
        DataType,
        TraitDataType,
        MagicConstants,
        MapWrapper[Any, Any],
        MapType,
        NumericType,
        StructType,
        ArrayType,
        ListWrapper[Any],
        TupleWrapper[Any],
        DatePart,
    ],
    cte: Optional[CTE | UnionCTE] = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
) -> str:
    """Render an arbitrary expression node or literal to a SQL fragment.

    Dispatches on the runtime type of ``e``; compound nodes recurse into
    this method for their children.

    Args:
        e: expression node, wrapper, datatype, or Python literal.
        cte: CTE whose sources are used to resolve concept references.
        cte_map: fallback mapping from concept address to CTE, used when
            ``cte`` is not supplied.
        raise_invalid: forwarded to nested renders; makes missing source
            references raise instead of rendering a marker.

    Returns:
        The rendered SQL text.

    Raises:
        ValueError: if the type of ``e`` is not handled.
        AssertionError: for a subselect comparison against a concept when
            no CTE context is available.
    """
    if isinstance(e, SUBSELECT_COMPARISON_ITEMS):
        right: Any = e.right
        # peel any parentheses so the dispatch below sees the real operand
        while isinstance(right, BuildParenthetical):
            right = right.content
        if isinstance(right, BuildConcept):
            # we won't always have an existence map
            # so fall back to the normal map
            lookup_cte = cte
            if cte_map and not lookup_cte:
                lookup_cte = cte_map.get(right.address)

            assert lookup_cte, "Subselects must be rendered with a CTE in context"
            if right.address not in lookup_cte.existence_source_map:
                lookup = lookup_cte.source_map.get(
                    right.address,
                    [
                        INVALID_REFERENCE_STRING(
                            f"Missing source reference to {right.address}"
                        )
                    ],
                )
            else:
                lookup = lookup_cte.existence_source_map[right.address]
            if len(lookup) > 0:
                target = lookup[0]
            else:
                target = INVALID_REFERENCE_STRING(
                    f"Missing source CTE for {right.address}"
                )
            assert cte, "CTE must be provided for inlined CTEs"
            self.used_map[target].add(right.address)
            if target in cte.inlined_ctes:
                # the source was inlined: select from its base under the alias
                info = cte.inlined_ctes[target]
                return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} from {info.new_base} as {target} where {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} is not null)"
            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} is not null)"
        elif isinstance(right, BuildParamaterizedConceptReference):
            if isinstance(right.concept.lineage, BuildFunction) and isinstance(
                right.concept.lineage.arguments[0], ListWrapper
            ):
                return self.render_array_unnest(
                    e.left,
                    right,
                    e.operator,
                    cte=cte,
                    cte_map=cte_map,
                    raise_invalid=raise_invalid,
                )
            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
        elif isinstance(
            right,
            (ListWrapper, TupleWrapper, BuildParenthetical),
        ):
            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"

        return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} ({self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)})"
    elif isinstance(e, COMPARISON_ITEMS):
        return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
    elif isinstance(e, CONDITIONAL_ITEMS):
        # no parentheses are added here; grouping only appears where the
        # tree carries an explicit parenthetical node (rendered below)
        return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
    elif isinstance(e, WINDOW_ITEMS):
        rendered_order_components = [
            f"{self.render_expr(x.expr, cte, cte_map=cte_map, raise_invalid=raise_invalid)} {x.order.value}"
            for x in e.order_by
        ]
        rendered_over_components = [
            self.render_expr(x, cte, cte_map=cte_map, raise_invalid=raise_invalid)
            for x in e.over
        ]
        return f"{self.WINDOW_FUNCTION_MAP[e.type](concept = self.render_expr(e.content, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid), window=','.join(rendered_over_components), sort=','.join(rendered_order_components))}"  # noqa: E501
    elif isinstance(e, PARENTHETICAL_ITEMS):
        # wrap the content (or comma-joined list of contents) in parentheses
        if isinstance(e.content, list):
            return f"( {','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e.content])} )"
        return f"( {self.render_expr(e.content, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} )"
    elif isinstance(e, CASE_WHEN_ITEMS):
        return f"WHEN {self.render_expr(e.comparison, cte=cte, cte_map=cte_map) } THEN {self.render_expr(e.expr, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) }"
    elif isinstance(e, CASE_ELSE_ITEMS):
        return f"ELSE {self.render_expr(e.expr, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) }"
    elif isinstance(e, FUNCTION_ITEMS):
        arguments = []
        for arg in e.arguments:
            if (
                isinstance(arg, BuildConcept)
                and arg.lineage
                and isinstance(arg.lineage, FUNCTION_ITEMS)
                and arg.lineage.operator
                in (
                    FunctionType.ADD,
                    FunctionType.SUBTRACT,
                    FunctionType.DIVIDE,
                    FunctionType.MULTIPLY,
                )
            ):
                # wrap nested arithmetic so operator precedence survives
                arguments.append(
                    self.render_expr(
                        BuildParenthetical(content=arg),
                        cte=cte,
                        cte_map=cte_map,
                        raise_invalid=raise_invalid,
                    )
                )
            else:
                arguments.append(
                    self.render_expr(
                        arg, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
                    )
                )

        if cte and cte.group_to_grain:
            return self.FUNCTION_MAP[e.operator](arguments, [])

        return self.FUNCTION_GRAIN_MATCH_MAP[e.operator](arguments, [])
    elif isinstance(e, AGGREGATE_ITEMS):
        return self.render_expr(
            e.function, cte, cte_map=cte_map, raise_invalid=raise_invalid
        )
    elif isinstance(e, FILTER_ITEMS):
        return f"CASE WHEN {self.render_expr(e.where.conditional,cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} THEN {self.render_expr(e.content, cte, cte_map=cte_map, raise_invalid=raise_invalid)} ELSE NULL END"
    elif isinstance(e, BuildConcept):
        if (
            isinstance(e.lineage, FUNCTION_ITEMS)
            and e.lineage.operator == FunctionType.CONSTANT
            and self.rendering.parameters is True
            and e.datatype.data_type != DataType.MAP
        ):
            # constants are emitted as bind parameters when enabled
            return f":{e.safe_address}"
        if cte:
            return self.render_concept_sql(
                e,
                cte,
                alias=False,
                raise_invalid=raise_invalid,
            )
        elif cte_map:
            self.used_map[cte_map[e.address].name].add(e.address)
            return f"{cte_map[e.address].name}.{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
        return f"{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
    elif isinstance(e, bool):
        # bool must be tested before int: bool is an int subclass
        return f"{e}"
    elif isinstance(e, str):
        return f"'{e}'"
    elif isinstance(e, (int, float)):
        return str(e)
    elif isinstance(e, TupleWrapper):
        return f"({','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e])})"
    elif isinstance(e, MapWrapper):
        return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}:{self.render_expr(v, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}' for k, v in e.items()])}}}"
    elif isinstance(e, ListWrapper):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e], [])}"
    elif isinstance(e, DataType):
        return self.DATATYPE_MAP.get(e, e.value)
    elif isinstance(e, DatePart):
        return str(e.value)
    elif isinstance(e, NumericType):
        return f"{self.DATATYPE_MAP[DataType.NUMERIC]}({e.precision},{e.scale})"
    elif isinstance(e, MagicConstants):
        if e == MagicConstants.NULL:
            return "null"
        return str(e.value)
    elif isinstance(e, datetime):
        # datetime must be tested before date: assuming these are the
        # stdlib types, datetime subclasses date, so the reverse order sent
        # every datetime through the DATE_LITERAL renderer
        return self.FUNCTION_MAP[FunctionType.DATETIME_LITERAL](e, [])
    elif isinstance(e, date):
        return self.FUNCTION_MAP[FunctionType.DATE_LITERAL](e, [])
    elif isinstance(e, TraitDataType):
        return self.render_expr(e.type, cte=cte, cte_map=cte_map)
    elif isinstance(e, ArgBinding):
        return e.name
    elif isinstance(e, Ordering):
        return str(e.value)
    elif isinstance(e, ArrayType):
        return f"{self.COMPLEX_DATATYPE_MAP[DataType.ARRAY](self.render_expr(e.value_data_type, cte=cte, cte_map=cte_map))}"
    elif isinstance(e, list):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e], [])}"
    elif isinstance(e, BuildParamaterizedConceptReference):
        if self.rendering.parameters:
            if e.concept.namespace == DEFAULT_NAMESPACE:
                return f":{e.concept.name}"
            return f":{e.concept.address.replace('.', '_')}"
        elif e.concept.lineage:
            return self.render_expr(e.concept.lineage, cte=cte, cte_map=cte_map)
        return f"{self.QUOTE_CHARACTER}{e.concept.address}{self.QUOTE_CHARACTER}"

    else:
        raise ValueError(f"Unable to render type {type(e)} {e}")
965
+
966
def render_cte_group_by(
    self, cte: CTE | UnionCTE, select_columns
) -> Optional[list[str]]:
    """Build the GROUP BY entries for ``cte``.

    Args:
        cte: CTE being rendered; nothing is grouped unless its
            ``group_to_grain`` flag is set.
        select_columns: already-rendered select column strings, in output
            order (``<expr> as <alias>`` or a bare expression).

    Returns:
        ``None`` when the CTE is not grouped; the sorted rendered group
        expressions when ``GROUP_MODE`` is ``GroupMode.AUTO``; otherwise the
        1-based positions (as strings) of the select columns whose
        expression is one of the group expressions.

    Raises:
        ValueError: in positional mode, if a group expression does not
            appear among the select columns.
    """
    if not cte.group_to_grain:
        return None
    base = {self.render_concept_sql(c, cte, alias=False) for c in cte.group_concepts}
    if self.GROUP_MODE == GroupMode.AUTO:
        return sorted(base)

    # find the index of each column in the select columns
    final: list[str] = []
    found: set[str] = set()
    for idx, column in enumerate(select_columns):
        # the alias is appended last, so strip only the final " as ";
        # splitting on the first one truncated expressions that contain
        # " as " themselves, e.g. cast(x as int)
        pre_alias = column.rsplit(" as ", 1)[0]
        if pre_alias in base:
            final.append(str(idx + 1))
            found.add(pre_alias)
    if not base <= found:
        raise ValueError(
            f"Group by columns {base} not found in select columns {select_columns}"
        )
    return final
992
+
993
def safe_quote(self, name: str) -> str:
    """Quote ``name`` by delegating to the module-level ``safe_quote`` helper with this dialect's quote character."""
    quote_char = self.QUOTE_CHARACTER
    return safe_quote(name, quote_char)
995
+
996
def quote(self, name: str) -> str:
    """Wrap ``name`` in this dialect's quote character, with no escaping."""
    quote_char = self.QUOTE_CHARACTER
    return f"{quote_char}{name}{quote_char}"
998
+
999
def render_cte(self, cte: CTE | UnionCTE, auto_sort: bool = True) -> CompiledCTE:
    """Compile a single CTE into a named SQL statement.

    Union and recursive CTEs are rendered by compiling their internal CTEs
    and joining the statements with the set operator. Any other CTE is
    rendered through ``SQL_TEMPLATE`` from its select columns, source,
    joins, where/having split, ordering and grouping.

    Args:
        cte: the CTE to compile.
        auto_sort: when true, select columns are sorted lexicographically.

    Returns:
        A ``CompiledCTE`` carrying the CTE name and rendered statement.

    Raises:
        SyntaxError: if the CTE has joins, renders no FROM clause, and no
            unnest mode applies to supply a source.
    """
    if isinstance(cte, UnionCTE):
        base_statement = f"\n{cte.operator}\n".join(
            [
                self.render_cte(child, auto_sort=False).statement
                for child in cte.internal_ctes
            ]
        )
        if cte.order_by:

            ordering = [self.render_order_item(i, cte) for i in cte.order_by.items]
            base_statement += "\nORDER BY " + ",".join(ordering)
        return CompiledCTE(name=cte.name, statement=base_statement)
    elif isinstance(cte, RecursiveCTE):
        base_statement = "\nUNION ALL\n".join(
            [self.render_cte(child, False).statement for child in cte.internal_ctes]
        )
        return CompiledCTE(name=cte.name, statement=base_statement)
    if self.UNNEST_MODE in (
        UnnestMode.CROSS_APPLY,
        UnnestMode.CROSS_JOIN,
        UnnestMode.CROSS_JOIN_ALIAS,
        UnnestMode.SNOWFLAKE,
    ):
        # for a cross apply, derivation happens in the join
        # so we only use the alias to select
        # built once instead of once per output column
        join_derived_addresses = {y.address for y in cte.join_derived_concepts}
        select_columns = [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in join_derived_addresses
            and c.address not in cte.hidden_concepts
        ] + [
            f"{self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
            for c in cte.join_derived_concepts
            if c.address not in cte.hidden_concepts
        ]
    elif self.UNNEST_MODE in (UnnestMode.CROSS_JOIN_UNNEST, UnnestMode.PRESTO):
        # built once instead of once per output column
        join_derived_addresses = {y.address for y in cte.join_derived_concepts}
        select_columns = [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in join_derived_addresses
            and c.address not in cte.hidden_concepts
        ] + [
            f"{UNNEST_NAME} as {self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
            for c in cte.join_derived_concepts
            if c.address not in cte.hidden_concepts
        ]
    else:
        # otherwise, assume we are unnesting directly in the select
        select_columns = [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in cte.hidden_concepts
        ]
    if auto_sort:
        select_columns = sorted(select_columns)
    # every branch below assigns ``source`` or raises
    source: str | None
    if not cte.render_from_clause:
        if len(cte.joins) > 0:
            if cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.CROSS_JOIN_ALIAS,
                # UnnestMode.CROSS_JOIN_UNNEST,
                UnnestMode.CROSS_JOIN,
                UnnestMode.CROSS_APPLY,
            ):

                source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_expr, cte)}"
            elif cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.CROSS_JOIN_UNNEST,
            ):
                source = f"{self.render_expr(cte.join_derived_concepts[0], cte)} as {self.QUOTE_CHARACTER}{UNNEST_NAME}{self.QUOTE_CHARACTER}"
            elif cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.PRESTO,
            ):
                source = f"{self.render_expr(cte.join_derived_concepts[0], cte)} as t({self.QUOTE_CHARACTER}{UNNEST_NAME}{self.QUOTE_CHARACTER})"
            elif (
                cte.join_derived_concepts
                and self.UNNEST_MODE == UnnestMode.SNOWFLAKE
            ):
                source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_expr, cte)}"
            # direct - eg DUCK DB - can be directly selected inline
            elif (
                cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT
            ):
                source = None
            else:
                raise SyntaxError("CTE has joins but no from clause")
        else:
            source = None
    else:
        addr = cte.source_address
        if isinstance(addr, Address):
            source = self.render_source(addr)
        elif cte.quote_address:
            source = safe_quote(addr, self.QUOTE_CHARACTER)
        else:
            source = addr
        if cte.base_name != cte.base_alias:
            source = f"{source} as {self.QUOTE_CHARACTER}{cte.base_alias}{self.QUOTE_CHARACTER}"
    if not cte.render_from_clause:
        final_joins = []
    else:
        final_joins = cte.joins or []
    where: BuildConditional | BuildParenthetical | BuildComparison | None = None
    having: BuildConditional | BuildParenthetical | BuildComparison | None = None
    # addresses that have at least one source in this CTE
    materialized = {x for x, v in cte.source_map.items() if v}
    if cte.condition:
        if not cte.group_to_grain or is_scalar_condition(
            cte.condition, materialized=materialized
        ):
            where = cte.condition

        else:
            # grouped CTE with a non-scalar condition: scalar components go
            # to WHERE, the remainder to HAVING
            components = decompose_condition(cte.condition)
            for x in components:
                if is_scalar_condition(x, materialized=materialized):
                    where = where + x if where else x
                else:
                    having = having + x if having else x

    logger.info(f"{LOGGER_PREFIX} {len(final_joins)} joins for cte {cte.name}")
    return CompiledCTE(
        name=cte.name,
        statement=self.SQL_TEMPLATE.render(
            select_columns=select_columns,
            base=f"{source}" if source else None,
            grain=cte.grain,
            limit=cte.limit,
            comment=cte.comment if CONFIG.show_comments else None,
            # some joins may not need to be rendered
            joins=[
                j
                for j in [
                    render_join(
                        join,
                        self.QUOTE_CHARACTER,
                        self.render_expr,
                        cte,
                        use_map=self.used_map,
                        unnest_mode=self.UNNEST_MODE,
                        null_wrapper=self.NULL_WRAPPER,
                    )
                    for join in final_joins
                ]
                if j
            ],
            where=(self.render_expr(where, cte) if where else None),
            having=(self.render_expr(having, cte) if having else None),
            order_by=(
                [self.render_order_item(i, cte) for i in cte.order_by.items]
                if cte.order_by
                else None
            ),
            group_by=self.render_cte_group_by(cte, select_columns),
        ),
    )
1155
+
1156
def generate_ctes(
    self,
    query: ProcessedQuery,
) -> List[CompiledCTE]:
    """Compile every CTE of ``query``, in order.

    All CTEs but the last are rendered with default column sorting. The
    last CTE is first passed through ``sort_select_output`` and rendered
    with ``auto_sort=False`` so the user's output column order is kept.
    """
    compiled = [self.render_cte(cte) for cte in query.ctes[:-1]]
    # last CTE needs to respect the user output order
    final_cte = sort_select_output(query.ctes[-1], query)
    compiled.append(self.render_cte(final_cte, auto_sort=False))
    return compiled
1164
+
1165
def create_show_output(
    self,
    environment: Environment,
    content: ShowCategory,
):
    """Build the static result set for a ``show <category>`` statement.

    Only ``ShowCategory.CONCEPTS`` is supported: one row per non-internal
    concept in ``environment`` with its display address (bare name inside
    the default namespace, full address otherwise), its datatype as a
    string, and its description (empty string when absent).

    Raises:
        NotImplementedError: for any other category.
    """
    if content != ShowCategory.CONCEPTS:
        raise NotImplementedError(f"Show category {content} not implemented")

    address_key = DEFAULT_CONCEPTS["concept_address"].address
    datatype_key = DEFAULT_CONCEPTS["concept_datatype"].address
    description_key = DEFAULT_CONCEPTS["concept_description"].address

    output_columns = [
        environment.concepts[key].reference
        for key in (address_key, datatype_key, description_key)
    ]

    rows = []
    for _, concept in environment.concepts.items():
        if concept.is_internal:
            continue
        if concept.namespace == DEFAULT_NAMESPACE:
            display_address = concept.name
        else:
            display_address = concept.address
        rows.append(
            {
                address_key: display_address,
                datatype_key: str(concept.datatype),
                description_key: concept.metadata.description or "",
            }
        )

    return ProcessedShowStatement(
        output_columns=output_columns,
        output_values=[ProcessedStaticValueOutput(values=rows)],
    )
1204
+
1205
def generate_queries(
    self,
    environment: Environment,
    statements: Sequence[
        SelectStatement
        | MultiSelectStatement
        | PersistStatement
        | ShowStatement
        | ConceptDeclarationStatement
        | RowsetDerivationStatement
        | ImportStatement
        | RawSQLStatement
        | MergeStatementV2
        | CopyStatement
        | ValidateStatement
        | CreateStatement
        | PublishStatement
        | MockStatement
    ],
    hooks: Optional[List[BaseHook]] = None,
) -> List[PROCESSED_STATEMENT_TYPES]:
    """Turn parsed statements into processed statements, preserving order.

    Each statement is dispatched on its type; the type checks run in a
    fixed order and the first match wins. Hooks, when given, are notified
    for persist, copy, select, multi-select and rowset statements before
    processing. Declaration-style statements produce no output entry.

    Raises:
        NotImplementedError: for an unknown statement type or an
            unsupported ``show`` payload.
    """
    active_hooks = hooks or ()
    # statement kinds that yield no processed output from this method
    no_output_kinds = (
        ConceptDeclarationStatement,
        MergeStatementV2,
        ImportStatement,
        RowsetDerivationStatement,
        Datasource,
        FunctionDeclaration,
    )
    results: List[PROCESSED_STATEMENT_TYPES] = []
    for stmt in statements:
        if isinstance(stmt, PersistStatement):
            for hook in active_hooks:
                hook.process_persist_info(stmt)
            results.append(process_persist(environment, stmt, hooks=hooks))
            continue

        if isinstance(stmt, CopyStatement):
            for hook in active_hooks:
                hook.process_select_info(stmt.select)
            results.append(process_copy(environment, stmt, hooks=hooks))
            continue

        if isinstance(stmt, SelectStatement):
            for hook in active_hooks:
                hook.process_select_info(stmt)
            results.append(process_query(environment, stmt, hooks=hooks))
            continue

        if isinstance(stmt, MultiSelectStatement):
            for hook in active_hooks:
                hook.process_multiselect_info(stmt)
            results.append(process_query(environment, stmt, hooks=hooks))
            continue

        if isinstance(stmt, RowsetDerivationStatement):
            # hooks only; a rowset derivation adds nothing to the output
            for hook in active_hooks:
                hook.process_rowset_info(stmt)
            continue

        if isinstance(stmt, ShowStatement):
            # TODO - encapsulate this a little better
            shown = stmt.content
            if isinstance(shown, SelectStatement):
                query_columns = [
                    environment.concepts[
                        DEFAULT_CONCEPTS["query_text"].address
                    ].reference
                ]
                processed = process_query(environment, shown, hooks=hooks)
                results.append(
                    ProcessedShowStatement(
                        output_columns=query_columns,
                        output_values=[processed],
                    )
                )
            elif isinstance(shown, ShowCategory):
                results.append(self.create_show_output(environment, shown))
            elif isinstance(shown, ValidateStatement):
                validate_columns = [
                    environment.concepts[DEFAULT_CONCEPTS[key].address].reference
                    for key in ("label", "query_text", "expected")
                ]
                results.append(
                    ProcessedShowStatement(
                        output_columns=validate_columns,
                        output_values=[
                            ProcessedValidateStatement(
                                scope=shown.scope,
                                targets=shown.targets,
                            )
                        ],
                    )
                )
            else:
                raise NotImplementedError(type(stmt.content))
            continue

        if isinstance(stmt, RawSQLStatement):
            results.append(ProcessedRawSQLStatement(text=stmt.text))
            continue

        if isinstance(stmt, ValidateStatement):
            results.append(
                ProcessedValidateStatement(scope=stmt.scope, targets=stmt.targets)
            )
            continue

        if isinstance(stmt, MockStatement):
            results.append(
                ProcessedMockStatement(scope=stmt.scope, targets=stmt.targets)
            )
            continue

        if isinstance(stmt, CreateStatement):
            results.append(process_create_statement(stmt, environment))
            continue

        if isinstance(stmt, PublishStatement):
            results.append(
                ProcessedPublishStatement(
                    scope=stmt.scope,
                    targets=stmt.targets,
                    action=stmt.action,
                )
            )
            continue

        if isinstance(stmt, no_output_kinds):
            continue

        raise NotImplementedError(type(stmt))
    return results
1341
+
1342
def generate_partitioned_insert(
    self,
    query: ProcessedQueryPersist,
    recursive: bool,
    compiled_ctes: list[CompiledCTE],
) -> str:
    """Render an ``INSERT OVERWRITE`` statement for a partitioned persist.

    The last compiled CTE supplies the final select; the preceding ones are
    passed to ``SQL_TEMPLATE`` as the CTE list.
    """
    target_table = self.safe_quote(query.output_to.address.location)
    insert_prefix = f"INSERT OVERWRITE {target_table}"
    final_select = compiled_ctes[-1].statement
    leading_ctes = compiled_ctes[:-1]
    return self.SQL_TEMPLATE.render(
        recursive=recursive,
        output=insert_prefix,
        full_select=final_select,
        ctes=leading_ctes,
    )
1354
+
1355
def compile_create_table_statement(
    self, target: CreateTableInfo, create_mode: CreateMode
) -> str:
    """Render a CREATE TABLE statement for ``target``.

    Each column's type is rendered through ``render_expr`` and handed to
    ``CREATE_TABLE_SQL_TEMPLATE`` as a name-to-type mapping, together with
    the quoted table name, the columns and the partition keys.
    """
    rendered_types = {
        column.name: self.render_expr(column.type) for column in target.columns
    }
    mode_keyword = create_mode.value
    quoted_name = self.safe_quote(target.name)
    return self.CREATE_TABLE_SQL_TEMPLATE.render(
        create_mode=mode_keyword,
        name=quoted_name,
        columns=target.columns,
        type_map=rendered_types,
        partition_keys=target.partition_keys,
    )
1368
+
1369
def compile_statement(
    self,
    query: PROCESSED_STATEMENT_TYPES,
) -> str:
    """Compile a processed statement into its final SQL text.

    Show statements become ``EXPLAIN``-prefixed compilations of their query
    values; raw SQL is returned verbatim; validate, mock and publish
    statements return a placeholder; create statements render one CREATE
    TABLE per target. Anything else is treated as a query: its CTEs are
    compiled and rendered through ``SQL_TEMPLATE``, prefixed by an INSERT
    (and, for overwrite, a CREATE OR REPLACE) when it is a persist.

    Raises:
        NotImplementedError: for an unsupported persist mode.
        ValueError: in strict mode, when the rendered SQL still contains an
            invalid-reference marker.
    """
    if isinstance(query, ProcessedShowStatement):
        explained = [
            f"{self.EXPLAIN_KEYWORD} {self.compile_statement(x)}"
            for x in query.output_values
            if isinstance(x, (ProcessedQuery, ProcessedCopyStatement))
        ]
        return ";\n".join(explained)
    if isinstance(query, ProcessedRawSQLStatement):
        return query.text
    if isinstance(query, ProcessedValidateStatement):
        return "--Trilogy validate statements do not have a generic SQL representation;\nselect 1;"
    if isinstance(query, ProcessedMockStatement):
        return "--Trilogy mock statements do not have a generic SQL representation;\nselect 1;"
    if isinstance(query, ProcessedPublishStatement):
        return "--Trilogy publish statements do not have a generic SQL representation;\nselect 1;"
    if isinstance(query, ProcessedCreateStatement):
        return "\n".join(
            [
                self.compile_create_table_statement(target, query.create_mode)
                for target in query.targets
            ]
        )

    uses_recursion = any(isinstance(x, RecursiveCTE) for x in query.ctes)

    compiled = self.generate_ctes(query)
    insert_prefix = None
    if isinstance(query, ProcessedQueryPersist):
        if query.persist_mode == PersistMode.OVERWRITE:
            table_info = datasource_to_create_table_info(query.datasource)
            insert_prefix = f"{self.compile_create_table_statement(table_info, CreateMode.CREATE_OR_REPLACE)} INSERT INTO {self.safe_quote(query.output_to.address.location)} "
        elif query.persist_mode == PersistMode.APPEND:
            if query.partition_by:
                # partitioned appends use their own INSERT OVERWRITE form
                return self.generate_partitioned_insert(
                    query, uses_recursion, compiled
                )
            insert_prefix = (
                f"INSERT INTO {self.safe_quote(query.output_to.address.location)} "
            )
        else:
            raise NotImplementedError(
                f"Persist mode {query.persist_mode} not implemented"
            )

    rendered = self.SQL_TEMPLATE.render(
        recursive=uses_recursion,
        output=insert_prefix,
        full_select=compiled[-1].statement,
        ctes=compiled[:-1],
    )

    marker_present = CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in rendered
    if marker_present:
        raise ValueError(
            f"Invalid reference string found in query: {rendered}, this should never"
            " occur. Please create an issue to report this."
        )
    logger.info(f"{LOGGER_PREFIX} Compiled query: {rendered}")
    return rendered