pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.148.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.148.dist-info/RECORD +206 -0
  6. pytrilogy-0.3.148.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2662 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +434 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +786 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1431 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +376 -0
  117. trilogy/dialect/enums.py +149 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/file_state_store.py +0 -0
  130. trilogy/execution/state/sqllite_state_store.py +0 -0
  131. trilogy/execution/state/state_store.py +301 -0
  132. trilogy/executor.py +656 -0
  133. trilogy/hooks/__init__.py +4 -0
  134. trilogy/hooks/base_hook.py +40 -0
  135. trilogy/hooks/graph_hook.py +135 -0
  136. trilogy/hooks/query_debugger.py +166 -0
  137. trilogy/metadata/__init__.py +0 -0
  138. trilogy/parser.py +10 -0
  139. trilogy/parsing/README.md +21 -0
  140. trilogy/parsing/__init__.py +0 -0
  141. trilogy/parsing/common.py +1069 -0
  142. trilogy/parsing/config.py +5 -0
  143. trilogy/parsing/exceptions.py +8 -0
  144. trilogy/parsing/helpers.py +1 -0
  145. trilogy/parsing/parse_engine.py +2863 -0
  146. trilogy/parsing/render.py +773 -0
  147. trilogy/parsing/trilogy.lark +544 -0
  148. trilogy/py.typed +0 -0
  149. trilogy/render.py +45 -0
  150. trilogy/scripts/README.md +9 -0
  151. trilogy/scripts/__init__.py +0 -0
  152. trilogy/scripts/agent.py +41 -0
  153. trilogy/scripts/agent_info.py +306 -0
  154. trilogy/scripts/common.py +430 -0
  155. trilogy/scripts/dependency/Cargo.lock +617 -0
  156. trilogy/scripts/dependency/Cargo.toml +39 -0
  157. trilogy/scripts/dependency/README.md +131 -0
  158. trilogy/scripts/dependency/build.sh +25 -0
  159. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  160. trilogy/scripts/dependency/src/lib.rs +16 -0
  161. trilogy/scripts/dependency/src/main.rs +770 -0
  162. trilogy/scripts/dependency/src/parser.rs +435 -0
  163. trilogy/scripts/dependency/src/preql.pest +208 -0
  164. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  165. trilogy/scripts/dependency/src/resolver.rs +716 -0
  166. trilogy/scripts/dependency/tests/base.preql +3 -0
  167. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  168. trilogy/scripts/dependency/tests/customer.preql +6 -0
  169. trilogy/scripts/dependency/tests/main.preql +9 -0
  170. trilogy/scripts/dependency/tests/orders.preql +7 -0
  171. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  172. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  173. trilogy/scripts/dependency.py +323 -0
  174. trilogy/scripts/display.py +555 -0
  175. trilogy/scripts/environment.py +59 -0
  176. trilogy/scripts/fmt.py +32 -0
  177. trilogy/scripts/ingest.py +472 -0
  178. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  179. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  180. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  181. trilogy/scripts/ingest_helpers/typing.py +161 -0
  182. trilogy/scripts/init.py +105 -0
  183. trilogy/scripts/parallel_execution.py +748 -0
  184. trilogy/scripts/plan.py +189 -0
  185. trilogy/scripts/refresh.py +106 -0
  186. trilogy/scripts/run.py +79 -0
  187. trilogy/scripts/serve.py +202 -0
  188. trilogy/scripts/serve_helpers/__init__.py +41 -0
  189. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  190. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  191. trilogy/scripts/serve_helpers/models.py +38 -0
  192. trilogy/scripts/single_execution.py +131 -0
  193. trilogy/scripts/testing.py +129 -0
  194. trilogy/scripts/trilogy.py +75 -0
  195. trilogy/std/__init__.py +0 -0
  196. trilogy/std/color.preql +3 -0
  197. trilogy/std/date.preql +13 -0
  198. trilogy/std/display.preql +18 -0
  199. trilogy/std/geography.preql +22 -0
  200. trilogy/std/metric.preql +15 -0
  201. trilogy/std/money.preql +67 -0
  202. trilogy/std/net.preql +14 -0
  203. trilogy/std/ranking.preql +7 -0
  204. trilogy/std/report.preql +5 -0
  205. trilogy/std/semantic.preql +6 -0
  206. trilogy/utility.py +34 -0
@@ -0,0 +1,1431 @@
1
+ from collections import defaultdict
2
+ from datetime import date, datetime
3
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union
4
+
5
+ if TYPE_CHECKING:
6
+ from trilogy.dialect.config import DialectConfig
7
+
8
+ from jinja2 import Template
9
+
10
+ from trilogy.constants import (
11
+ CONFIG,
12
+ DEFAULT_NAMESPACE,
13
+ MagicConstants,
14
+ Rendering,
15
+ logger,
16
+ )
17
+ from trilogy.core.constants import UNNEST_NAME
18
+ from trilogy.core.enums import (
19
+ AddressType,
20
+ ComparisonOperator,
21
+ CreateMode,
22
+ DatePart,
23
+ FunctionType,
24
+ GroupMode,
25
+ Modifier,
26
+ Ordering,
27
+ PersistMode,
28
+ ShowCategory,
29
+ UnnestMode,
30
+ WindowType,
31
+ )
32
+ from trilogy.core.internal import DEFAULT_CONCEPTS
33
+ from trilogy.core.models.author import ArgBinding, arg_to_datatype
34
+ from trilogy.core.models.build import (
35
+ BuildAggregateWrapper,
36
+ BuildCaseElse,
37
+ BuildCaseWhen,
38
+ BuildComparison,
39
+ BuildConcept,
40
+ BuildConditional,
41
+ BuildFilterItem,
42
+ BuildFunction,
43
+ BuildMultiSelectLineage,
44
+ BuildOrderItem,
45
+ BuildParamaterizedConceptReference,
46
+ BuildParenthetical,
47
+ BuildRowsetItem,
48
+ BuildSubselectComparison,
49
+ BuildWindowItem,
50
+ )
51
+ from trilogy.core.models.core import (
52
+ ArrayType,
53
+ DataType,
54
+ ListWrapper,
55
+ MapType,
56
+ MapWrapper,
57
+ NumericType,
58
+ StructType,
59
+ TraitDataType,
60
+ TupleWrapper,
61
+ )
62
+ from trilogy.core.models.datasource import Address, Datasource, RawColumnExpr
63
+ from trilogy.core.models.environment import Environment
64
+ from trilogy.core.models.execute import CTE, CompiledCTE, RecursiveCTE, UnionCTE
65
+ from trilogy.core.processing.utility import (
66
+ decompose_condition,
67
+ is_scalar_condition,
68
+ sort_select_output,
69
+ )
70
+ from trilogy.core.query_processor import process_copy, process_persist, process_query
71
+ from trilogy.core.statements.author import (
72
+ ConceptDeclarationStatement,
73
+ CopyStatement,
74
+ CreateStatement,
75
+ FunctionDeclaration,
76
+ ImportStatement,
77
+ MergeStatementV2,
78
+ MockStatement,
79
+ MultiSelectStatement,
80
+ PersistStatement,
81
+ PublishStatement,
82
+ RawSQLStatement,
83
+ RowsetDerivationStatement,
84
+ SelectStatement,
85
+ ShowStatement,
86
+ ValidateStatement,
87
+ )
88
+ from trilogy.core.statements.execute import (
89
+ PROCESSED_STATEMENT_TYPES,
90
+ ProcessedCopyStatement,
91
+ ProcessedCreateStatement,
92
+ ProcessedMockStatement,
93
+ ProcessedPublishStatement,
94
+ ProcessedQuery,
95
+ ProcessedQueryPersist,
96
+ ProcessedRawSQLStatement,
97
+ ProcessedShowStatement,
98
+ ProcessedStaticValueOutput,
99
+ ProcessedValidateStatement,
100
+ )
101
+ from trilogy.core.table_processor import (
102
+ CreateTableInfo,
103
+ datasource_to_create_table_info,
104
+ process_create_statement,
105
+ )
106
+ from trilogy.core.utility import safe_quote
107
+ from trilogy.dialect.common import render_join, render_unnest
108
+ from trilogy.hooks.base_hook import BaseHook
109
+
110
+
111
def null_wrapper(lval: str, rval: str, modifiers: list[Modifier]) -> str:
    """Render an equality predicate between two already-rendered SQL operands.

    Args:
        lval: Left-hand SQL expression.
        rval: Right-hand SQL expression.
        modifiers: Modifiers that apply to the comparison.

    Returns:
        ``lval = rval``; when ``Modifier.NULLABLE`` is among ``modifiers`` the
        predicate is widened so that two nulls also count as a match.
    """
    equality = f"{lval} = {rval}"
    if Modifier.NULLABLE not in modifiers:
        return equality
    # A plain `=` never matches null against null, so add an explicit branch.
    return f"({equality} or ({lval} is null and {rval} is null))"
116
+
117
+
118
+ LOGGER_PREFIX = "[RENDERING]"
119
+
120
+ WINDOW_ITEMS = (BuildWindowItem,)
121
+ FILTER_ITEMS = (BuildFilterItem,)
122
+ AGGREGATE_ITEMS = (BuildAggregateWrapper,)
123
+ FUNCTION_ITEMS = (BuildFunction,)
124
+ PARENTHETICAL_ITEMS = (BuildParenthetical,)
125
+ CASE_WHEN_ITEMS = (BuildCaseWhen,)
126
+ CASE_ELSE_ITEMS = (BuildCaseElse,)
127
+ SUBSELECT_COMPARISON_ITEMS = (BuildSubselectComparison,)
128
+ COMPARISON_ITEMS = (BuildComparison,)
129
+ CONDITIONAL_ITEMS = (BuildConditional,)
130
+
131
+
132
def INVALID_REFERENCE_STRING(x: Any, callsite: str = ""):
    """Build the placeholder text used for a reference that could not be rendered.

    Args:
        x: The offending value; it is interpolated with its default string form.
        callsite: Optional label identifying where the problem was detected.

    Returns:
        A marker of the form ``INVALID_REFERENCE_BUG_<callsite><<x>>``.
    """
    # NOTE: a stricter mode that raises SyntaxError (gated on
    # CONFIG.validate_missing) is currently disabled; the marker is always
    # returned instead.
    marker = f"INVALID_REFERENCE_BUG_{callsite}<{x}>"
    return marker
137
+
138
+
139
+ def window_factory(string: str, include_concept: bool = False) -> Callable:
140
+ def render_window(
141
+ concept: str, window: str, sort: str, offset: int | None = None
142
+ ) -> str:
143
+ if not include_concept:
144
+ concept = ""
145
+ if offset is not None:
146
+ base = f"{string}({concept}, {offset})"
147
+ else:
148
+ base = f"{string}({concept})"
149
+ if window and sort:
150
+ return f"{base} over (partition by {window} order by {sort} )"
151
+ elif window:
152
+ return f"{base} over (partition by {window})"
153
+ elif sort:
154
+ return f"{base} over (order by {sort} )"
155
+ else:
156
+ return f"{base} over ()"
157
+
158
+ return render_window
159
+
160
+
161
+ WINDOW_FUNCTION_MAP = {
162
+ WindowType.LAG: window_factory("lag", include_concept=True),
163
+ WindowType.LEAD: window_factory("lead", include_concept=True),
164
+ WindowType.RANK: window_factory("rank"),
165
+ WindowType.ROW_NUMBER: window_factory("row_number"),
166
+ WindowType.SUM: window_factory("sum", include_concept=True),
167
+ WindowType.COUNT: window_factory("count", include_concept=True),
168
+ WindowType.AVG: window_factory("avg", include_concept=True),
169
+ }
170
+
171
+ DATATYPE_MAP: dict[DataType, str] = {
172
+ DataType.STRING: "string",
173
+ DataType.INTEGER: "int",
174
+ DataType.FLOAT: "float",
175
+ DataType.BOOL: "bool",
176
+ DataType.NUMERIC: "numeric",
177
+ DataType.MAP: "map",
178
+ DataType.DATE: "date",
179
+ DataType.DATETIME: "datetime",
180
+ DataType.ARRAY: "list",
181
+ }
182
+
183
+ COMPLEX_DATATYPE_MAP = {
184
+ DataType.ARRAY: lambda x: f"{x}[]",
185
+ }
186
+
187
+
188
def render_case(args):
    """Assemble a multi-line SQL CASE expression.

    Args:
        args: Iterable of already-rendered branch strings.

    Returns:
        ``CASE``, each branch and ``END`` on separate tab-indented lines.
    """
    branches = "\n\t".join(args)
    return f"CASE\n\t{branches}\n\tEND"
190
+
191
+
192
def struct_arg(args):
    """Pair up a flat argument list into ``key: value`` fragments.

    ``args`` is read as alternating entries: even positions hold the values and
    the odd position that follows each one holds its key. A trailing unpaired
    entry is ignored.

    Returns:
        A list of ``"<key>: <value>"`` strings in input order.
    """
    values = args[::2]
    keys = args[1::2]
    return [f"{key}: {value}" for value, key in zip(values, keys)]
194
+
195
+
196
def hash_from_args(val, hash_type):
    """Render a call to one of the supported SQL hash functions.

    Args:
        val: Rendered SQL expression to hash.
        hash_type: Algorithm name wrapped in one extra character on each side
            (presumably the quotes of a rendered string literal); the first
            and last characters are dropped before matching.

    Returns:
        ``<algorithm>(<val>)`` with the algorithm name lower-cased.

    Raises:
        ValueError: If the algorithm is not md5, sha1, sha256 or sha512.
    """
    supported = ("md5", "sha1", "sha256", "sha512")
    hash_type = hash_type[1:-1]
    algorithm = hash_type.lower()
    if algorithm not in supported:
        raise ValueError(f"Unsupported hash type: {hash_type}")
    return f"{algorithm}({val})"
208
+
209
+
210
+ FUNCTION_MAP = {
211
+ # generic types
212
+ FunctionType.ALIAS: lambda x, types: f"{x[0]}",
213
+ FunctionType.GROUP: lambda x, types: f"{x[0]}",
214
+ FunctionType.CONSTANT: lambda x, types: f"{x[0]}",
215
+ FunctionType.TYPED_CONSTANT: lambda x, types: f"{x[0]}",
216
+ FunctionType.COALESCE: lambda x, types: f"coalesce({','.join(x)})",
217
+ FunctionType.NULLIF: lambda x, types: f"nullif({x[0]},{x[1]})",
218
+ FunctionType.CAST: lambda x, types: f"cast({x[0]} as {x[1]})",
219
+ FunctionType.CASE: lambda x, types: render_case(x),
220
+ FunctionType.SPLIT: lambda x, types: f"split({x[0]}, {x[1]})",
221
+ FunctionType.IS_NULL: lambda x, types: f"{x[0]} is null",
222
+ FunctionType.BOOL: lambda x, types: f"CASE WHEN {x[0]} THEN TRUE ELSE FALSE END",
223
+ FunctionType.PARENTHETICAL: lambda x, types: f"({x[0]})",
224
+ # Complex
225
+ FunctionType.INDEX_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
226
+ FunctionType.MAP_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
227
+ FunctionType.UNNEST: lambda x, types: f"unnest({x[0]})",
228
+ FunctionType.DATE_SPINE: lambda x, types: f"""unnest(
229
+ generate_series(
230
+ {x[0]},
231
+ {x[1]},
232
+ INTERVAL '1 day'
233
+ )
234
+ )""",
235
+ FunctionType.RECURSE_EDGE: lambda x, types: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END",
236
+ FunctionType.ATTR_ACCESS: lambda x, types: f"""{x[0]}.{x[1].replace("'", "")}""",
237
+ FunctionType.STRUCT: lambda x, types: f"{{{', '.join(struct_arg(x))}}}",
238
+ FunctionType.ARRAY: lambda x, types: f"[{', '.join(x)}]",
239
+ FunctionType.DATE_LITERAL: lambda x, types: f"date '{x}'",
240
+ FunctionType.DATETIME_LITERAL: lambda x, types: f"datetime '{x}'",
241
+ # MAP
242
+ FunctionType.MAP_KEYS: lambda x, types: f"map_keys({x[0]})",
243
+ FunctionType.MAP_VALUES: lambda x, types: f"map_values({x[0]})",
244
+ # ARRAY
245
+ FunctionType.GENERATE_ARRAY: lambda x, types: f"generate_series({x[0]}, {x[1]}, {x[2]})",
246
+ FunctionType.ARRAY_SUM: lambda x, types: f"array_sum({x[0]})",
247
+ FunctionType.ARRAY_DISTINCT: lambda x, types: f"array_distinct({x[0]})",
248
+ FunctionType.ARRAY_SORT: lambda x, types: f"array_sort({x[0]})",
249
+ FunctionType.ARRAY_TRANSFORM: lambda args, types: (
250
+ f"array_transform({args[0]}, {args[1]} -> {args[2]})"
251
+ ),
252
+ FunctionType.ARRAY_TO_STRING: lambda args, types: (
253
+ f"array_to_string({args[0]}, {args[1]})"
254
+ ),
255
+ FunctionType.ARRAY_FILTER: lambda args, types: (
256
+ f"array_filter({args[0]}, {args[1]} -> {args[2]})"
257
+ ),
258
+ # math
259
+ FunctionType.ADD: lambda x, types: " + ".join(x),
260
+ FunctionType.ABS: lambda x, types: f"abs({x[0]})",
261
+ FunctionType.SUBTRACT: lambda x, types: " - ".join(x),
262
+ FunctionType.DIVIDE: lambda x, types: " / ".join(x),
263
+ FunctionType.MULTIPLY: lambda x, types: " * ".join(x),
264
+ FunctionType.ROUND: lambda x, types: f"round({x[0]},{x[1]})",
265
+ FunctionType.FLOOR: lambda x, types: f"floor({x[0]})",
266
+ FunctionType.CEIL: lambda x, types: f"ceil({x[0]})",
267
+ FunctionType.MOD: lambda x, types: f"{x[0]} % {x[1]}",
268
+ FunctionType.POWER: lambda x, types: f"{x[0]} ** {x[1]}",
269
+ FunctionType.SQRT: lambda x, types: f"sqrt({x[0]})",
270
+ FunctionType.RANDOM: lambda x, types: "random()",
271
+ FunctionType.LOG: lambda x, types: (
272
+ f"log({x[0]})" if x[1] == 10 else f"log({x[0]}, {x[1]})"
273
+ ),
274
+ # aggregate types
275
+ FunctionType.COUNT_DISTINCT: lambda x, types: f"count(distinct {x[0]})",
276
+ FunctionType.COUNT: lambda x, types: f"count({x[0]})",
277
+ FunctionType.SUM: lambda x, types: f"sum({x[0]})",
278
+ FunctionType.ARRAY_AGG: lambda x, types: f"array_agg({x[0]})",
279
+ FunctionType.LENGTH: lambda x, types: f"length({x[0]})",
280
+ FunctionType.AVG: lambda x, types: f"avg({x[0]})",
281
+ FunctionType.MAX: lambda x, types: f"max({x[0]})",
282
+ FunctionType.MIN: lambda x, types: f"min({x[0]})",
283
+ FunctionType.ANY: lambda x, types: f"any_value({x[0]})",
284
+ FunctionType.BOOL_OR: lambda x, types: f"bool_or({x[0]})",
285
+ FunctionType.BOOL_AND: lambda x, types: f"bool_and({x[0]})",
286
+ # string types
287
+ FunctionType.LIKE: lambda x, types: f" {x[0]} like {x[1]} ",
288
+ FunctionType.UPPER: lambda x, types: f"UPPER({x[0]}) ",
289
+ FunctionType.LOWER: lambda x, types: f"LOWER({x[0]}) ",
290
+ FunctionType.SUBSTRING: lambda x, types: f"SUBSTRING({x[0]},{x[1]},{x[2]})",
291
+ FunctionType.STRPOS: lambda x, types: f"STRPOS({x[0]},{x[1]})",
292
+ FunctionType.CONTAINS: lambda x, types: f"CONTAINS({x[0]},{x[1]})",
293
+ FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_CONTAINS({x[0]},{x[1]})",
294
+ FunctionType.REGEXP_EXTRACT: lambda x, types: f"REGEXP_EXTRACT({x[0]},{x[1]})",
295
+ FunctionType.REGEXP_REPLACE: lambda x, types: f"REGEXP_REPLACE({x[0]},{x[1]}, {x[2]})",
296
+ FunctionType.TRIM: lambda x, types: f"TRIM({x[0]})",
297
+ FunctionType.REPLACE: lambda x, types: f"REPLACE({x[0]},{x[1]},{x[2]})",
298
+ FunctionType.HASH: lambda x, types: hash_from_args(x[0], x[1]),
299
+ # FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
300
+ # date types
301
+ FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc({x[0]},{x[1]})",
302
+ FunctionType.DATE_PART: lambda x, types: f"date_part({x[0]},{x[1]})",
303
+ FunctionType.DATE_ADD: lambda x, types: f"date_add({x[0]},{x[1]}, {x[2]})",
304
+ FunctionType.DATE_SUB: lambda x, types: f"date_sub({x[0]},{x[1]}, {x[2]})",
305
+ FunctionType.DATE_DIFF: lambda x, types: f"date_diff({x[0]},{x[1]}, {x[2]})",
306
+ FunctionType.DATE: lambda x, types: f"date({x[0]})",
307
+ FunctionType.DATETIME: lambda x, types: f"datetime({x[0]})",
308
+ FunctionType.TIMESTAMP: lambda x, types: f"timestamp({x[0]})",
309
+ FunctionType.SECOND: lambda x, types: f"second({x[0]})",
310
+ FunctionType.MINUTE: lambda x, types: f"minute({x[0]})",
311
+ FunctionType.HOUR: lambda x, types: f"hour({x[0]})",
312
+ FunctionType.DAY: lambda x, types: f"day({x[0]})",
313
+ FunctionType.DAY_NAME: lambda x, types: f"dayname({x[0]})",
314
+ FunctionType.DAY_OF_WEEK: lambda x, types: f"day_of_week({x[0]})",
315
+ FunctionType.WEEK: lambda x, types: f"week({x[0]})",
316
+ FunctionType.MONTH: lambda x, types: f"month({x[0]})",
317
+ FunctionType.MONTH_NAME: lambda x, types: f"monthname({x[0]})",
318
+ FunctionType.QUARTER: lambda x, types: f"quarter({x[0]})",
319
+ FunctionType.YEAR: lambda x, types: f"year({x[0]})",
320
+ # string types
321
+ FunctionType.CONCAT: lambda x, types: f"concat({','.join(x)})",
322
+ # constant types
323
+ FunctionType.CURRENT_DATE: lambda x, types: "current_date()",
324
+ FunctionType.CURRENT_DATETIME: lambda x, types: "current_datetime()",
325
+ }
326
+
327
# Variant of FUNCTION_MAP in which the aggregate renderers are replaced by
# row-level expressions: counts become a 0/1 null check and the remaining
# aggregates pass their first argument through unchanged.
# NOTE(review): the name suggests this is used when the query grain already
# matches the aggregate's grain - confirm against the call site.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # Fixed: a space is required between WHEN and the operand; the previous
    # template rendered "CASE WHEN<expr>", fusing the keyword with the operand.
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
    FunctionType.MAX: lambda args, types: f"{args[0]}",
    FunctionType.MIN: lambda args, types: f"{args[0]}",
    FunctionType.ANY: lambda args, types: f"{args[0]}",
}
337
+
338
+
339
+ GENERIC_SQL_TEMPLATE: Template = Template(
340
+ """{%- if ctes %}
341
+ WITH {% if recursive%} RECURSIVE {% endif %}{% for cte in ctes %}
342
+ {{cte.name}} as (
343
+ {{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
344
+ {%- if full_select -%}
345
+ {{full_select}}
346
+ {% else -%}
347
+ SELECT
348
+ {%- if limit is not none %}
349
+ TOP {{ limit }}{% endif %}
350
+ {%- for select in select_columns %}
351
+ \t{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
352
+ {% if base %}FROM
353
+ \t{{ base }}{% endif %}{% if joins %}{% for join in joins %}
354
+ \t{{ join }}{% endfor %}{% endif %}{% if where %}
355
+ WHERE
356
+ \t{{ where }}{% endif %}{%- if group_by %}
357
+ GROUP BY {% for group in group_by %}
358
+ \t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
359
+ HAVING
360
+ \t{{ having }}{% endif %}{%- if order_by %}
361
+ ORDER BY{% for order in order_by %}
362
+ \t{{ order }}{% if not loop.last %},{% endif %}{% endfor %}
363
+ {% endif %}{% endif %}
364
+ """
365
+ )
366
+
367
+
368
+ CREATE_TABLE_SQL_TEMPLATE = Template(
369
+ """
370
+ CREATE {% if create_mode == "create_or_replace" %}OR REPLACE TABLE{% elif create_mode == "create_if_not_exists" %}TABLE IF NOT EXISTS{% else %}TABLE{% endif %} {{ name }} (
371
+ {%- for column in columns %}
372
+ {{ column.name }} {{ type_map[column.name] }}{% if column.comment %} COMMENT '{{ column.comment }}'{% endif %}{% if not loop.last %},{% endif %}
373
+ {%- endfor %}
374
+ )
375
+ {%- if partition_keys %}
376
+ PARTITIONED BY (
377
+ {%- for partition_key in partition_keys %}
378
+ {{ partition_key }}{% if not loop.last %},{% endif %}
379
+ {%- endfor %}
380
+ )
381
+ {%- endif %};
382
+ """.strip()
383
+ )
384
+
385
+
386
def safe_get_cte_value(
    coalesce: Callable,
    cte: CTE | UnionCTE,
    c: BuildConcept,
    quote_char: str,
    render_expr: Callable,
    use_map: dict[str, set[str]],
) -> Optional[str]:
    """Render a reference to concept ``c`` from the sources recorded on ``cte``.

    Args:
        coalesce: Renderer called as ``coalesce(references, [])`` when the
            concept has several sources.
        cte: CTE whose ``source_map`` and ``get_alias`` are consulted.
        c: Concept to look up by its ``address``.
        quote_char: Quote character placed around the source name.
        render_expr: Expression renderer, used when the single source's alias
            is a function rather than a column name.
        use_map: Mapping of source name to concept addresses; every source
            that ends up referenced gets ``c.address`` added.

    Returns:
        The rendered reference, or ``None`` when ``source_map`` has no
        (non-empty) entry for the concept.
    """

    def qualified(source, alias) -> str:
        # `<quote>source<quote>.<safely quoted alias>`
        return f"{quote_char}{source}{quote_char}.{safe_quote(alias, quote_char)}"

    sources = cte.source_map.get(c.address, None)
    if not sources:
        return None

    # A single source stored directly as a string.
    if isinstance(sources, str):
        alias = cte.get_alias(c, sources)
        use_map[sources].add(c.address)
        return qualified(sources, alias)

    # A single source stored in a one-element list.
    if isinstance(sources, list) and len(sources) == 1:
        only_source = sources[0]
        alias = cte.get_alias(c, only_source)
        if isinstance(alias, FUNCTION_ITEMS):
            # The alias is itself a function: render it as an expression
            # (no use_map entry is recorded on this path).
            return f"{render_expr(alias, cte=cte, raise_invalid=True)}"
        use_map[only_source].add(c.address)
        return qualified(only_source, alias)

    # Several sources: record all of them, then coalesce the references in
    # sorted order so the output is deterministic.
    for source in sources:
        use_map[source].add(c.address)
    references = [qualified(source, cte.get_alias(c, source)) for source in sources]
    references.sort()
    return coalesce(references, [])
421
+
422
+
423
+ class BaseDialect:
424
+ WINDOW_FUNCTION_MAP = WINDOW_FUNCTION_MAP
425
+ FUNCTION_MAP = FUNCTION_MAP
426
+ FUNCTION_GRAIN_MATCH_MAP = FUNCTION_GRAIN_MATCH_MAP
427
+ QUOTE_CHARACTER = "`"
428
+ SQL_TEMPLATE = GENERIC_SQL_TEMPLATE
429
+ CREATE_TABLE_SQL_TEMPLATE = CREATE_TABLE_SQL_TEMPLATE
430
+ DATATYPE_MAP = DATATYPE_MAP
431
+ COMPLEX_DATATYPE_MAP = COMPLEX_DATATYPE_MAP
432
+ UNNEST_MODE = UnnestMode.CROSS_APPLY
433
+ GROUP_MODE = GroupMode.AUTO
434
+ EXPLAIN_KEYWORD = "EXPLAIN"
435
+ NULL_WRAPPER = staticmethod(null_wrapper)
436
+ ALIAS_ORDER_REFERENCING_ALLOWED = True
437
+ TABLE_NOT_FOUND_PATTERN: str | None = None # Dialect-specific pattern to match
438
+
439
def __init__(
    self,
    rendering: Rendering | None = None,
    config: "DialectConfig | None" = None,
):
    """Set up the dialect's rendering options and per-instance state.

    Args:
        rendering: Rendering options; when omitted (or falsy) the global
            ``CONFIG.rendering`` is used.
        config: Optional dialect configuration, stored as given.
    """
    self.rendering = rendering if rendering else CONFIG.rendering
    self.config = config
    # Starts empty; missing keys are created on demand as empty sets.
    self.used_map: dict[str, set[str]] = defaultdict(set)
447
+
448
def render_source(self, address: Address) -> str:
    """Render the FROM-clause text for a datasource address.

    Returns:
        * ``(<location>)`` for ``AddressType.QUERY`` addresses;
        * the parenthesised contents of the file at ``location`` for file
          addresses of type ``AddressType.SQL``;
        * otherwise the location passed through ``self.safe_quote``.

    Raises:
        NotImplementedError: For file addresses of any other type.
    """
    if address.type == AddressType.QUERY:
        return f"({address.location})"
    if not address.is_file:
        return self.safe_quote(address.location)
    if address.type != AddressType.SQL:
        raise NotImplementedError(
            f"File source type {address.type} not supported by this dialect"
        )
    # SQL files are inlined as a parenthesised subquery.
    with open(address.location, "r", encoding="utf-8") as f:
        return f"({f.read()})"
459
+
460
+ def get_table_schema(
461
+ self, executor, table_name: str, schema: str | None = None
462
+ ) -> list[tuple]:
463
+ raise NotImplementedError
464
+
465
+ def get_table_primary_keys(
466
+ self, executor, table_name: str, schema: str | None = None
467
+ ) -> list[str]:
468
+ raise NotImplementedError
469
+
470
def get_table_sample(
    self,
    executor,
    table_name: str,
    schema: str | None = None,
    sample_size: int = 10000,
) -> list[tuple]:
    """Fetch up to ``sample_size`` rows from a table.

    Args:
        executor: Object exposing ``execute_raw_sql(sql)`` whose result has
            ``fetchall()``.
        table_name: Name of the table to sample.
        schema: Optional schema; when given the table is addressed as
            ``<schema>.<table_name>``.
        sample_size: Value placed in the ``LIMIT`` clause.

    Returns:
        Whatever ``fetchall()`` returns for the sample query.
    """
    qualified_name = f"{schema}.{table_name}" if schema else table_name
    sample_query = (
        f"SELECT * FROM {self.safe_quote(qualified_name)} LIMIT {sample_size}"
    )
    return executor.execute_raw_sql(sample_query).fetchall()
487
+
488
+ def get_table_last_modified(
489
+ self, executor, table_name: str, schema: str | None = None
490
+ ) -> str | None:
491
+ from datetime import datetime, timezone
492
+
493
+ return datetime.now(timezone.utc).isoformat()
494
+
495
def hash_column_value(self, column_name: str) -> str:
    """Render an md5 over the column's value cast to VARCHAR.

    The column name is quoted via ``self.safe_quote`` before being embedded.
    """
    quoted_column = self.safe_quote(column_name)
    return f"md5(CAST({quoted_column} AS VARCHAR))"
497
+
498
def aggregate_checksum(self, hash_expr: str) -> str:
    """Wrap a rendered hash expression as ``BIT_XOR(hash(<hash_expr>))``."""
    checksum_sql = f"BIT_XOR(hash({hash_expr}))"
    return checksum_sql
500
+
501
def render_order_item(
    self,
    order_item: BuildOrderItem,
    cte: CTE | UnionCTE,
) -> str:
    """Render one ORDER BY entry as ``<expression or alias> <direction>``.

    The quoted output alias is used only when all of the following hold: the
    expression is a concept, its address is one of the CTE's output columns,
    the dialect allows referencing aliases in ORDER BY, and the CTE's
    ``source_map`` has no (non-empty) entry for it. In every other case the
    expression itself is rendered.
    """
    expr = order_item.expr
    use_output_alias = (
        isinstance(expr, BuildConcept)
        and expr.address in cte.output_columns
        and self.ALIAS_ORDER_REFERENCING_ALLOWED
        # no recorded sources: fall through to the alias form
        and not cte.source_map.get(expr.address, [])
    )
    if use_output_alias:
        quote = self.QUOTE_CHARACTER
        return f"{quote}{expr.safe_address}{quote} {order_item.order.value}"
    return f"{self.render_expr(expr, cte=cte)} {order_item.order.value}"
519
+
520
def render_concept_sql(
    self,
    c: BuildConcept,
    cte: CTE | UnionCTE,
    alias: bool = True,
    raise_invalid: bool = False,
) -> str:
    """Render the SQL for concept ``c`` within ``cte``.

    When the concept has pseudonyms, the concept itself and then each
    pseudonym that ``cte.get_concept`` resolves are tried in turn with
    ``raise_invalid=True``; the first non-empty rendering wins and candidates
    that raise ``ValueError`` are skipped. If nothing was produced, the
    concept is rendered directly using the caller's ``raise_invalid``.

    Args:
        c: Concept to render.
        cte: CTE providing the rendering context.
        alias: When true, append ``as <quoted safe_address>``.
        raise_invalid: Forwarded to the fallback rendering attempt.

    Returns:
        The rendered SQL fragment.
    """
    rendered = None
    if c.pseudonyms:
        resolved = [cte.get_concept(name) for name in c.pseudonyms]
        candidates = [found for found in resolved if found]
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] pseudonym candidates are {[x.address for x in candidates]}"
        )
        for candidate in [c, *candidates]:
            try:
                logger.debug(
                    f"{LOGGER_PREFIX} [{c.address}] Attempting rendering w/ candidate {candidate.address}"
                )
                rendered = self._render_concept_sql(
                    candidate,
                    cte,
                    raise_invalid=True,
                )
            except ValueError:
                # This candidate cannot be rendered here; try the next one.
                continue
            if rendered:
                break
    if not rendered:
        rendered = self._render_concept_sql(
            c,
            cte,
            raise_invalid=raise_invalid,
        )
    if not alias:
        return rendered
    quote = self.QUOTE_CHARACTER
    return f"{rendered} as {quote}{c.safe_address}{quote}"
556
+
557
def _render_concept_sql(
    self,
    c: BuildConcept,
    cte: CTE | UnionCTE,
    raise_invalid: bool = False,
) -> str:
    """Render concept ``c`` against ``cte`` without an alias.

    If ``c`` has lineage and ``cte.source_map`` lists no source for it, the
    expression is derived here by dispatching on the lineage type (window,
    filter, rowset item, multiselect, comparison, aggregate, union,
    parameterised constant, or generic function). Otherwise the concept is
    looked up from the CTE's sources.

    Args:
        c: concept to render.
        cte: CTE the concept is rendered against.
        raise_invalid: when True, a concept with no resolvable source in a
            non-union CTE raises ``ValueError`` instead of yielding an
            ``INVALID_REFERENCE_STRING`` placeholder.

    Returns:
        The rendered SQL fragment.

    Raises:
        ValueError: no source reference could be found and ``raise_invalid``
            is set.
    """
    # only recurse while it's in sources of the current cte
    logger.debug(
        f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
    )

    # check if it's not inherited AND no pseudonyms are inherited
    if c.lineage and cte.source_map.get(c.address, []) == []:
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
        )
        if isinstance(c.lineage, WINDOW_ITEMS):
            rendered_order_components = [
                f"{self.render_expr(x.expr, cte, raise_invalid=raise_invalid)} {x.order.value}"
                for x in c.lineage.order_by
            ]
            rendered_over_components = [
                self.render_concept_sql(
                    x, cte, alias=False, raise_invalid=raise_invalid
                )
                for x in c.lineage.over
            ]

            rval = self.WINDOW_FUNCTION_MAP[c.lineage.type](
                concept=self.render_concept_sql(
                    c.lineage.content,
                    cte=cte,
                    alias=False,
                    raise_invalid=raise_invalid,
                ),
                window=",".join(rendered_over_components),
                sort=",".join(rendered_order_components),
                offset=c.lineage.index,
            )
        elif isinstance(c.lineage, FILTER_ITEMS):
            # for cases when we've optimized this: the CTE already applies
            # the same condition, so the content can be rendered directly
            if cte.condition == c.lineage.where.conditional:
                rval = self.render_expr(
                    c.lineage.content, cte=cte, raise_invalid=raise_invalid
                )
            else:
                rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_expr(c.lineage.content, cte=cte, raise_invalid=raise_invalid)} ELSE NULL END"
        elif isinstance(c.lineage, BuildRowsetItem):
            rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, BuildMultiSelectLineage):
            if c.address in c.lineage.calculated_derivations:
                assert c.lineage.derive is not None
                for x in c.lineage.derive.items:
                    if x.address == c.address:
                        rval = self.render_expr(
                            x.expr,
                            cte=cte,
                            raise_invalid=raise_invalid,
                        )
                        break
            else:
                rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, BuildComparison):
            rval = f"{self.render_expr(c.lineage.left, cte=cte, raise_invalid=raise_invalid)} {c.lineage.operator.value} {self.render_expr(c.lineage.right, cte=cte, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, AGGREGATE_ITEMS):
            args = [
                self.render_expr(v, cte)  # , alias=False)
                for v in c.lineage.function.arguments
            ]
            if cte.group_to_grain:
                rval = self.FUNCTION_MAP[c.lineage.function.operator](args, [])
            else:
                logger.debug(
                    f"{LOGGER_PREFIX} [{c.address}] ignoring aggregate, already at"
                    " target grain"
                )
                rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.function.operator](args, [])}"
        elif (
            isinstance(c.lineage, FUNCTION_ITEMS)
            and c.lineage.operator == FunctionType.UNION
        ):
            local_matched = [
                x
                for x in c.lineage.arguments
                if isinstance(x, BuildConcept) and x.address in cte.output_columns
            ]
            # if we're sorting by the output of the union
            if not local_matched:
                rval = c.safe_address
            else:
                rval = self.render_expr(local_matched[0], cte)
        elif (
            isinstance(c.lineage, FUNCTION_ITEMS)
            and c.lineage.operator == FunctionType.CONSTANT
            and self.rendering.parameters is True
            and c.datatype.data_type != DataType.MAP
        ):
            # constants are emitted as bind parameters when enabled
            rval = f":{c.safe_address}"
        else:
            args = []
            types = []
            for arg in c.lineage.arguments:
                if (
                    isinstance(arg, BuildConcept)
                    and arg.lineage
                    and isinstance(arg.lineage, FUNCTION_ITEMS)
                    and arg.lineage.operator
                    in (
                        FunctionType.ADD,
                        FunctionType.SUBTRACT,
                        FunctionType.DIVIDE,
                        FunctionType.MULTIPLY,
                    )
                ):
                    # wrap nested arithmetic in parentheses
                    args.append(
                        self.render_expr(
                            BuildParenthetical(content=arg),
                            cte=cte,
                            raise_invalid=raise_invalid,
                        )
                    )
                else:
                    args.append(
                        self.render_expr(arg, cte=cte, raise_invalid=raise_invalid)
                    )
                types.append(arg_to_datatype(arg))

            if cte.group_to_grain:
                rval = f"{self.FUNCTION_MAP[c.lineage.operator](args, types)}"
            else:

                rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.operator](args, types)}"
    else:
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] Rendering basic lookup from {cte.source_map.get(c.address,None)}"
        )

        raw_content = cte.get_alias(c)
        parent = cte.source_map.get(c.address, None)
        if parent:
            # record that this concept is consumed from its first source
            self.used_map[parent[0]].add(c.address)
        if isinstance(raw_content, RawColumnExpr):
            rval = raw_content.text
        elif isinstance(raw_content, FUNCTION_ITEMS):
            rval = self.render_expr(
                raw_content, cte=cte, raise_invalid=raise_invalid
            )
        else:
            rval = safe_get_cte_value(
                self.FUNCTION_MAP[FunctionType.COALESCE],
                cte,
                c,
                self.QUOTE_CHARACTER,
                self.render_expr,
                self.used_map,
            )
            if not rval:
                # unions won't have a specific source mapped; just use a generic column reference
                # we shouldn't ever have an expression at this point, so will be safe
                if isinstance(cte, UnionCTE):
                    rval = c.safe_address
                else:
                    if raise_invalid:
                        # name the concept and CTE: rval is always empty
                        # here, so interpolating it carries no information
                        raise ValueError(
                            f"Invalid reference string found in query: missing source reference to {c.address} in {cte.name}, this should never occur. Please report this issue."
                        )
                    rval = INVALID_REFERENCE_STRING(
                        f"Missing source reference to {c.address}"
                    )
    return rval
727
+
728
def render_array_unnest(
    self,
    left,
    right,
    operator: ComparisonOperator,
    cte: CTE | UnionCTE | None = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
):
    """Render ``left <operator> right`` for a comparison against an array.

    Both operands go through ``render_expr`` with the same rendering
    context; the operator's ``value`` is placed between them.
    """
    lhs = self.render_expr(
        left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
    )
    symbol = operator.value
    rhs = self.render_expr(
        right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
    )
    return f"{lhs} {symbol} {rhs}"
738
+
739
def render_expr(
    self,
    e: Union[
        BuildConcept,
        BuildFunction,
        BuildConditional,
        BuildAggregateWrapper,
        BuildComparison,
        BuildCaseWhen,
        BuildCaseElse,
        BuildSubselectComparison,
        BuildWindowItem,
        BuildFilterItem,
        BuildParenthetical,
        BuildParamaterizedConceptReference,
        BuildMultiSelectLineage,
        BuildRowsetItem,
        str,
        int,
        list,
        bool,
        float,
        date,
        datetime,
        DataType,
        TraitDataType,
        MagicConstants,
        MapWrapper[Any, Any],
        MapType,
        NumericType,
        StructType,
        ArrayType,
        ListWrapper[Any],
        TupleWrapper[Any],
        DatePart,
    ],
    cte: Optional[CTE | UnionCTE] = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
) -> str:
    """Render an expression node or literal value as a SQL fragment.

    Dispatches on the runtime type of ``e``; composite nodes recurse into
    ``render_expr`` for their children.

    Args:
        e: expression node, datatype, or Python literal to render.
        cte: CTE used to resolve concept references, when available.
        cte_map: mapping from concept address to the CTE providing it; used
            for concepts when ``cte`` is not given.
        raise_invalid: forwarded to nested rendering calls.

    Returns:
        The rendered SQL fragment.

    Raises:
        ValueError: ``e`` is of a type with no rendering rule.
    """
    if isinstance(e, SUBSELECT_COMPARISON_ITEMS):
        right: Any = e.right
        # strip any wrapping parentheses to inspect the real right operand
        while isinstance(right, BuildParenthetical):
            right = right.content
        if isinstance(right, BuildConcept):
            # we won't always have an existence map
            # so fall back to the normal map
            lookup_cte = cte
            if cte_map and not lookup_cte:
                lookup_cte = cte_map.get(right.address)

            assert lookup_cte, "Subselects must be rendered with a CTE in context"
            if right.address not in lookup_cte.existence_source_map:
                lookup = lookup_cte.source_map.get(
                    right.address,
                    [
                        INVALID_REFERENCE_STRING(
                            f"Missing source reference to {right.address}"
                        )
                    ],
                )
            else:
                lookup = lookup_cte.existence_source_map[right.address]
            if len(lookup) > 0:
                target = lookup[0]
            else:
                target = INVALID_REFERENCE_STRING(
                    f"Missing source CTE for {right.address}"
                )
            assert cte, "CTE must be provided for inlined CTEs"
            self.used_map[target].add(right.address)
            if target in cte.inlined_ctes:
                info = cte.inlined_ctes[target]
                return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} from {info.new_base} as {target} where {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} is not null)"
            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} is not null)"
        elif isinstance(right, BuildParamaterizedConceptReference):
            if isinstance(right.concept.lineage, BuildFunction) and isinstance(
                right.concept.lineage.arguments[0], ListWrapper
            ):
                return self.render_array_unnest(
                    e.left,
                    right,
                    e.operator,
                    cte=cte,
                    cte_map=cte_map,
                    raise_invalid=raise_invalid,
                )
            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
        elif isinstance(
            right,
            (ListWrapper, TupleWrapper, BuildParenthetical),
        ):
            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"

        return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} ({self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)})"
    elif isinstance(e, COMPARISON_ITEMS):
        return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
    elif isinstance(e, CONDITIONAL_ITEMS):
        # conditions need to be nested in parentheses
        return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
    elif isinstance(e, WINDOW_ITEMS):
        rendered_order_components = [
            f"{self.render_expr(x.expr, cte, cte_map=cte_map, raise_invalid=raise_invalid)} {x.order.value}"
            for x in e.order_by
        ]
        rendered_over_components = [
            self.render_expr(x, cte, cte_map=cte_map, raise_invalid=raise_invalid)
            for x in e.over
        ]
        return f"{self.WINDOW_FUNCTION_MAP[e.type](concept = self.render_expr(e.content, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid), window=','.join(rendered_over_components), sort=','.join(rendered_order_components))}"  # noqa: E501
    elif isinstance(e, PARENTHETICAL_ITEMS):
        # conditions need to be nested in parentheses
        if isinstance(e.content, list):
            return f"( {','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e.content])} )"
        return f"( {self.render_expr(e.content, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} )"
    elif isinstance(e, CASE_WHEN_ITEMS):
        return f"WHEN {self.render_expr(e.comparison, cte=cte, cte_map=cte_map)} THEN {self.render_expr(e.expr, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
    elif isinstance(e, CASE_ELSE_ITEMS):
        return f"ELSE {self.render_expr(e.expr, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
    elif isinstance(e, FUNCTION_ITEMS):
        arguments = []
        for arg in e.arguments:
            if (
                isinstance(arg, BuildConcept)
                and arg.lineage
                and isinstance(arg.lineage, FUNCTION_ITEMS)
                and arg.lineage.operator
                in (
                    FunctionType.ADD,
                    FunctionType.SUBTRACT,
                    FunctionType.DIVIDE,
                    FunctionType.MULTIPLY,
                )
            ):
                # wrap nested arithmetic in parentheses
                arguments.append(
                    self.render_expr(
                        BuildParenthetical(content=arg),
                        cte=cte,
                        cte_map=cte_map,
                        raise_invalid=raise_invalid,
                    )
                )
            else:
                arguments.append(
                    self.render_expr(
                        arg, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
                    )
                )

        if cte and cte.group_to_grain:
            return self.FUNCTION_MAP[e.operator](arguments, [])

        return self.FUNCTION_GRAIN_MATCH_MAP[e.operator](arguments, [])
    elif isinstance(e, AGGREGATE_ITEMS):
        return self.render_expr(
            e.function, cte, cte_map=cte_map, raise_invalid=raise_invalid
        )
    elif isinstance(e, FILTER_ITEMS):
        return f"CASE WHEN {self.render_expr(e.where.conditional,cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} THEN {self.render_expr(e.content, cte, cte_map=cte_map, raise_invalid=raise_invalid)} ELSE NULL END"
    elif isinstance(e, BuildConcept):
        if (
            isinstance(e.lineage, FUNCTION_ITEMS)
            and e.lineage.operator == FunctionType.CONSTANT
            and self.rendering.parameters is True
            and e.datatype.data_type != DataType.MAP
        ):
            return f":{e.safe_address}"
        if cte:
            return self.render_concept_sql(
                e,
                cte,
                alias=False,
                raise_invalid=raise_invalid,
            )
        elif cte_map:
            self.used_map[cte_map[e.address].name].add(e.address)
            return f"{cte_map[e.address].name}.{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
        return f"{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
    elif isinstance(e, bool):
        # bool must be tested before int: bool is a subclass of int
        return f"{e}"
    elif isinstance(e, str):
        # NOTE(review): embedded single quotes are not escaped here
        return f"'{e}'"
    elif isinstance(e, (int, float)):
        return str(e)
    elif isinstance(e, TupleWrapper):
        return f"({','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e])})"
    elif isinstance(e, MapWrapper):
        return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}:{self.render_expr(v, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}' for k, v in e.items()])}}}"
    elif isinstance(e, ListWrapper):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e], [])}"
    elif isinstance(e, DataType):
        return self.DATATYPE_MAP.get(e, e.value)
    elif isinstance(e, DatePart):
        return str(e.value)
    elif isinstance(e, NumericType):
        return f"{self.DATATYPE_MAP[DataType.NUMERIC]}({e.precision},{e.scale})"
    elif isinstance(e, MagicConstants):
        if e == MagicConstants.NULL:
            return "null"
        return str(e.value)
    elif isinstance(e, datetime):
        # datetime must be tested before date: datetime is a subclass of
        # date, so the date check would otherwise capture it
        return self.FUNCTION_MAP[FunctionType.DATETIME_LITERAL](e, [])
    elif isinstance(e, date):
        return self.FUNCTION_MAP[FunctionType.DATE_LITERAL](e, [])
    elif isinstance(e, TraitDataType):
        return self.render_expr(e.type, cte=cte, cte_map=cte_map)
    elif isinstance(e, ArgBinding):
        return e.name
    elif isinstance(e, Ordering):
        return str(e.value)
    elif isinstance(e, ArrayType):
        return f"{self.COMPLEX_DATATYPE_MAP[DataType.ARRAY](self.render_expr(e.value_data_type, cte=cte, cte_map=cte_map))}"
    elif isinstance(e, list):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e], [])}"
    elif isinstance(e, BuildParamaterizedConceptReference):
        if self.rendering.parameters:
            if e.concept.namespace == DEFAULT_NAMESPACE:
                return f":{e.concept.name}"
            return f":{e.concept.address.replace('.', '_')}"
        elif e.concept.lineage:
            return self.render_expr(e.concept.lineage, cte=cte, cte_map=cte_map)
        return f"{self.QUOTE_CHARACTER}{e.concept.address}{self.QUOTE_CHARACTER}"

    else:
        raise ValueError(f"Unable to render type {type(e)} {e}")
964
+
965
def render_cte_group_by(
    self, cte: CTE | UnionCTE, select_columns
) -> Optional[list[str]]:
    """Build the GROUP BY entries for ``cte``.

    Args:
        cte: CTE being rendered.
        select_columns: rendered select column strings, in output order.

    Returns:
        ``None`` when the CTE does not group to grain. When
        ``GROUP_MODE`` is ``GroupMode.AUTO``, the sorted rendered group
        expressions. Otherwise, the 1-based positions (as strings) of the
        select columns whose expression is one of the group expressions.

    Raises:
        ValueError: a group expression does not appear in ``select_columns``.
    """
    if not cte.group_to_grain:
        return None
    base = {self.render_concept_sql(c, cte, alias=False) for c in cte.group_concepts}
    if self.GROUP_MODE == GroupMode.AUTO:
        return sorted(base)

    # find the index of each column in the select columns
    final = []
    found = set()
    for position, column in enumerate(select_columns, start=1):
        # strip only the trailing alias: the expression itself may contain
        # " as " (e.g. a cast), so splitting on the first occurrence would
        # truncate it and the lookup below would miss
        pre_alias = column.rsplit(" as ", 1)[0]
        if pre_alias in base:
            final.append(str(position))
            found.add(pre_alias)
    if not base <= found:
        raise ValueError(
            f"Group by columns {base} not found in select columns {select_columns}"
        )
    return final
991
+
992
def safe_quote(self, name: str) -> str:
    """Quote ``name`` via the module-level ``safe_quote`` helper, using this dialect's quote character."""
    quote_char = self.QUOTE_CHARACTER
    return safe_quote(name, quote_char)
994
+
995
def quote(self, name: str) -> str:
    """Wrap ``name`` in this dialect's quote character on both sides."""
    return "{0}{1}{0}".format(self.QUOTE_CHARACTER, name)
997
+
998
def render_cte(self, cte: CTE | UnionCTE, auto_sort: bool = True) -> CompiledCTE:
    """Render a CTE into a ``CompiledCTE`` (name plus SQL statement).

    Union and recursive CTEs are rendered by joining the statements of
    their internal CTEs. Any other CTE is rendered through
    ``SQL_TEMPLATE`` from its select columns, source, joins, where/having
    split, ordering and grouping.

    Args:
        cte: CTE to render.
        auto_sort: when True, select columns are sorted alphabetically.

    Returns:
        The compiled CTE.

    Raises:
        SyntaxError: the CTE has joins, no from clause to render, and no
            unnest-derived source for the current ``UNNEST_MODE``.
    """
    if isinstance(cte, UnionCTE):
        base_statement = f"\n{cte.operator}\n".join(
            [
                self.render_cte(child, auto_sort=False).statement
                for child in cte.internal_ctes
            ]
        )
        if cte.order_by:

            ordering = [self.render_order_item(i, cte) for i in cte.order_by.items]
            base_statement += "\nORDER BY " + ",".join(ordering)
        return CompiledCTE(name=cte.name, statement=base_statement)
    elif isinstance(cte, RecursiveCTE):
        base_statement = "\nUNION ALL\n".join(
            [self.render_cte(child, False).statement for child in cte.internal_ctes]
        )
        return CompiledCTE(name=cte.name, statement=base_statement)
    if self.UNNEST_MODE in (
        UnnestMode.CROSS_APPLY,
        UnnestMode.CROSS_JOIN,
        UnnestMode.CROSS_JOIN_ALIAS,
        UnnestMode.SNOWFLAKE,
    ):
        # for a cross apply, derivation happens in the join
        # so we only use the alias to select
        # built once here rather than once per output column
        derived_addresses = {y.address for y in cte.join_derived_concepts}
        select_columns = [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in derived_addresses
            and c.address not in cte.hidden_concepts
        ] + [
            f"{self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
            for c in cte.join_derived_concepts
            if c.address not in cte.hidden_concepts
        ]
    elif self.UNNEST_MODE in (UnnestMode.CROSS_JOIN_UNNEST, UnnestMode.PRESTO):
        derived_addresses = {y.address for y in cte.join_derived_concepts}
        select_columns = [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in derived_addresses
            and c.address not in cte.hidden_concepts
        ] + [
            f"{UNNEST_NAME} as {self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
            for c in cte.join_derived_concepts
            if c.address not in cte.hidden_concepts
        ]
    else:
        # otherwise, assume we are unnesting directly in the select
        select_columns = [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in cte.hidden_concepts
        ]
    if auto_sort:
        select_columns = sorted(select_columns)
    source: str | None = cte.base_name
    if not cte.render_from_clause:
        if len(cte.joins) > 0:
            if cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.CROSS_JOIN_ALIAS,
                # UnnestMode.CROSS_JOIN_UNNEST,
                UnnestMode.CROSS_JOIN,
                UnnestMode.CROSS_APPLY,
            ):

                source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_expr, cte)}"
            elif cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.CROSS_JOIN_UNNEST,
            ):
                source = f"{self.render_expr(cte.join_derived_concepts[0], cte)} as {self.QUOTE_CHARACTER}{UNNEST_NAME}{self.QUOTE_CHARACTER}"
            elif cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.PRESTO,
            ):
                source = f"{self.render_expr(cte.join_derived_concepts[0], cte)} as t({self.QUOTE_CHARACTER}{UNNEST_NAME}{self.QUOTE_CHARACTER})"
            elif (
                cte.join_derived_concepts
                and self.UNNEST_MODE == UnnestMode.SNOWFLAKE
            ):
                source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_expr, cte)}"
            # direct - eg DUCK DB - can be directly selected inline
            elif (
                cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT
            ):
                source = None
            else:
                raise SyntaxError("CTE has joins but no from clause")
        else:
            source = None
    else:
        addr = cte.source_address
        if isinstance(addr, Address):
            source = self.render_source(addr)
        elif cte.quote_address:
            source = safe_quote(addr, self.QUOTE_CHARACTER)
        else:
            source = addr
        if cte.base_name != cte.base_alias:
            source = f"{source} as {self.QUOTE_CHARACTER}{cte.base_alias}{self.QUOTE_CHARACTER}"
    if not cte.render_from_clause:
        final_joins = []
    else:
        final_joins = cte.joins or []
    where: BuildConditional | BuildParenthetical | BuildComparison | None = None
    having: BuildConditional | BuildParenthetical | BuildComparison | None = None
    # concepts that have at least one source in this CTE
    materialized = {x for x, v in cte.source_map.items() if v}
    if cte.condition:
        if not cte.group_to_grain or is_scalar_condition(
            cte.condition, materialized=materialized
        ):
            where = cte.condition

        else:
            # grouped CTE with a non-scalar condition: scalar components go
            # to WHERE, the remainder to HAVING
            components = decompose_condition(cte.condition)
            for x in components:
                if is_scalar_condition(x, materialized=materialized):
                    where = where + x if where else x
                else:
                    having = having + x if having else x

    logger.info(f"{LOGGER_PREFIX} {len(final_joins)} joins for cte {cte.name}")
    return CompiledCTE(
        name=cte.name,
        statement=self.SQL_TEMPLATE.render(
            select_columns=select_columns,
            base=f"{source}" if source else None,
            grain=cte.grain,
            limit=cte.limit,
            comment=cte.comment if CONFIG.show_comments else None,
            # some joins may not need to be rendered
            joins=[
                j
                for j in [
                    render_join(
                        join,
                        self.QUOTE_CHARACTER,
                        self.render_expr,
                        cte,
                        use_map=self.used_map,
                        unnest_mode=self.UNNEST_MODE,
                        null_wrapper=self.NULL_WRAPPER,
                    )
                    for join in final_joins
                ]
                if j
            ],
            where=(self.render_expr(where, cte) if where else None),
            having=(self.render_expr(having, cte) if having else None),
            order_by=(
                [self.render_order_item(i, cte) for i in cte.order_by.items]
                if cte.order_by
                else None
            ),
            group_by=self.render_cte_group_by(cte, select_columns),
        ),
    )
1154
+
1155
def generate_ctes(
    self,
    query: ProcessedQuery,
) -> List[CompiledCTE]:
    """Render every CTE of ``query`` in order.

    All CTEs but the last are rendered with the default column sorting.
    The last one is first passed through ``sort_select_output`` and then
    rendered with ``auto_sort=False``.
    """
    compiled = [self.render_cte(item) for item in query.ctes[:-1]]
    # last CTE needs to respect the user output order
    terminal = sort_select_output(query.ctes[-1], query)
    compiled.append(self.render_cte(terminal, auto_sort=False))
    return compiled
1163
+
1164
def create_show_output(
    self,
    environment: Environment,
    content: ShowCategory,
):
    """Build the static result of a ``show <category>`` statement.

    Only ``ShowCategory.CONCEPTS`` is handled: one row per non-internal
    concept in ``environment`` with its address (bare name when in the
    default namespace), datatype as a string, and description (empty
    string when missing).

    Raises:
        NotImplementedError: ``content`` is any other category.
    """
    if content != ShowCategory.CONCEPTS:
        raise NotImplementedError(f"Show category {content} not implemented")

    address_key = DEFAULT_CONCEPTS["concept_address"].address
    datatype_key = DEFAULT_CONCEPTS["concept_datatype"].address
    description_key = DEFAULT_CONCEPTS["concept_description"].address

    output_columns = [
        environment.concepts[key].reference
        for key in (address_key, datatype_key, description_key)
    ]

    output_values = []
    for _, concept in environment.concepts.items():
        if concept.is_internal:
            continue
        if concept.namespace == DEFAULT_NAMESPACE:
            shown_address = concept.name
        else:
            shown_address = concept.address
        output_values.append(
            {
                address_key: shown_address,
                datatype_key: str(concept.datatype),
                description_key: concept.metadata.description or "",
            }
        )

    return ProcessedShowStatement(
        output_columns=output_columns,
        output_values=[ProcessedStaticValueOutput(values=output_values)],
    )
1203
+
1204
def generate_queries(
    self,
    environment: Environment,
    statements: Sequence[
        SelectStatement
        | MultiSelectStatement
        | PersistStatement
        | ShowStatement
        | ConceptDeclarationStatement
        | RowsetDerivationStatement
        | ImportStatement
        | RawSQLStatement
        | MergeStatementV2
        | CopyStatement
        | ValidateStatement
        | CreateStatement
        | PublishStatement
        | MockStatement
    ],
    hooks: Optional[List[BaseHook]] = None,
) -> List[PROCESSED_STATEMENT_TYPES]:
    """Convert parsed statements into their processed counterparts.

    Statements are handled in order. Persist, copy, select, multiselect
    and rowset statements first notify each hook; rowset statements and
    pure declarations produce no output entry.

    Args:
        environment: environment the statements are processed against.
        statements: parsed statements.
        hooks: optional hooks, notified per statement and passed on to the
            ``process_*`` helpers.

    Returns:
        The processed statements, in input order.

    Raises:
        NotImplementedError: a statement, or the content of a show
            statement, is of an unhandled type.
    """
    notify = hooks or []
    processed: List[PROCESSED_STATEMENT_TYPES] = []
    for stmt in statements:
        if isinstance(stmt, PersistStatement):
            for hook in notify:
                hook.process_persist_info(stmt)
            persisted = process_persist(environment, stmt, hooks=hooks)
            processed.append(persisted)
        elif isinstance(stmt, CopyStatement):
            for hook in notify:
                hook.process_select_info(stmt.select)
            copied = process_copy(environment, stmt, hooks=hooks)
            processed.append(copied)
        elif isinstance(stmt, SelectStatement):
            for hook in notify:
                hook.process_select_info(stmt)
            processed.append(process_query(environment, stmt, hooks=hooks))
        elif isinstance(stmt, MultiSelectStatement):
            for hook in notify:
                hook.process_multiselect_info(stmt)
            processed.append(process_query(environment, stmt, hooks=hooks))
        elif isinstance(stmt, RowsetDerivationStatement):
            # rowsets only notify hooks; nothing is emitted for them
            for hook in notify:
                hook.process_rowset_info(stmt)
        elif isinstance(stmt, ShowStatement):
            # TODO - encapsulate this a little better
            shown = stmt.content
            if isinstance(shown, SelectStatement):
                query_text_column = environment.concepts[
                    DEFAULT_CONCEPTS["query_text"].address
                ].reference
                shown_query = process_query(environment, shown, hooks=hooks)
                processed.append(
                    ProcessedShowStatement(
                        output_columns=[query_text_column],
                        output_values=[shown_query],
                    )
                )
            elif isinstance(shown, ShowCategory):
                processed.append(self.create_show_output(environment, shown))
            elif isinstance(shown, ValidateStatement):
                validate_columns = [
                    environment.concepts[DEFAULT_CONCEPTS[key].address].reference
                    for key in ("label", "query_text", "expected")
                ]
                processed.append(
                    ProcessedShowStatement(
                        output_columns=validate_columns,
                        output_values=[
                            ProcessedValidateStatement(
                                scope=shown.scope,
                                targets=shown.targets,
                            )
                        ],
                    )
                )
            else:
                raise NotImplementedError(type(shown))
        elif isinstance(stmt, RawSQLStatement):
            processed.append(ProcessedRawSQLStatement(text=stmt.text))
        elif isinstance(stmt, ValidateStatement):
            processed.append(
                ProcessedValidateStatement(
                    scope=stmt.scope,
                    targets=stmt.targets,
                )
            )
        elif isinstance(stmt, MockStatement):
            processed.append(
                ProcessedMockStatement(
                    scope=stmt.scope,
                    targets=stmt.targets,
                )
            )
        elif isinstance(stmt, CreateStatement):
            processed.append(process_create_statement(stmt, environment))
        elif isinstance(stmt, PublishStatement):
            processed.append(
                ProcessedPublishStatement(
                    scope=stmt.scope,
                    targets=stmt.targets,
                    action=stmt.action,
                )
            )
        elif isinstance(
            stmt,
            (
                ConceptDeclarationStatement,
                MergeStatementV2,
                ImportStatement,
                RowsetDerivationStatement,
                Datasource,
                FunctionDeclaration,
            ),
        ):
            # declarations produce no processed output of their own
            continue
        else:
            raise NotImplementedError(type(stmt))
    return processed
1340
+
1341
def generate_partitioned_insert(
    self,
    query: ProcessedQueryPersist,
    recursive: bool,
    compiled_ctes: list[CompiledCTE],
) -> str:
    """Render an ``INSERT OVERWRITE`` into the persist target of ``query``.

    The last compiled CTE supplies the final select; the preceding ones
    are passed to the template as CTEs.
    """
    destination = self.safe_quote(query.output_to.address.location)
    insert_clause = f"INSERT OVERWRITE {destination}"
    final_select = compiled_ctes[-1].statement
    leading_ctes = compiled_ctes[:-1]
    return self.SQL_TEMPLATE.render(
        recursive=recursive,
        output=insert_clause,
        full_select=final_select,
        ctes=leading_ctes,
    )
1353
+
1354
def compile_create_table_statement(
    self, target: CreateTableInfo, create_mode: CreateMode
) -> str:
    """Render a create-table statement for ``target``.

    Each column's type is rendered through ``render_expr`` and passed to
    ``CREATE_TABLE_SQL_TEMPLATE`` keyed by column name, together with the
    quoted table name, the columns, the partition keys and the create
    mode's value.
    """
    rendered_types = {
        column.name: self.render_expr(column.type) for column in target.columns
    }
    return self.CREATE_TABLE_SQL_TEMPLATE.render(
        create_mode=create_mode.value,
        name=self.safe_quote(target.name),
        columns=target.columns,
        type_map=rendered_types,
        partition_keys=target.partition_keys,
    )
1367
+
1368
def compile_statement(
    self,
    query: PROCESSED_STATEMENT_TYPES,
) -> str:
    """Compile a processed statement into SQL text.

    Show statements become ``EXPLAIN``-prefixed compilations of their
    query/copy values; raw SQL is returned as-is; validate, mock and
    publish statements yield a placeholder; create statements render one
    create-table statement per target. Anything else is treated as a
    query: its CTEs are rendered and assembled through ``SQL_TEMPLATE``,
    with an insert prefix for persist queries.

    Raises:
        NotImplementedError: a persist query uses an unhandled persist mode.
        ValueError: strict mode is on and the compiled text contains an
            invalid-reference marker.
    """
    if isinstance(query, ProcessedShowStatement):
        explained = []
        for value in query.output_values:
            if isinstance(value, (ProcessedQuery, ProcessedCopyStatement)):
                explained.append(
                    f"{self.EXPLAIN_KEYWORD} {self.compile_statement(value)}"
                )
        return ";\n".join(explained)
    if isinstance(query, ProcessedRawSQLStatement):
        return query.text
    if isinstance(query, ProcessedValidateStatement):
        return "--Trilogy validate statements do not have a generic SQL representation;\nselect 1;"
    if isinstance(query, ProcessedMockStatement):
        return "--Trilogy mock statements do not have a generic SQL representation;\nselect 1;"
    if isinstance(query, ProcessedPublishStatement):
        return "--Trilogy publish statements do not have a generic SQL representation;\nselect 1;"
    if isinstance(query, ProcessedCreateStatement):
        return "\n".join(
            [
                self.compile_create_table_statement(target, query.create_mode)
                for target in query.targets
            ]
        )

    recursive = any(isinstance(x, RecursiveCTE) for x in query.ctes)

    compiled_ctes = self.generate_ctes(query)
    output = None
    if isinstance(query, ProcessedQueryPersist):
        mode = query.persist_mode
        if mode == PersistMode.OVERWRITE:
            create_table_info = datasource_to_create_table_info(query.datasource)
            output = f"{self.compile_create_table_statement(create_table_info, CreateMode.CREATE_OR_REPLACE)} INSERT INTO {self.safe_quote(query.output_to.address.location)} "
        elif mode == PersistMode.APPEND:
            if query.partition_by:
                # partitioned appends are rendered by a dedicated helper
                return self.generate_partitioned_insert(
                    query, recursive, compiled_ctes
                )
            output = f"INSERT INTO {self.safe_quote(query.output_to.address.location)} "
        else:
            raise NotImplementedError(
                f"Persist mode {query.persist_mode} not implemented"
            )

    final = self.SQL_TEMPLATE.render(
        recursive=recursive,
        output=output,
        full_select=compiled_ctes[-1].statement,
        ctes=compiled_ctes[:-1],
    )

    if CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in final:
        raise ValueError(
            f"Invalid reference string found in query: {final}, this should never"
            " occur. Please create an issue to report this."
        )
    logger.info(f"{LOGGER_PREFIX} Compiled query: {final}")
    return final