pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
- pytrilogy-0.3.148.dist-info/METADATA +555 -0
- pytrilogy-0.3.148.dist-info/RECORD +206 -0
- pytrilogy-0.3.148.dist-info/WHEEL +5 -0
- pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +27 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +119 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +454 -0
- trilogy/core/env_processor.py +239 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1240 -0
- trilogy/core/graph_models.py +142 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2662 -0
- trilogy/core/models/build.py +2603 -0
- trilogy/core/models/build_environment.py +165 -0
- trilogy/core/models/core.py +506 -0
- trilogy/core/models/datasource.py +434 -0
- trilogy/core/models/environment.py +756 -0
- trilogy/core/models/execute.py +1213 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +270 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +207 -0
- trilogy/core/processing/node_generators/node_merge_node.py +695 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +786 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +522 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +604 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1431 -0
- trilogy/dialect/bigquery.py +314 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +159 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +376 -0
- trilogy/dialect/enums.py +149 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/__init__.py +17 -0
- trilogy/execution/config.py +119 -0
- trilogy/execution/state/__init__.py +0 -0
- trilogy/execution/state/file_state_store.py +0 -0
- trilogy/execution/state/sqllite_state_store.py +0 -0
- trilogy/execution/state/state_store.py +301 -0
- trilogy/executor.py +656 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +135 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2863 -0
- trilogy/parsing/render.py +773 -0
- trilogy/parsing/trilogy.lark +544 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +45 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +306 -0
- trilogy/scripts/common.py +430 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +311 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +555 -0
- trilogy/scripts/environment.py +59 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +472 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +748 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/refresh.py +106 -0
- trilogy/scripts/run.py +79 -0
- trilogy/scripts/serve.py +202 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +129 -0
- trilogy/scripts/trilogy.py +75 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
trilogy/dialect/base.py
ADDED
|
@@ -0,0 +1,1431 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from datetime import date, datetime
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from trilogy.dialect.config import DialectConfig
|
|
7
|
+
|
|
8
|
+
from jinja2 import Template
|
|
9
|
+
|
|
10
|
+
from trilogy.constants import (
|
|
11
|
+
CONFIG,
|
|
12
|
+
DEFAULT_NAMESPACE,
|
|
13
|
+
MagicConstants,
|
|
14
|
+
Rendering,
|
|
15
|
+
logger,
|
|
16
|
+
)
|
|
17
|
+
from trilogy.core.constants import UNNEST_NAME
|
|
18
|
+
from trilogy.core.enums import (
|
|
19
|
+
AddressType,
|
|
20
|
+
ComparisonOperator,
|
|
21
|
+
CreateMode,
|
|
22
|
+
DatePart,
|
|
23
|
+
FunctionType,
|
|
24
|
+
GroupMode,
|
|
25
|
+
Modifier,
|
|
26
|
+
Ordering,
|
|
27
|
+
PersistMode,
|
|
28
|
+
ShowCategory,
|
|
29
|
+
UnnestMode,
|
|
30
|
+
WindowType,
|
|
31
|
+
)
|
|
32
|
+
from trilogy.core.internal import DEFAULT_CONCEPTS
|
|
33
|
+
from trilogy.core.models.author import ArgBinding, arg_to_datatype
|
|
34
|
+
from trilogy.core.models.build import (
|
|
35
|
+
BuildAggregateWrapper,
|
|
36
|
+
BuildCaseElse,
|
|
37
|
+
BuildCaseWhen,
|
|
38
|
+
BuildComparison,
|
|
39
|
+
BuildConcept,
|
|
40
|
+
BuildConditional,
|
|
41
|
+
BuildFilterItem,
|
|
42
|
+
BuildFunction,
|
|
43
|
+
BuildMultiSelectLineage,
|
|
44
|
+
BuildOrderItem,
|
|
45
|
+
BuildParamaterizedConceptReference,
|
|
46
|
+
BuildParenthetical,
|
|
47
|
+
BuildRowsetItem,
|
|
48
|
+
BuildSubselectComparison,
|
|
49
|
+
BuildWindowItem,
|
|
50
|
+
)
|
|
51
|
+
from trilogy.core.models.core import (
|
|
52
|
+
ArrayType,
|
|
53
|
+
DataType,
|
|
54
|
+
ListWrapper,
|
|
55
|
+
MapType,
|
|
56
|
+
MapWrapper,
|
|
57
|
+
NumericType,
|
|
58
|
+
StructType,
|
|
59
|
+
TraitDataType,
|
|
60
|
+
TupleWrapper,
|
|
61
|
+
)
|
|
62
|
+
from trilogy.core.models.datasource import Address, Datasource, RawColumnExpr
|
|
63
|
+
from trilogy.core.models.environment import Environment
|
|
64
|
+
from trilogy.core.models.execute import CTE, CompiledCTE, RecursiveCTE, UnionCTE
|
|
65
|
+
from trilogy.core.processing.utility import (
|
|
66
|
+
decompose_condition,
|
|
67
|
+
is_scalar_condition,
|
|
68
|
+
sort_select_output,
|
|
69
|
+
)
|
|
70
|
+
from trilogy.core.query_processor import process_copy, process_persist, process_query
|
|
71
|
+
from trilogy.core.statements.author import (
|
|
72
|
+
ConceptDeclarationStatement,
|
|
73
|
+
CopyStatement,
|
|
74
|
+
CreateStatement,
|
|
75
|
+
FunctionDeclaration,
|
|
76
|
+
ImportStatement,
|
|
77
|
+
MergeStatementV2,
|
|
78
|
+
MockStatement,
|
|
79
|
+
MultiSelectStatement,
|
|
80
|
+
PersistStatement,
|
|
81
|
+
PublishStatement,
|
|
82
|
+
RawSQLStatement,
|
|
83
|
+
RowsetDerivationStatement,
|
|
84
|
+
SelectStatement,
|
|
85
|
+
ShowStatement,
|
|
86
|
+
ValidateStatement,
|
|
87
|
+
)
|
|
88
|
+
from trilogy.core.statements.execute import (
|
|
89
|
+
PROCESSED_STATEMENT_TYPES,
|
|
90
|
+
ProcessedCopyStatement,
|
|
91
|
+
ProcessedCreateStatement,
|
|
92
|
+
ProcessedMockStatement,
|
|
93
|
+
ProcessedPublishStatement,
|
|
94
|
+
ProcessedQuery,
|
|
95
|
+
ProcessedQueryPersist,
|
|
96
|
+
ProcessedRawSQLStatement,
|
|
97
|
+
ProcessedShowStatement,
|
|
98
|
+
ProcessedStaticValueOutput,
|
|
99
|
+
ProcessedValidateStatement,
|
|
100
|
+
)
|
|
101
|
+
from trilogy.core.table_processor import (
|
|
102
|
+
CreateTableInfo,
|
|
103
|
+
datasource_to_create_table_info,
|
|
104
|
+
process_create_statement,
|
|
105
|
+
)
|
|
106
|
+
from trilogy.core.utility import safe_quote
|
|
107
|
+
from trilogy.dialect.common import render_join, render_unnest
|
|
108
|
+
from trilogy.hooks.base_hook import BaseHook
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def null_wrapper(lval: str, rval: str, modifiers: list[Modifier]) -> str:
    """Render an equality comparison between two SQL expressions.

    Args:
        lval: Left-hand SQL expression.
        rval: Right-hand SQL expression.
        modifiers: Modifiers to inspect; only ``Modifier.NULLABLE`` is
            checked here.

    Returns:
        ``lval = rval`` normally. When ``Modifier.NULLABLE`` is present, the
        comparison is parenthesized and extended so that two NULL operands
        also match.
    """
    equality = f"{lval} = {rval}"
    if Modifier.NULLABLE not in modifiers:
        return equality
    # Plain SQL equality never matches NULL = NULL, so spell that case out.
    return f"({equality} or ({lval} is null and {rval} is null))"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# Prefix placed at the start of log messages emitted by this module.
LOGGER_PREFIX = "[RENDERING]"

# Single-element tuples of build-model classes. The tuple form allows them to
# be passed straight to isinstance(), as safe_get_cte_value does with
# FUNCTION_ITEMS.
WINDOW_ITEMS = (BuildWindowItem,)
FILTER_ITEMS = (BuildFilterItem,)
AGGREGATE_ITEMS = (BuildAggregateWrapper,)
FUNCTION_ITEMS = (BuildFunction,)
PARENTHETICAL_ITEMS = (BuildParenthetical,)
CASE_WHEN_ITEMS = (BuildCaseWhen,)
CASE_ELSE_ITEMS = (BuildCaseElse,)
SUBSELECT_COMPARISON_ITEMS = (BuildSubselectComparison,)
COMPARISON_ITEMS = (BuildComparison,)
CONDITIONAL_ITEMS = (BuildConditional,)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def INVALID_REFERENCE_STRING(x: Any, callsite: str = ""):
    """Build the placeholder text used for a reference that could not be resolved.

    Args:
        x: The offending value; it is interpolated into the marker as-is.
        callsite: Optional label appended to the marker prefix to identify
            where the problem was detected.

    Returns:
        A string of the form ``INVALID_REFERENCE_BUG_<callsite><<x>>``.
    """
    # A stricter mode that raises SyntaxError when CONFIG.validate_missing is
    # set is currently disabled; this function always returns the marker:
    # if CONFIG.validate_missing:
    #     raise SyntaxError(f"INVALID_REFERENCE_BUG_{callsite}<{x}>")
    marker = f"INVALID_REFERENCE_BUG_{callsite}"
    return f"{marker}<{x}>"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def window_factory(string: str, include_concept: bool = False) -> Callable:
|
|
140
|
+
def render_window(
|
|
141
|
+
concept: str, window: str, sort: str, offset: int | None = None
|
|
142
|
+
) -> str:
|
|
143
|
+
if not include_concept:
|
|
144
|
+
concept = ""
|
|
145
|
+
if offset is not None:
|
|
146
|
+
base = f"{string}({concept}, {offset})"
|
|
147
|
+
else:
|
|
148
|
+
base = f"{string}({concept})"
|
|
149
|
+
if window and sort:
|
|
150
|
+
return f"{base} over (partition by {window} order by {sort} )"
|
|
151
|
+
elif window:
|
|
152
|
+
return f"{base} over (partition by {window})"
|
|
153
|
+
elif sort:
|
|
154
|
+
return f"{base} over (order by {sort} )"
|
|
155
|
+
else:
|
|
156
|
+
return f"{base} over ()"
|
|
157
|
+
|
|
158
|
+
return render_window
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# Maps each WindowType to a renderer produced by window_factory.
# LAG, LEAD, SUM, COUNT and AVG pass the concept expression as the function
# argument; RANK and ROW_NUMBER are rendered with an empty argument list.
WINDOW_FUNCTION_MAP = {
    WindowType.LAG: window_factory("lag", include_concept=True),
    WindowType.LEAD: window_factory("lead", include_concept=True),
    WindowType.RANK: window_factory("rank"),
    WindowType.ROW_NUMBER: window_factory("row_number"),
    WindowType.SUM: window_factory("sum", include_concept=True),
    WindowType.COUNT: window_factory("count", include_concept=True),
    WindowType.AVG: window_factory("avg", include_concept=True),
}
|
|
170
|
+
|
|
171
|
+
# Maps DataType members to the type-name strings used by this base dialect.
# Note that DataType.ARRAY maps to "list".
DATATYPE_MAP: dict[DataType, str] = {
    DataType.STRING: "string",
    DataType.INTEGER: "int",
    DataType.FLOAT: "float",
    DataType.BOOL: "bool",
    DataType.NUMERIC: "numeric",
    DataType.MAP: "map",
    DataType.DATE: "date",
    DataType.DATETIME: "datetime",
    DataType.ARRAY: "list",
}
|
|
182
|
+
|
|
183
|
+
# Renderers for parameterized types: each value is a callable that takes one
# argument and returns a string. For ARRAY the argument is suffixed with "[]"
# (presumably the rendered element type — confirm against callers).
COMPLEX_DATATYPE_MAP = {
    DataType.ARRAY: lambda x: f"{x}[]",
}
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def render_case(args):
    """Wrap already-rendered branch strings in a ``CASE ... END`` expression.

    Args:
        args: Iterable of strings, one per rendered branch.

    Returns:
        The branches joined by newline + tab between ``CASE`` and ``END``.
    """
    branches = "\n\t".join(args)
    return f"CASE\n\t{branches}\n\tEND"
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def struct_arg(args):
    """Pair up a flat argument sequence into ``"second: first"`` strings.

    The sequence is consumed two items at a time; within each pair the second
    item is written before the colon and the first after it. A trailing
    unpaired item is dropped.

    Args:
        args: Sliceable sequence of rendered values.

    Returns:
        A list with one formatted string per complete pair.
    """
    firsts = args[::2]
    seconds = args[1::2]
    return [f"{second}: {first}" for first, second in zip(firsts, seconds)]
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def hash_from_args(val, hash_type):
    """Render a SQL hash function call for the requested algorithm.

    Args:
        val: Rendered SQL expression to hash.
        hash_type: Algorithm name wrapped in one leading and one trailing
            character (for example a quoted literal ``'md5'``); those two
            characters are stripped before matching. Matching is
            case-insensitive.

    Returns:
        ``<algorithm>(<val>)`` with the algorithm name lower-cased.

    Raises:
        ValueError: If the algorithm is not md5, sha1, sha256 or sha512.
    """
    hash_type = hash_type[1:-1]
    algorithm = hash_type.lower()
    if algorithm not in ("md5", "sha1", "sha256", "sha512"):
        raise ValueError(f"Unsupported hash type: {hash_type}")
    return f"{algorithm}({val})"
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Default rendering table: maps each FunctionType to a callable taking
# (x, types) and returning a SQL fragment. ``x`` is indexed as a sequence of
# arguments; several entries join it with str.join, so its items are expected
# to be strings. None of the entries in this table read ``types``.
FUNCTION_MAP = {
    # generic types
    FunctionType.ALIAS: lambda x, types: f"{x[0]}",
    FunctionType.GROUP: lambda x, types: f"{x[0]}",
    FunctionType.CONSTANT: lambda x, types: f"{x[0]}",
    FunctionType.TYPED_CONSTANT: lambda x, types: f"{x[0]}",
    FunctionType.COALESCE: lambda x, types: f"coalesce({','.join(x)})",
    FunctionType.NULLIF: lambda x, types: f"nullif({x[0]},{x[1]})",
    FunctionType.CAST: lambda x, types: f"cast({x[0]} as {x[1]})",
    FunctionType.CASE: lambda x, types: render_case(x),
    FunctionType.SPLIT: lambda x, types: f"split({x[0]}, {x[1]})",
    FunctionType.IS_NULL: lambda x, types: f"{x[0]} is null",
    FunctionType.BOOL: lambda x, types: f"CASE WHEN {x[0]} THEN TRUE ELSE FALSE END",
    FunctionType.PARENTHETICAL: lambda x, types: f"({x[0]})",
    # Complex
    FunctionType.INDEX_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
    FunctionType.MAP_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
    FunctionType.UNNEST: lambda x, types: f"unnest({x[0]})",
    # one row per day between x[0] and x[1] via generate_series
    FunctionType.DATE_SPINE: lambda x, types: f"""unnest(
generate_series(
{x[0]},
{x[1]},
INTERVAL '1 day'
)
)""",
    # falls back to x[0] when x[1] is NULL
    FunctionType.RECURSE_EDGE: lambda x, types: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END",
    # single quotes are removed from the attribute name before it is appended
    FunctionType.ATTR_ACCESS: lambda x, types: f"""{x[0]}.{x[1].replace("'", "")}""",
    FunctionType.STRUCT: lambda x, types: f"{{{', '.join(struct_arg(x))}}}",
    FunctionType.ARRAY: lambda x, types: f"[{', '.join(x)}]",
    # NOTE(review): the two literal entries below format ``x`` itself rather
    # than ``x[0]`` like the other entries — confirm what callers pass here.
    FunctionType.DATE_LITERAL: lambda x, types: f"date '{x}'",
    FunctionType.DATETIME_LITERAL: lambda x, types: f"datetime '{x}'",
    # MAP
    FunctionType.MAP_KEYS: lambda x, types: f"map_keys({x[0]})",
    FunctionType.MAP_VALUES: lambda x, types: f"map_values({x[0]})",
    # ARRAY
    FunctionType.GENERATE_ARRAY: lambda x, types: f"generate_series({x[0]}, {x[1]}, {x[2]})",
    FunctionType.ARRAY_SUM: lambda x, types: f"array_sum({x[0]})",
    FunctionType.ARRAY_DISTINCT: lambda x, types: f"array_distinct({x[0]})",
    FunctionType.ARRAY_SORT: lambda x, types: f"array_sort({x[0]})",
    FunctionType.ARRAY_TRANSFORM: lambda args, types: (
        f"array_transform({args[0]}, {args[1]} -> {args[2]})"
    ),
    FunctionType.ARRAY_TO_STRING: lambda args, types: (
        f"array_to_string({args[0]}, {args[1]})"
    ),
    FunctionType.ARRAY_FILTER: lambda args, types: (
        f"array_filter({args[0]}, {args[1]} -> {args[2]})"
    ),
    # math
    FunctionType.ADD: lambda x, types: " + ".join(x),
    FunctionType.ABS: lambda x, types: f"abs({x[0]})",
    FunctionType.SUBTRACT: lambda x, types: " - ".join(x),
    FunctionType.DIVIDE: lambda x, types: " / ".join(x),
    FunctionType.MULTIPLY: lambda x, types: " * ".join(x),
    FunctionType.ROUND: lambda x, types: f"round({x[0]},{x[1]})",
    FunctionType.FLOOR: lambda x, types: f"floor({x[0]})",
    FunctionType.CEIL: lambda x, types: f"ceil({x[0]})",
    FunctionType.MOD: lambda x, types: f"{x[0]} % {x[1]}",
    FunctionType.POWER: lambda x, types: f"{x[0]} ** {x[1]}",
    FunctionType.SQRT: lambda x, types: f"sqrt({x[0]})",
    FunctionType.RANDOM: lambda x, types: "random()",
    # NOTE(review): x[1] is compared to the int 10; if arguments arrive as
    # rendered strings this branch never takes the single-argument form —
    # confirm the argument type.
    FunctionType.LOG: lambda x, types: (
        f"log({x[0]})" if x[1] == 10 else f"log({x[0]}, {x[1]})"
    ),
    # aggregate types
    FunctionType.COUNT_DISTINCT: lambda x, types: f"count(distinct {x[0]})",
    FunctionType.COUNT: lambda x, types: f"count({x[0]})",
    FunctionType.SUM: lambda x, types: f"sum({x[0]})",
    FunctionType.ARRAY_AGG: lambda x, types: f"array_agg({x[0]})",
    FunctionType.LENGTH: lambda x, types: f"length({x[0]})",
    FunctionType.AVG: lambda x, types: f"avg({x[0]})",
    FunctionType.MAX: lambda x, types: f"max({x[0]})",
    FunctionType.MIN: lambda x, types: f"min({x[0]})",
    FunctionType.ANY: lambda x, types: f"any_value({x[0]})",
    FunctionType.BOOL_OR: lambda x, types: f"bool_or({x[0]})",
    FunctionType.BOOL_AND: lambda x, types: f"bool_and({x[0]})",
    # string types
    FunctionType.LIKE: lambda x, types: f" {x[0]} like {x[1]} ",
    FunctionType.UPPER: lambda x, types: f"UPPER({x[0]}) ",
    FunctionType.LOWER: lambda x, types: f"LOWER({x[0]}) ",
    FunctionType.SUBSTRING: lambda x, types: f"SUBSTRING({x[0]},{x[1]},{x[2]})",
    FunctionType.STRPOS: lambda x, types: f"STRPOS({x[0]},{x[1]})",
    FunctionType.CONTAINS: lambda x, types: f"CONTAINS({x[0]},{x[1]})",
    FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_CONTAINS({x[0]},{x[1]})",
    FunctionType.REGEXP_EXTRACT: lambda x, types: f"REGEXP_EXTRACT({x[0]},{x[1]})",
    FunctionType.REGEXP_REPLACE: lambda x, types: f"REGEXP_REPLACE({x[0]},{x[1]}, {x[2]})",
    FunctionType.TRIM: lambda x, types: f"TRIM({x[0]})",
    FunctionType.REPLACE: lambda x, types: f"REPLACE({x[0]},{x[1]},{x[2]})",
    # x[1] carries the algorithm name; see hash_from_args for accepted values
    FunctionType.HASH: lambda x, types: hash_from_args(x[0], x[1]),
    # FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
    # date types
    FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc({x[0]},{x[1]})",
    FunctionType.DATE_PART: lambda x, types: f"date_part({x[0]},{x[1]})",
    FunctionType.DATE_ADD: lambda x, types: f"date_add({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE_SUB: lambda x, types: f"date_sub({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE_DIFF: lambda x, types: f"date_diff({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE: lambda x, types: f"date({x[0]})",
    FunctionType.DATETIME: lambda x, types: f"datetime({x[0]})",
    FunctionType.TIMESTAMP: lambda x, types: f"timestamp({x[0]})",
    FunctionType.SECOND: lambda x, types: f"second({x[0]})",
    FunctionType.MINUTE: lambda x, types: f"minute({x[0]})",
    FunctionType.HOUR: lambda x, types: f"hour({x[0]})",
    FunctionType.DAY: lambda x, types: f"day({x[0]})",
    FunctionType.DAY_NAME: lambda x, types: f"dayname({x[0]})",
    FunctionType.DAY_OF_WEEK: lambda x, types: f"day_of_week({x[0]})",
    FunctionType.WEEK: lambda x, types: f"week({x[0]})",
    FunctionType.MONTH: lambda x, types: f"month({x[0]})",
    FunctionType.MONTH_NAME: lambda x, types: f"monthname({x[0]})",
    FunctionType.QUARTER: lambda x, types: f"quarter({x[0]})",
    FunctionType.YEAR: lambda x, types: f"year({x[0]})",
    # string types
    FunctionType.CONCAT: lambda x, types: f"concat({','.join(x)})",
    # constant types
    FunctionType.CURRENT_DATE: lambda x, types: "current_date()",
    FunctionType.CURRENT_DATETIME: lambda x, types: "current_datetime()",
}
|
|
326
|
+
|
|
327
|
+
# Variant of FUNCTION_MAP in which the listed aggregates are rendered as
# row-level expressions instead of aggregate calls: COUNT / COUNT_DISTINCT
# become a 1/0 flag on non-NULL input, and SUM / AVG / MAX / MIN / ANY pass
# their argument through unchanged. All other entries are inherited from
# FUNCTION_MAP. (Presumably used when the data is already at the aggregate's
# grain — confirm against the call sites.)
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # A space is required after WHEN; without it the keyword fuses with the
    # operand ("CASE WHENcol ...") and the SQL is malformed for unquoted
    # expressions.
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
    FunctionType.MAX: lambda args, types: f"{args[0]}",
    FunctionType.MIN: lambda args, types: f"{args[0]}",
    FunctionType.ANY: lambda args, types: f"{args[0]}",
}
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
# Jinja2 template for a full query.
# Variables read by the template:
#   ctes / recursive - optional WITH [RECURSIVE] block; each cte supplies
#                      .name and .statement
#   full_select      - when set, emitted verbatim instead of the SELECT below
#   limit            - when not none, rendered as "TOP <limit>" right after
#                      SELECT
#   select_columns, base, joins, where, group_by, having, order_by
#                    - the individual clauses; each is omitted when falsy
GENERIC_SQL_TEMPLATE: Template = Template(
    """{%- if ctes %}
WITH {% if recursive%} RECURSIVE {% endif %}{% for cte in ctes %}
{{cte.name}} as (
{{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if full_select -%}
{{full_select}}
{% else -%}
SELECT
{%- if limit is not none %}
TOP {{ limit }}{% endif %}
{%- for select in select_columns %}
\t{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
{% if base %}FROM
\t{{ base }}{% endif %}{% if joins %}{% for join in joins %}
\t{{ join }}{% endfor %}{% endif %}{% if where %}
WHERE
\t{{ where }}{% endif %}{%- if group_by %}
GROUP BY {% for group in group_by %}
\t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
HAVING
\t{{ having }}{% endif %}{%- if order_by %}
ORDER BY{% for order in order_by %}
\t{{ order }}{% if not loop.last %},{% endif %}{% endfor %}
{% endif %}{% endif %}
"""
)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
# Jinja2 template for a CREATE TABLE statement.
# Variables read by the template:
#   create_mode    - "create_or_replace" and "create_if_not_exists" select the
#                    matching DDL form; any other value gives plain CREATE TABLE
#   name           - table name, emitted as given
#   columns        - each column supplies .name and an optional .comment
#   type_map       - looked up by column name to get the rendered column type
#   partition_keys - optional; adds a PARTITIONED BY (...) clause
# The template source is stripped of surrounding whitespace before compiling.
# NOTE(review): column.comment is interpolated between single quotes as-is —
# confirm that callers escape embedded quotes.
CREATE_TABLE_SQL_TEMPLATE = Template(
    """
CREATE {% if create_mode == "create_or_replace" %}OR REPLACE TABLE{% elif create_mode == "create_if_not_exists" %}TABLE IF NOT EXISTS{% else %}TABLE{% endif %} {{ name }} (
{%- for column in columns %}
{{ column.name }} {{ type_map[column.name] }}{% if column.comment %} COMMENT '{{ column.comment }}'{% endif %}{% if not loop.last %},{% endif %}
{%- endfor %}
)
{%- if partition_keys %}
PARTITIONED BY (
{%- for partition_key in partition_keys %}
{{ partition_key }}{% if not loop.last %},{% endif %}
{%- endfor %}
)
{%- endif %};
""".strip()
)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def safe_get_cte_value(
    coalesce: Callable,
    cte: CTE | UnionCTE,
    c: BuildConcept,
    quote_char: str,
    render_expr: Callable,
    use_map: dict[str, set[str]],
) -> Optional[str]:
    """Render a reference to concept ``c`` from the sources recorded on ``cte``.

    Args:
        coalesce: Callable invoked as ``coalesce(list_of_refs, [])`` when the
            concept has several sources.
        cte: CTE whose ``source_map`` and ``get_alias`` are consulted.
        c: Concept to reference.
        quote_char: Quote character placed around the source name.
        render_expr: Callable used to render the alias when ``get_alias``
            returns a function object rather than a name.
        use_map: Mutated in place; ``c.address`` is added under each source
            name that is referenced by name.

    Returns:
        ``None`` when ``cte.source_map`` has no (or an empty) entry for the
        concept; otherwise the rendered SQL reference.
    """
    sources = cte.source_map.get(c.address, None)
    if not sources:
        return None

    def qualified(source_name, alias) -> str:
        # <quote>source<quote>.<safely quoted alias>
        return f"{quote_char}{source_name}{quote_char}.{safe_quote(alias, quote_char)}"

    # A single source given directly as a string.
    if isinstance(sources, str):
        alias = cte.get_alias(c, sources)
        use_map[sources].add(c.address)
        return qualified(sources, alias)

    # A single source given as a one-element list.
    if isinstance(sources, list) and len(sources) == 1:
        only_source = sources[0]
        alias = cte.get_alias(c, only_source)
        if isinstance(alias, FUNCTION_ITEMS):
            # if it's a function, we need to render it as a function;
            # use_map is not updated on this path
            return f"{render_expr(alias, cte=cte, raise_invalid=True)}"
        use_map[only_source].add(c.address)
        return qualified(only_source, alias)

    # Several sources: record all of them, then coalesce the references in
    # sorted order.
    for source_name in sources:
        use_map[source_name].add(c.address)
    references = [
        qualified(source_name, cte.get_alias(c, source_name))
        for source_name in sources
    ]
    return coalesce(sorted(references), [])
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
class BaseDialect:
|
|
424
|
+
WINDOW_FUNCTION_MAP = WINDOW_FUNCTION_MAP
|
|
425
|
+
FUNCTION_MAP = FUNCTION_MAP
|
|
426
|
+
FUNCTION_GRAIN_MATCH_MAP = FUNCTION_GRAIN_MATCH_MAP
|
|
427
|
+
QUOTE_CHARACTER = "`"
|
|
428
|
+
SQL_TEMPLATE = GENERIC_SQL_TEMPLATE
|
|
429
|
+
CREATE_TABLE_SQL_TEMPLATE = CREATE_TABLE_SQL_TEMPLATE
|
|
430
|
+
DATATYPE_MAP = DATATYPE_MAP
|
|
431
|
+
COMPLEX_DATATYPE_MAP = COMPLEX_DATATYPE_MAP
|
|
432
|
+
UNNEST_MODE = UnnestMode.CROSS_APPLY
|
|
433
|
+
GROUP_MODE = GroupMode.AUTO
|
|
434
|
+
EXPLAIN_KEYWORD = "EXPLAIN"
|
|
435
|
+
NULL_WRAPPER = staticmethod(null_wrapper)
|
|
436
|
+
ALIAS_ORDER_REFERENCING_ALLOWED = True
|
|
437
|
+
TABLE_NOT_FOUND_PATTERN: str | None = None # Dialect-specific pattern to match
|
|
438
|
+
|
|
439
|
+
def __init__(
    self,
    rendering: Rendering | None = None,
    config: "DialectConfig | None" = None,
):
    """Set up per-instance rendering state.

    Args:
        rendering: Rendering options; when omitted (or falsy) the global
            ``CONFIG.rendering`` is used.
        config: Optional dialect configuration, stored as given.
    """
    # Starts empty; missing keys yield an empty set on first access.
    self.used_map: dict[str, set[str]] = defaultdict(set)
    self.config = config
    self.rendering = rendering if rendering else CONFIG.rendering
|
|
447
|
+
|
|
448
|
+
def render_source(self, address: Address) -> str:
    """Render the FROM-clause source for ``address``.

    Args:
        address: Source address; its ``type``, ``is_file`` and ``location``
            are read.

    Returns:
        The parenthesized query text for query addresses, the parenthesized
        contents of the file for SQL file addresses, or the quoted location
        otherwise.

    Raises:
        NotImplementedError: For file addresses whose type is not SQL.
    """
    # Inline query text is used as a subquery.
    if address.type == AddressType.QUERY:
        return f"({address.location})"
    # Anything that is not a file is referenced by its quoted location.
    if not address.is_file:
        return self.safe_quote(address.location)
    if address.type != AddressType.SQL:
        raise NotImplementedError(
            f"File source type {address.type} not supported by this dialect"
        )
    # SQL files are read as UTF-8 and embedded as a subquery.
    with open(address.location, "r", encoding="utf-8") as f:
        return f"({f.read()})"
|
|
459
|
+
|
|
460
|
+
def get_table_schema(
    self, executor, table_name: str, schema: str | None = None
) -> list[tuple]:
    """Return schema information for a table.

    The base dialect provides no implementation and always raises.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError
|
|
464
|
+
|
|
465
|
+
def get_table_primary_keys(
    self, executor, table_name: str, schema: str | None = None
) -> list[str]:
    """Return the primary key columns of a table.

    The base dialect provides no implementation and always raises.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError
|
|
469
|
+
|
|
470
|
+
def get_table_sample(
|
|
471
|
+
self,
|
|
472
|
+
executor,
|
|
473
|
+
table_name: str,
|
|
474
|
+
schema: str | None = None,
|
|
475
|
+
sample_size: int = 10000,
|
|
476
|
+
) -> list[tuple]:
|
|
477
|
+
if schema:
|
|
478
|
+
qualified_name = f"{schema}.{table_name}"
|
|
479
|
+
else:
|
|
480
|
+
qualified_name = table_name
|
|
481
|
+
|
|
482
|
+
sample_query = (
|
|
483
|
+
f"SELECT * FROM {self.safe_quote(qualified_name)} LIMIT {sample_size}"
|
|
484
|
+
)
|
|
485
|
+
rows = executor.execute_raw_sql(sample_query).fetchall()
|
|
486
|
+
return rows
|
|
487
|
+
|
|
488
|
+
def get_table_last_modified(
|
|
489
|
+
self, executor, table_name: str, schema: str | None = None
|
|
490
|
+
) -> str | None:
|
|
491
|
+
from datetime import datetime, timezone
|
|
492
|
+
|
|
493
|
+
return datetime.now(timezone.utc).isoformat()
|
|
494
|
+
|
|
495
|
+
def hash_column_value(self, column_name: str) -> str:
    """Return a SQL expression that md5-hashes a column cast to VARCHAR.

    Args:
        column_name: Column to hash; it is quoted with ``self.safe_quote``.
    """
    quoted_column = self.safe_quote(column_name)
    return f"md5(CAST({quoted_column} AS VARCHAR))"
|
|
497
|
+
|
|
498
|
+
def aggregate_checksum(self, hash_expr: str) -> str:
    """Return a SQL expression that XOR-aggregates the hash of ``hash_expr``.

    Args:
        hash_expr: SQL expression to hash for each row.
    """
    row_hash = f"hash({hash_expr})"
    return f"BIT_XOR({row_hash})"
|
|
500
|
+
|
|
501
|
+
def render_order_item(
    self,
    order_item: BuildOrderItem,
    cte: CTE | UnionCTE,
) -> str:
    """Render one ORDER BY entry as ``<expression> <direction>``.

    The quoted output alias is used only when all of the following hold: the
    ordered expression is a ``BuildConcept``, its address is among the CTE's
    output columns, the dialect sets ``ALIAS_ORDER_REFERENCING_ALLOWED``, and
    the CTE's ``source_map`` has no (non-empty) entry for it. In every other
    case the expression is rendered through ``self.render_expr``.
    """
    target = order_item.expr
    alias_eligible = (
        isinstance(target, BuildConcept)
        and target.address in cte.output_columns
        and self.ALIAS_ORDER_REFERENCING_ALLOWED
    )
    # A concept with an upstream source must be rendered as a full reference;
    # only a concept derived in this CTE can safely be named by its alias.
    if alias_eligible and not cte.source_map.get(target.address, []):
        return f"{self.QUOTE_CHARACTER}{target.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"
    return f"{self.render_expr(target, cte=cte)} {order_item.order.value}"
|
|
519
|
+
|
|
520
|
+
def render_concept_sql(
    self,
    c: BuildConcept,
    cte: CTE | UnionCTE,
    alias: bool = True,
    raise_invalid: bool = False,
) -> str:
    """Render concept ``c`` inside ``cte``, optionally with an ``as`` alias.

    When ``c`` has pseudonyms, ``c`` itself and then every pseudonym that
    ``cte.get_concept`` resolves are tried in order with ``raise_invalid=True``.
    A candidate raising ``ValueError`` is skipped, and the first non-empty
    rendering is kept. If nothing was produced, ``c`` is rendered directly
    with the caller's ``raise_invalid`` value.

    Returns:
        The rendered SQL, followed by `` as <quoted safe_address>`` when
        ``alias`` is true.
    """
    rendered = None
    if c.pseudonyms:
        candidates = list(filter(None, map(cte.get_concept, c.pseudonyms)))
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] pseudonym candidates are {[x.address for x in candidates]}"
        )
        for candidate in [c, *candidates]:
            try:
                logger.debug(
                    f"{LOGGER_PREFIX} [{c.address}] Attempting rendering w/ candidate {candidate.address}"
                )
                rendered = self._render_concept_sql(
                    candidate,
                    cte,
                    raise_invalid=True,
                )
            except ValueError:
                # this candidate cannot be resolved here; try the next one
                continue
            if rendered:
                break
    if not rendered:
        rendered = self._render_concept_sql(
            c,
            cte,
            raise_invalid=raise_invalid,
        )
    if not alias:
        return rendered
    quote_char = self.QUOTE_CHARACTER
    return f"{rendered} as {quote_char}{c.safe_address}{quote_char}"
|
|
556
|
+
|
|
557
|
+
def _render_concept_sql(
    self,
    c: BuildConcept,
    cte: CTE | UnionCTE,
    raise_invalid: bool = False,
) -> str:
    """Render the SQL for concept ``c`` in the context of ``cte``, unaliased.

    Two paths exist:

    * ``c`` has lineage and ``cte.source_map`` holds no sources for it: the
      expression is built here from the lineage (window, filter, rowset,
      multiselect, comparison, aggregate, union, constant-as-parameter or
      generic function).
    * otherwise the value is looked up from the CTE's sources via
      ``cte.get_alias`` / ``safe_get_cte_value``.

    Args:
        c: concept to render.
        cte: CTE the concept is rendered in.
        raise_invalid: when true, raise instead of emitting an
            ``INVALID_REFERENCE_STRING`` placeholder for a missing source.

    Raises:
        ValueError: if no source reference can be found, ``cte`` is not a
            ``UnionCTE`` and ``raise_invalid`` is true.
    """
    # only recurse while it's in sources of the current cte
    logger.debug(
        f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
    )

    # check if it's not inherited AND no pseudonyms are inherited
    if c.lineage and cte.source_map.get(c.address, []) == []:
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
        )
        if isinstance(c.lineage, WINDOW_ITEMS):
            rendered_order_components = [
                f"{self.render_expr(x.expr, cte, raise_invalid=raise_invalid)} {x.order.value}"
                for x in c.lineage.order_by
            ]
            rendered_over_components = [
                self.render_concept_sql(
                    x, cte, alias=False, raise_invalid=raise_invalid
                )
                for x in c.lineage.over
            ]

            rval = self.WINDOW_FUNCTION_MAP[c.lineage.type](
                concept=self.render_concept_sql(
                    c.lineage.content,
                    cte=cte,
                    alias=False,
                    raise_invalid=raise_invalid,
                ),
                window=",".join(rendered_over_components),
                sort=",".join(rendered_order_components),
                offset=c.lineage.index,
            )
        elif isinstance(c.lineage, FILTER_ITEMS):
            # for cases when we've optimized this: the CTE already applies
            # the same condition, so the content can be used as-is
            if cte.condition == c.lineage.where.conditional:
                rval = self.render_expr(
                    c.lineage.content, cte=cte, raise_invalid=raise_invalid
                )
            else:
                rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_expr(c.lineage.content, cte=cte, raise_invalid=raise_invalid)} ELSE NULL END"
        elif isinstance(c.lineage, BuildRowsetItem):
            rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, BuildMultiSelectLineage):
            if c.address in c.lineage.calculated_derivations:
                assert c.lineage.derive is not None
                # NOTE(review): if no derive item matches c.address, rval is
                # never bound and the final return fails - confirm that
                # calculated_derivations always mirrors derive.items.
                for x in c.lineage.derive.items:
                    if x.address == c.address:
                        rval = self.render_expr(
                            x.expr,
                            cte=cte,
                            raise_invalid=raise_invalid,
                        )
                        break
            else:
                rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, BuildComparison):
            rval = f"{self.render_expr(c.lineage.left, cte=cte, raise_invalid=raise_invalid)} {c.lineage.operator.value} {self.render_expr(c.lineage.right, cte=cte, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, AGGREGATE_ITEMS):
            # NOTE(review): raise_invalid is not forwarded for aggregate
            # arguments, unlike the other branches - confirm this is intended.
            args = [self.render_expr(v, cte) for v in c.lineage.function.arguments]
            if cte.group_to_grain:
                rval = self.FUNCTION_MAP[c.lineage.function.operator](args, [])
            else:
                logger.debug(
                    f"{LOGGER_PREFIX} [{c.address}] ignoring aggregate, already at"
                    " target grain"
                )
                rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.function.operator](args, [])}"
        elif (
            isinstance(c.lineage, FUNCTION_ITEMS)
            and c.lineage.operator == FunctionType.UNION
        ):
            local_matched = [
                x
                for x in c.lineage.arguments
                if isinstance(x, BuildConcept) and x.address in cte.output_columns
            ]
            # if we're sorting by the output of the union
            if not local_matched:
                rval = c.safe_address
            else:
                rval = self.render_expr(local_matched[0], cte)
        elif (
            isinstance(c.lineage, FUNCTION_ITEMS)
            and c.lineage.operator == FunctionType.CONSTANT
            and self.rendering.parameters is True
            and c.datatype.data_type != DataType.MAP
        ):
            # constants are emitted as bind parameters when enabled
            rval = f":{c.safe_address}"
        else:
            args = []
            types = []
            for arg in c.lineage.arguments:
                if (
                    isinstance(arg, BuildConcept)
                    and arg.lineage
                    and isinstance(arg.lineage, FUNCTION_ITEMS)
                    and arg.lineage.operator
                    in (
                        FunctionType.ADD,
                        FunctionType.SUBTRACT,
                        FunctionType.DIVIDE,
                        FunctionType.MULTIPLY,
                    )
                ):
                    # parenthesize nested arithmetic so the rendered
                    # argument keeps its grouping
                    args.append(
                        self.render_expr(
                            BuildParenthetical(content=arg),
                            cte=cte,
                            raise_invalid=raise_invalid,
                        )
                    )
                else:
                    args.append(
                        self.render_expr(arg, cte=cte, raise_invalid=raise_invalid)
                    )
                types.append(arg_to_datatype(arg))

            if cte.group_to_grain:
                rval = f"{self.FUNCTION_MAP[c.lineage.operator](args, types)}"
            else:
                rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.operator](args, types)}"
    else:
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] Rendering basic lookup from {cte.source_map.get(c.address,None)}"
        )

        raw_content = cte.get_alias(c)
        parent = cte.source_map.get(c.address, None)
        if parent:
            # record that this concept is read from its first source
            self.used_map[parent[0]].add(c.address)
        if isinstance(raw_content, RawColumnExpr):
            rval = raw_content.text
        elif isinstance(raw_content, FUNCTION_ITEMS):
            rval = self.render_expr(
                raw_content, cte=cte, raise_invalid=raise_invalid
            )
        else:
            rval = safe_get_cte_value(
                self.FUNCTION_MAP[FunctionType.COALESCE],
                cte,
                c,
                self.QUOTE_CHARACTER,
                self.render_expr,
                self.used_map,
            )
            if not rval:
                # unions won't have a specific source mapped; just use a generic column reference
                # we shouldn't ever have an expression at this point, so will be safe
                if isinstance(cte, UnionCTE):
                    rval = c.safe_address
                else:
                    if raise_invalid:
                        # rval is empty here, so name the concept and CTE
                        # to make the error actionable
                        raise ValueError(
                            f"Invalid reference string found in query: missing source reference to {c.address} in cte {cte.name}, this should never occur. Please report this issue."
                        )
                    rval = INVALID_REFERENCE_STRING(
                        f"Missing source reference to {c.address}"
                    )
    return rval
|
|
727
|
+
|
|
728
|
+
def render_array_unnest(
    self,
    left,
    right,
    operator: ComparisonOperator,
    cte: CTE | UnionCTE | None = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
):
    """Render ``<left> <operator> <right>`` with both sides via ``render_expr``.

    ``cte``, ``cte_map`` and ``raise_invalid`` are forwarded unchanged to both
    ``render_expr`` calls; ``operator.value`` supplies the operator text.
    """
    lhs = self.render_expr(
        left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
    )
    symbol = operator.value
    rhs = self.render_expr(
        right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
    )
    return f"{lhs} {symbol} {rhs}"
|
|
738
|
+
|
|
739
|
+
def render_expr(
    self,
    e: Union[
        BuildConcept,
        BuildFunction,
        BuildConditional,
        BuildAggregateWrapper,
        BuildComparison,
        BuildCaseWhen,
        BuildCaseElse,
        BuildSubselectComparison,
        BuildWindowItem,
        BuildFilterItem,
        BuildParenthetical,
        BuildParamaterizedConceptReference,
        BuildMultiSelectLineage,
        BuildRowsetItem,
        str,
        int,
        list,
        bool,
        float,
        date,
        datetime,
        DataType,
        TraitDataType,
        MagicConstants,
        MapWrapper[Any, Any],
        MapType,
        NumericType,
        StructType,
        ArrayType,
        ListWrapper[Any],
        TupleWrapper[Any],
        DatePart,
    ],
    cte: Optional[CTE | UnionCTE] = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
) -> str:
    """Render an expression node, literal or datatype to a SQL fragment.

    Dispatches on the type of ``e``; the order of the ``isinstance`` checks is
    significant wherever one type subclasses another (``bool`` before ``int``,
    ``datetime`` before ``date``).

    Args:
        e: the value to render.
        cte: CTE providing column context, if any.
        cte_map: mapping from concept address to CTE, used when ``cte`` is
            not given.
        raise_invalid: forwarded to nested rendering calls.

    Raises:
        ValueError: if the type of ``e`` is not handled.
        AssertionError: if a subselect against a concept is rendered without
            a CTE in context.
    """

    def sub(node) -> str:
        # recurse with the complete rendering context of this call
        return self.render_expr(
            node, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
        )

    if isinstance(e, SUBSELECT_COMPARISON_ITEMS):
        right: Any = e.right
        while isinstance(right, BuildParenthetical):
            right = right.content
        if isinstance(right, BuildConcept):
            # we won't always have an existence map
            # so fall back to the normal map
            lookup_cte = cte
            if cte_map and not lookup_cte:
                lookup_cte = cte_map.get(right.address)

            assert lookup_cte, "Subselects must be rendered with a CTE in context"
            if right.address not in lookup_cte.existence_source_map:
                lookup = lookup_cte.source_map.get(
                    right.address,
                    [
                        INVALID_REFERENCE_STRING(
                            f"Missing source reference to {right.address}"
                        )
                    ],
                )
            else:
                lookup = lookup_cte.existence_source_map[right.address]
            if len(lookup) > 0:
                target = lookup[0]
            else:
                target = INVALID_REFERENCE_STRING(
                    f"Missing source CTE for {right.address}"
                )
            assert cte, "CTE must be provided for inlined CTEs"
            self.used_map[target].add(right.address)
            if target in cte.inlined_ctes:
                info = cte.inlined_ctes[target]
                return f"{sub(e.left)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} from {info.new_base} as {target} where {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} is not null)"
            return f"{sub(e.left)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{right.safe_address}{self.QUOTE_CHARACTER} is not null)"
        elif isinstance(right, BuildParamaterizedConceptReference):
            if isinstance(right.concept.lineage, BuildFunction) and isinstance(
                right.concept.lineage.arguments[0], ListWrapper
            ):
                return self.render_array_unnest(
                    e.left,
                    right,
                    e.operator,
                    cte=cte,
                    cte_map=cte_map,
                    raise_invalid=raise_invalid,
                )
            return f"{sub(e.left)} {e.operator.value} {sub(right)}"
        elif isinstance(
            right,
            (ListWrapper, TupleWrapper, BuildParenthetical),
        ):
            return f"{sub(e.left)} {e.operator.value} {sub(right)}"

        return f"{sub(e.left)} {e.operator.value} ({sub(right)})"
    elif isinstance(e, COMPARISON_ITEMS):
        return f"{sub(e.left)} {e.operator.value} {sub(e.right)}"
    elif isinstance(e, CONDITIONAL_ITEMS):
        # conditions need to be nested in parentheses
        return f"{sub(e.left)} {e.operator.value} {sub(e.right)}"
    elif isinstance(e, WINDOW_ITEMS):
        rendered_order_components = [
            f"{sub(x.expr)} {x.order.value}" for x in e.order_by
        ]
        rendered_over_components = [sub(x) for x in e.over]
        # pass the window index through as the offset, matching how
        # _render_concept_sql renders window lineage
        return f"{self.WINDOW_FUNCTION_MAP[e.type](concept=sub(e.content), window=','.join(rendered_over_components), sort=','.join(rendered_order_components), offset=e.index)}"  # noqa: E501
    elif isinstance(e, PARENTHETICAL_ITEMS):
        # conditions need to be nested in parentheses
        if isinstance(e.content, list):
            return f"( {','.join([sub(x) for x in e.content])} )"
        return f"( {sub(e.content)} )"
    elif isinstance(e, CASE_WHEN_ITEMS):
        # the comparison is rendered without forwarding raise_invalid
        return f"WHEN {self.render_expr(e.comparison, cte=cte, cte_map=cte_map)} THEN {sub(e.expr)}"
    elif isinstance(e, CASE_ELSE_ITEMS):
        return f"ELSE {sub(e.expr)}"
    elif isinstance(e, FUNCTION_ITEMS):
        arguments = []
        for arg in e.arguments:
            if (
                isinstance(arg, BuildConcept)
                and arg.lineage
                and isinstance(arg.lineage, FUNCTION_ITEMS)
                and arg.lineage.operator
                in (
                    FunctionType.ADD,
                    FunctionType.SUBTRACT,
                    FunctionType.DIVIDE,
                    FunctionType.MULTIPLY,
                )
            ):
                # parenthesize nested arithmetic so grouping is preserved
                arguments.append(sub(BuildParenthetical(content=arg)))
            else:
                arguments.append(sub(arg))

        if cte and cte.group_to_grain:
            return self.FUNCTION_MAP[e.operator](arguments, [])

        return self.FUNCTION_GRAIN_MATCH_MAP[e.operator](arguments, [])
    elif isinstance(e, AGGREGATE_ITEMS):
        return sub(e.function)
    elif isinstance(e, FILTER_ITEMS):
        return f"CASE WHEN {sub(e.where.conditional)} THEN {sub(e.content)} ELSE NULL END"
    elif isinstance(e, BuildConcept):
        if (
            isinstance(e.lineage, FUNCTION_ITEMS)
            and e.lineage.operator == FunctionType.CONSTANT
            and self.rendering.parameters is True
            and e.datatype.data_type != DataType.MAP
        ):
            return f":{e.safe_address}"
        if cte:
            return self.render_concept_sql(
                e,
                cte,
                alias=False,
                raise_invalid=raise_invalid,
            )
        elif cte_map:
            self.used_map[cte_map[e.address].name].add(e.address)
            return f"{cte_map[e.address].name}.{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
        return f"{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
    elif isinstance(e, bool):
        # bool must be tested before int: bool is a subclass of int
        return f"{e}"
    elif isinstance(e, str):
        # NOTE(review): embedded single quotes are not escaped here -
        # confirm string literals are sanitized upstream.
        return f"'{e}'"
    elif isinstance(e, (int, float)):
        return str(e)
    elif isinstance(e, TupleWrapper):
        return f"({','.join([sub(x) for x in e])})"
    elif isinstance(e, MapWrapper):
        pairs = [f"{sub(k)}:{sub(v)}" for k, v in e.items()]
        return f"MAP {{{','.join(pairs)}}}"
    elif isinstance(e, ListWrapper):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([sub(x) for x in e], [])}"
    elif isinstance(e, DataType):
        return self.DATATYPE_MAP.get(e, e.value)
    elif isinstance(e, DatePart):
        return str(e.value)
    elif isinstance(e, NumericType):
        return f"{self.DATATYPE_MAP[DataType.NUMERIC]}({e.precision},{e.scale})"
    elif isinstance(e, MagicConstants):
        if e == MagicConstants.NULL:
            return "null"
        return str(e.value)
    elif isinstance(e, datetime):
        # datetime must be tested before date: datetime is a subclass of
        # date, so the reverse order would render every datetime as a date
        return self.FUNCTION_MAP[FunctionType.DATETIME_LITERAL](e, [])
    elif isinstance(e, date):
        return self.FUNCTION_MAP[FunctionType.DATE_LITERAL](e, [])
    elif isinstance(e, TraitDataType):
        return self.render_expr(e.type, cte=cte, cte_map=cte_map)
    elif isinstance(e, ArgBinding):
        return e.name
    elif isinstance(e, Ordering):
        return str(e.value)
    elif isinstance(e, ArrayType):
        return f"{self.COMPLEX_DATATYPE_MAP[DataType.ARRAY](self.render_expr(e.value_data_type, cte=cte, cte_map=cte_map))}"
    elif isinstance(e, list):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e], [])}"
    elif isinstance(e, BuildParamaterizedConceptReference):
        if self.rendering.parameters:
            if e.concept.namespace == DEFAULT_NAMESPACE:
                return f":{e.concept.name}"
            return f":{e.concept.address.replace('.', '_')}"
        elif e.concept.lineage:
            return self.render_expr(e.concept.lineage, cte=cte, cte_map=cte_map)
        return f"{self.QUOTE_CHARACTER}{e.concept.address}{self.QUOTE_CHARACTER}"

    else:
        raise ValueError(f"Unable to render type {type(e)} {e}")
|
|
964
|
+
|
|
965
|
+
def render_cte_group_by(
    self, cte: CTE | UnionCTE, select_columns
) -> Optional[list[str]]:
    """Build the GROUP BY entries for ``cte``.

    Args:
        cte: the CTE being rendered.
        select_columns: the rendered select list, each entry optionally
            ending in `` as <alias>``.

    Returns:
        ``None`` when the CTE does not group to grain. With
        ``GroupMode.AUTO``, the sorted rendered group expressions. Otherwise
        the 1-based positions (as strings) of the group expressions within
        ``select_columns``.

    Raises:
        ValueError: in positional mode, if a group expression does not appear
            in ``select_columns``.
    """
    if not cte.group_to_grain:
        return None
    base = {self.render_concept_sql(c, cte, alias=False) for c in cte.group_concepts}
    if self.GROUP_MODE == GroupMode.AUTO:
        return sorted(base)

    # find the index of each column in the select columns
    final = []
    found = set()
    for position, column in enumerate(select_columns, start=1):
        # The alias is always the trailing " as <alias>"; split from the
        # right so expressions that themselves contain " as " (e.g. a cast)
        # are not truncated.
        pre_alias = column.rsplit(" as ", 1)[0]
        if pre_alias in base:
            final.append(str(position))
            found.add(pre_alias)
    if not base <= found:
        raise ValueError(
            f"Group by columns {base} not found in select columns {select_columns}"
        )
    return final
|
|
991
|
+
|
|
992
|
+
def safe_quote(self, name: str) -> str:
    """Quote ``name`` via the module-level ``safe_quote`` helper using this
    dialect's ``QUOTE_CHARACTER``."""
    quote_char = self.QUOTE_CHARACTER
    return safe_quote(name, quote_char)
|
|
994
|
+
|
|
995
|
+
def quote(self, name: str) -> str:
    """Wrap ``name`` in this dialect's ``QUOTE_CHARACTER`` on both sides.

    The name is inserted verbatim; no escaping is applied.
    """
    quote_char = self.QUOTE_CHARACTER
    return f"{quote_char}{name}{quote_char}"
|
|
997
|
+
|
|
998
|
+
def render_cte(self, cte: CTE | UnionCTE, auto_sort: bool = True) -> CompiledCTE:
    """Compile one CTE into a named SQL statement.

    ``UnionCTE`` and ``RecursiveCTE`` are rendered by joining the statements
    of their internal CTEs (with ``cte.operator`` and ``UNION ALL``
    respectively). Any other CTE is rendered through ``self.SQL_TEMPLATE``
    from its select columns, source, joins, where/having split, ordering and
    grouping.

    Args:
        cte: the CTE to render.
        auto_sort: when true, the select columns are sorted; pass false to
            keep the order of ``cte.output_columns``.

    Raises:
        SyntaxError: if the CTE has joins, renders no from clause, and no
            unnest handling applies.
    """
    if isinstance(cte, UnionCTE):
        base_statement = f"\n{cte.operator}\n".join(
            [
                self.render_cte(child, auto_sort=False).statement
                for child in cte.internal_ctes
            ]
        )
        if cte.order_by:
            ordering = [self.render_order_item(i, cte) for i in cte.order_by.items]
            base_statement += "\nORDER BY " + ",".join(ordering)
        return CompiledCTE(name=cte.name, statement=base_statement)
    elif isinstance(cte, RecursiveCTE):
        base_statement = "\nUNION ALL\n".join(
            [self.render_cte(child, False).statement for child in cte.internal_ctes]
        )
        return CompiledCTE(name=cte.name, statement=base_statement)

    # Built once: the original rebuilt this list for every output column.
    join_derived_addresses = {y.address for y in cte.join_derived_concepts}

    def _regular_columns() -> list[str]:
        # visible output columns that are not produced by an unnest join
        return [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in join_derived_addresses
            and c.address not in cte.hidden_concepts
        ]

    if self.UNNEST_MODE in (
        UnnestMode.CROSS_APPLY,
        UnnestMode.CROSS_JOIN,
        UnnestMode.CROSS_JOIN_ALIAS,
        UnnestMode.SNOWFLAKE,
    ):
        # for a cross apply, derivation happens in the join
        # so we only use the alias to select
        select_columns = _regular_columns() + [
            f"{self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
            for c in cte.join_derived_concepts
            if c.address not in cte.hidden_concepts
        ]
    elif self.UNNEST_MODE in (UnnestMode.CROSS_JOIN_UNNEST, UnnestMode.PRESTO):
        select_columns = _regular_columns() + [
            f"{UNNEST_NAME} as {self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
            for c in cte.join_derived_concepts
            if c.address not in cte.hidden_concepts
        ]
    else:
        # otherwise, assume we are unnesting directly in the select
        select_columns = [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in cte.hidden_concepts
        ]
    if auto_sort:
        select_columns = sorted(select_columns)

    source: str | None = cte.base_name
    if not cte.render_from_clause:
        if len(cte.joins) > 0:
            if cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.CROSS_JOIN_ALIAS,
                UnnestMode.CROSS_JOIN,
                UnnestMode.CROSS_APPLY,
                UnnestMode.SNOWFLAKE,
            ):
                source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_expr, cte)}"
            elif cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.CROSS_JOIN_UNNEST,
            ):
                source = f"{self.render_expr(cte.join_derived_concepts[0], cte)} as {self.QUOTE_CHARACTER}{UNNEST_NAME}{self.QUOTE_CHARACTER}"
            elif cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.PRESTO,
            ):
                source = f"{self.render_expr(cte.join_derived_concepts[0], cte)} as t({self.QUOTE_CHARACTER}{UNNEST_NAME}{self.QUOTE_CHARACTER})"
            # direct - eg DUCK DB - can be directly selected inline
            elif (
                cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT
            ):
                source = None
            else:
                raise SyntaxError("CTE has joins but no from clause")
        else:
            source = None
    else:
        addr = cte.source_address
        if isinstance(addr, Address):
            source = self.render_source(addr)
        elif cte.quote_address:
            source = safe_quote(addr, self.QUOTE_CHARACTER)
        else:
            source = addr
        if cte.base_name != cte.base_alias:
            source = f"{source} as {self.QUOTE_CHARACTER}{cte.base_alias}{self.QUOTE_CHARACTER}"

    # joins are only rendered together with a from clause
    final_joins = (cte.joins or []) if cte.render_from_clause else []

    where: BuildConditional | BuildParenthetical | BuildComparison | None = None
    having: BuildConditional | BuildParenthetical | BuildComparison | None = None
    materialized = {x for x, v in cte.source_map.items() if v}
    if cte.condition:
        if not cte.group_to_grain or is_scalar_condition(
            cte.condition, materialized=materialized
        ):
            where = cte.condition
        else:
            # split the condition: scalar parts go to WHERE, the rest to HAVING
            components = decompose_condition(cte.condition)
            for x in components:
                if is_scalar_condition(x, materialized=materialized):
                    where = where + x if where else x
                else:
                    having = having + x if having else x

    logger.info(f"{LOGGER_PREFIX} {len(final_joins)} joins for cte {cte.name}")
    return CompiledCTE(
        name=cte.name,
        statement=self.SQL_TEMPLATE.render(
            select_columns=select_columns,
            base=f"{source}" if source else None,
            grain=cte.grain,
            limit=cte.limit,
            comment=cte.comment if CONFIG.show_comments else None,
            # some joins may not need to be rendered
            joins=[
                j
                for j in [
                    render_join(
                        join,
                        self.QUOTE_CHARACTER,
                        self.render_expr,
                        cte,
                        use_map=self.used_map,
                        unnest_mode=self.UNNEST_MODE,
                        null_wrapper=self.NULL_WRAPPER,
                    )
                    for join in final_joins
                ]
                if j
            ],
            where=(self.render_expr(where, cte) if where else None),
            having=(self.render_expr(having, cte) if having else None),
            order_by=(
                [self.render_order_item(i, cte) for i in cte.order_by.items]
                if cte.order_by
                else None
            ),
            group_by=self.render_cte_group_by(cte, select_columns),
        ),
    )
|
|
1154
|
+
|
|
1155
|
+
def generate_ctes(
    self,
    query: ProcessedQuery,
) -> List[CompiledCTE]:
    """Compile every CTE of ``query`` in order.

    All CTEs except the last are rendered with default settings. The last one
    is first passed through ``sort_select_output`` and rendered with
    ``auto_sort=False`` so that it respects the user's output order.
    """
    compiled = [self.render_cte(cte) for cte in query.ctes[:-1]]
    # last CTE needs to respect the user output order
    final_cte = sort_select_output(query.ctes[-1], query)
    compiled.append(self.render_cte(final_cte, auto_sort=False))
    return compiled
|
|
1163
|
+
|
|
1164
|
+
def create_show_output(
    self,
    environment: Environment,
    content: ShowCategory,
):
    """Build the static result for a ``show`` statement.

    Only ``ShowCategory.CONCEPTS`` is handled: one row per non-internal
    concept in ``environment.concepts``, giving its address (the bare name for
    concepts in the default namespace), its datatype as a string, and its
    description (empty string when there is none).

    Raises:
        NotImplementedError: for any other category.
    """
    if content != ShowCategory.CONCEPTS:
        raise NotImplementedError(f"Show category {content} not implemented")

    address_key = DEFAULT_CONCEPTS["concept_address"].address
    datatype_key = DEFAULT_CONCEPTS["concept_datatype"].address
    description_key = DEFAULT_CONCEPTS["concept_description"].address

    output_columns = [
        environment.concepts[key].reference
        for key in (address_key, datatype_key, description_key)
    ]

    output_values = []
    for _, concept in environment.concepts.items():
        if concept.is_internal:
            continue
        if concept.namespace == DEFAULT_NAMESPACE:
            shown_address = concept.name
        else:
            shown_address = concept.address
        output_values.append(
            {
                address_key: shown_address,
                datatype_key: str(concept.datatype),
                description_key: concept.metadata.description or "",
            }
        )

    return ProcessedShowStatement(
        output_columns=output_columns,
        output_values=[ProcessedStaticValueOutput(values=output_values)],
    )
|
|
1203
|
+
|
|
1204
|
+
def generate_queries(
    self,
    environment: Environment,
    statements: Sequence[
        SelectStatement
        | MultiSelectStatement
        | PersistStatement
        | ShowStatement
        | ConceptDeclarationStatement
        | RowsetDerivationStatement
        | ImportStatement
        | RawSQLStatement
        | MergeStatementV2
        | CopyStatement
        | ValidateStatement
        | CreateStatement
        | PublishStatement
        | MockStatement
    ],
    hooks: Optional[List[BaseHook]] = None,
) -> List[PROCESSED_STATEMENT_TYPES]:
    """Turn parsed statements into their processed counterparts.

    Statements are dispatched by type, in the order of the checks below.
    Persist, copy, select, multiselect and rowset statements notify every
    hook before processing. Rowset and other declaration-style statements
    contribute nothing to the returned list.

    Args:
        environment: environment forwarded to the ``process_*`` helpers and
            used to look up the default output concepts for show statements.
        statements: parsed statements to process, in order.
        hooks: optional hooks; forwarded unchanged to the ``process_*``
            helpers.

    Returns:
        Processed statements, in input order.

    Raises:
        NotImplementedError: for an unsupported statement type, or a show
            statement whose content type is unsupported.
    """
    processed: List[PROCESSED_STATEMENT_TYPES] = []
    # Empty/None hooks both mean "notify nobody"; `hooks` itself is still
    # passed through untouched to the processing helpers.
    notify = hooks or []
    for stmt in statements:
        if isinstance(stmt, PersistStatement):
            for hook in notify:
                hook.process_persist_info(stmt)
            processed.append(process_persist(environment, stmt, hooks=hooks))
        elif isinstance(stmt, CopyStatement):
            for hook in notify:
                hook.process_select_info(stmt.select)
            processed.append(process_copy(environment, stmt, hooks=hooks))
        elif isinstance(stmt, SelectStatement):
            for hook in notify:
                hook.process_select_info(stmt)
            processed.append(process_query(environment, stmt, hooks=hooks))
        elif isinstance(stmt, MultiSelectStatement):
            for hook in notify:
                hook.process_multiselect_info(stmt)
            processed.append(process_query(environment, stmt, hooks=hooks))
        elif isinstance(stmt, RowsetDerivationStatement):
            # Hooks are informed, but a rowset produces no output statement.
            for hook in notify:
                hook.process_rowset_info(stmt)
        elif isinstance(stmt, ShowStatement):
            # TODO - encapsulate this a little better
            if isinstance(stmt.content, SelectStatement):
                query_text_column = environment.concepts[
                    DEFAULT_CONCEPTS["query_text"].address
                ].reference
                shown_query = process_query(environment, stmt.content, hooks=hooks)
                processed.append(
                    ProcessedShowStatement(
                        output_columns=[query_text_column],
                        output_values=[shown_query],
                    )
                )
            elif isinstance(stmt.content, ShowCategory):
                processed.append(self.create_show_output(environment, stmt.content))
            elif isinstance(stmt.content, ValidateStatement):
                validate_columns = [
                    environment.concepts[DEFAULT_CONCEPTS[key].address].reference
                    for key in ("label", "query_text", "expected")
                ]
                shown_validation = ProcessedValidateStatement(
                    scope=stmt.content.scope,
                    targets=stmt.content.targets,
                )
                processed.append(
                    ProcessedShowStatement(
                        output_columns=validate_columns,
                        output_values=[shown_validation],
                    )
                )
            else:
                raise NotImplementedError(type(stmt.content))
        elif isinstance(stmt, RawSQLStatement):
            processed.append(ProcessedRawSQLStatement(text=stmt.text))
        elif isinstance(stmt, ValidateStatement):
            processed.append(
                ProcessedValidateStatement(scope=stmt.scope, targets=stmt.targets)
            )
        elif isinstance(stmt, MockStatement):
            processed.append(
                ProcessedMockStatement(scope=stmt.scope, targets=stmt.targets)
            )
        elif isinstance(stmt, CreateStatement):
            processed.append(process_create_statement(stmt, environment))
        elif isinstance(stmt, PublishStatement):
            processed.append(
                ProcessedPublishStatement(
                    scope=stmt.scope,
                    targets=stmt.targets,
                    action=stmt.action,
                )
            )
        elif isinstance(
            stmt,
            (
                ConceptDeclarationStatement,
                MergeStatementV2,
                ImportStatement,
                RowsetDerivationStatement,
                Datasource,
                FunctionDeclaration,
            ),
        ):
            # Declaration-style statements yield no processed output.
            continue
        else:
            raise NotImplementedError(type(stmt))
    return processed
|
|
1340
|
+
|
|
1341
|
+
def generate_partitioned_insert(
    self,
    query: ProcessedQueryPersist,
    recursive: bool,
    compiled_ctes: list[CompiledCTE],
) -> str:
    """Render an ``INSERT OVERWRITE`` statement for a partitioned persist.

    Args:
        query: persist query whose ``output_to.address.location`` names the
            target; it is quoted with ``safe_quote``.
        recursive: forwarded to the SQL template as ``recursive``.
        compiled_ctes: compiled CTEs; the last one supplies the final select
            and the rest are passed to the template as the CTE list.

    Returns:
        The SQL text rendered by ``SQL_TEMPLATE``.
    """
    quoted_target = self.safe_quote(query.output_to.address.location)
    final_select = compiled_ctes[-1].statement
    leading_ctes = compiled_ctes[:-1]
    return self.SQL_TEMPLATE.render(
        recursive=recursive,
        output=f"INSERT OVERWRITE {quoted_target}",
        full_select=final_select,
        ctes=leading_ctes,
    )
|
|
1353
|
+
|
|
1354
|
+
def compile_create_table_statement(
    self, target: CreateTableInfo, create_mode: CreateMode
) -> str:
    """Render the CREATE TABLE statement for ``target``.

    Each column's type is rendered with ``render_expr`` and handed to the
    template keyed by column name, alongside the quoted table name, the
    column list, the partition keys and the ``create_mode`` value.

    Returns:
        The SQL text rendered by ``CREATE_TABLE_SQL_TEMPLATE``.
    """
    rendered_types = {
        column.name: self.render_expr(column.type) for column in target.columns
    }
    return self.CREATE_TABLE_SQL_TEMPLATE.render(
        create_mode=create_mode.value,
        name=self.safe_quote(target.name),
        columns=target.columns,
        type_map=rendered_types,
        partition_keys=target.partition_keys,
    )
|
|
1367
|
+
|
|
1368
|
+
def compile_statement(
    self,
    query: PROCESSED_STATEMENT_TYPES,
) -> str:
    """Compile a processed statement into SQL text.

    Show statements become ``EXPLAIN``-prefixed compilations of their
    query/copy output values joined with ``";\\n"``. Raw SQL is returned
    verbatim. Validate, mock and publish statements return a placeholder
    comment followed by ``select 1;``. Create statements return one CREATE
    TABLE per target, newline-joined. Anything else is rendered through
    ``SQL_TEMPLATE`` from its CTEs, with an INSERT prefix for persist
    queries.

    Raises:
        NotImplementedError: for a persist mode other than OVERWRITE/APPEND.
        ValueError: when ``CONFIG.strict_mode`` is set and the rendered SQL
            contains ``INVALID_REFERENCE_STRING(1)``.
    """
    if isinstance(query, ProcessedShowStatement):
        explained = []
        for value in query.output_values:
            # Only query-like outputs have SQL to explain; others are skipped.
            if isinstance(value, (ProcessedQuery, ProcessedCopyStatement)):
                explained.append(
                    f"{self.EXPLAIN_KEYWORD} {self.compile_statement(value)}"
                )
        return ";\n".join(explained)

    if isinstance(query, ProcessedRawSQLStatement):
        return query.text

    if isinstance(query, ProcessedValidateStatement):
        return "--Trilogy validate statements do not have a generic SQL representation;\nselect 1;"

    if isinstance(query, ProcessedMockStatement):
        return "--Trilogy mock statements do not have a generic SQL representation;\nselect 1;"

    if isinstance(query, ProcessedPublishStatement):
        return "--Trilogy publish statements do not have a generic SQL representation;\nselect 1;"

    if isinstance(query, ProcessedCreateStatement):
        return "\n".join(
            [
                self.compile_create_table_statement(target, query.create_mode)
                for target in query.targets
            ]
        )

    recursive = any(isinstance(cte, RecursiveCTE) for cte in query.ctes)
    compiled_ctes = self.generate_ctes(query)

    insert_prefix = None
    if isinstance(query, ProcessedQueryPersist):
        if query.persist_mode == PersistMode.OVERWRITE:
            # Overwrite: (re)create the table, then insert into it.
            table_info = datasource_to_create_table_info(query.datasource)
            create_sql = self.compile_create_table_statement(
                table_info, CreateMode.CREATE_OR_REPLACE
            )
            quoted_target = self.safe_quote(query.output_to.address.location)
            insert_prefix = f"{create_sql} INSERT INTO {quoted_target} "
        elif query.persist_mode == PersistMode.APPEND:
            if query.partition_by:
                # Partitioned appends are rendered by a dedicated method.
                return self.generate_partitioned_insert(
                    query, recursive, compiled_ctes
                )
            quoted_target = self.safe_quote(query.output_to.address.location)
            insert_prefix = f"INSERT INTO {quoted_target} "
        else:
            raise NotImplementedError(
                f"Persist mode {query.persist_mode} not implemented"
            )

    final = self.SQL_TEMPLATE.render(
        recursive=recursive,
        output=insert_prefix,
        full_select=compiled_ctes[-1].statement,
        ctes=compiled_ctes[:-1],
    )

    if CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in final:
        raise ValueError(
            f"Invalid reference string found in query: {final}, this should never"
            " occur. Please create an issue to report this."
        )
    logger.info(f"{LOGGER_PREFIX} Compiled query: {final}")
    return final
|