pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
- pytrilogy-0.3.148.dist-info/METADATA +555 -0
- pytrilogy-0.3.148.dist-info/RECORD +206 -0
- pytrilogy-0.3.148.dist-info/WHEEL +5 -0
- pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +27 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +119 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +454 -0
- trilogy/core/env_processor.py +239 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1240 -0
- trilogy/core/graph_models.py +142 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2662 -0
- trilogy/core/models/build.py +2603 -0
- trilogy/core/models/build_environment.py +165 -0
- trilogy/core/models/core.py +506 -0
- trilogy/core/models/datasource.py +434 -0
- trilogy/core/models/environment.py +756 -0
- trilogy/core/models/execute.py +1213 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +270 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +207 -0
- trilogy/core/processing/node_generators/node_merge_node.py +695 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +786 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +522 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +604 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1431 -0
- trilogy/dialect/bigquery.py +314 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +159 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +376 -0
- trilogy/dialect/enums.py +149 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/__init__.py +17 -0
- trilogy/execution/config.py +119 -0
- trilogy/execution/state/__init__.py +0 -0
- trilogy/execution/state/file_state_store.py +0 -0
- trilogy/execution/state/sqllite_state_store.py +0 -0
- trilogy/execution/state/state_store.py +301 -0
- trilogy/executor.py +656 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +135 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2863 -0
- trilogy/parsing/render.py +773 -0
- trilogy/parsing/trilogy.lark +544 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +45 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +306 -0
- trilogy/scripts/common.py +430 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +311 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +555 -0
- trilogy/scripts/environment.py +59 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +472 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +748 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/refresh.py +106 -0
- trilogy/scripts/run.py +79 -0
- trilogy/scripts/serve.py +202 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +129 -0
- trilogy/scripts/trilogy.py +75 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
|
@@ -0,0 +1,2863 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import date, datetime
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from logging import getLogger
|
|
5
|
+
from os.path import dirname, join
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from re import IGNORECASE
|
|
8
|
+
from typing import Any, List, Optional, Tuple, Union
|
|
9
|
+
|
|
10
|
+
from lark import Lark, ParseTree, Token, Transformer, Tree, v_args
|
|
11
|
+
from lark.exceptions import (
|
|
12
|
+
UnexpectedCharacters,
|
|
13
|
+
UnexpectedEOF,
|
|
14
|
+
UnexpectedInput,
|
|
15
|
+
UnexpectedToken,
|
|
16
|
+
VisitError,
|
|
17
|
+
)
|
|
18
|
+
from lark.tree import Meta
|
|
19
|
+
from pydantic import ValidationError
|
|
20
|
+
|
|
21
|
+
from trilogy.constants import (
|
|
22
|
+
CONFIG,
|
|
23
|
+
DEFAULT_NAMESPACE,
|
|
24
|
+
NULL_VALUE,
|
|
25
|
+
MagicConstants,
|
|
26
|
+
Parsing,
|
|
27
|
+
)
|
|
28
|
+
from trilogy.core.enums import (
|
|
29
|
+
AddressType,
|
|
30
|
+
BooleanOperator,
|
|
31
|
+
ComparisonOperator,
|
|
32
|
+
ConceptSource,
|
|
33
|
+
CreateMode,
|
|
34
|
+
DatasourceState,
|
|
35
|
+
DatePart,
|
|
36
|
+
Derivation,
|
|
37
|
+
FunctionType,
|
|
38
|
+
Granularity,
|
|
39
|
+
IOType,
|
|
40
|
+
Modifier,
|
|
41
|
+
Ordering,
|
|
42
|
+
PersistMode,
|
|
43
|
+
PublishAction,
|
|
44
|
+
Purpose,
|
|
45
|
+
ShowCategory,
|
|
46
|
+
ValidationScope,
|
|
47
|
+
WindowOrder,
|
|
48
|
+
WindowType,
|
|
49
|
+
)
|
|
50
|
+
from trilogy.core.exceptions import (
|
|
51
|
+
InvalidSyntaxException,
|
|
52
|
+
MissingParameterException,
|
|
53
|
+
UndefinedConceptException,
|
|
54
|
+
)
|
|
55
|
+
from trilogy.core.functions import (
|
|
56
|
+
CurrentDate,
|
|
57
|
+
FunctionFactory,
|
|
58
|
+
)
|
|
59
|
+
from trilogy.core.internal import ALL_ROWS_CONCEPT, INTERNAL_NAMESPACE
|
|
60
|
+
from trilogy.core.models.author import (
|
|
61
|
+
AggregateWrapper,
|
|
62
|
+
AlignClause,
|
|
63
|
+
AlignItem,
|
|
64
|
+
ArgBinding,
|
|
65
|
+
CaseElse,
|
|
66
|
+
CaseWhen,
|
|
67
|
+
Comment,
|
|
68
|
+
Comparison,
|
|
69
|
+
Concept,
|
|
70
|
+
ConceptRef,
|
|
71
|
+
Conditional,
|
|
72
|
+
CustomFunctionFactory,
|
|
73
|
+
CustomType,
|
|
74
|
+
DeriveClause,
|
|
75
|
+
DeriveItem,
|
|
76
|
+
Expr,
|
|
77
|
+
FilterItem,
|
|
78
|
+
Function,
|
|
79
|
+
FunctionCallWrapper,
|
|
80
|
+
Grain,
|
|
81
|
+
HavingClause,
|
|
82
|
+
Metadata,
|
|
83
|
+
MultiSelectLineage,
|
|
84
|
+
OrderBy,
|
|
85
|
+
OrderItem,
|
|
86
|
+
Parenthetical,
|
|
87
|
+
RowsetItem,
|
|
88
|
+
SubselectComparison,
|
|
89
|
+
UndefinedConceptFull,
|
|
90
|
+
WhereClause,
|
|
91
|
+
Window,
|
|
92
|
+
WindowItem,
|
|
93
|
+
WindowItemOrder,
|
|
94
|
+
WindowItemOver,
|
|
95
|
+
)
|
|
96
|
+
from trilogy.core.models.core import (
|
|
97
|
+
ArrayType,
|
|
98
|
+
DataType,
|
|
99
|
+
DataTyped,
|
|
100
|
+
ListWrapper,
|
|
101
|
+
MapType,
|
|
102
|
+
MapWrapper,
|
|
103
|
+
NumericType,
|
|
104
|
+
StructComponent,
|
|
105
|
+
StructType,
|
|
106
|
+
TraitDataType,
|
|
107
|
+
TupleWrapper,
|
|
108
|
+
arg_to_datatype,
|
|
109
|
+
dict_to_map_wrapper,
|
|
110
|
+
is_compatible_datatype,
|
|
111
|
+
list_to_wrapper,
|
|
112
|
+
tuple_to_wrapper,
|
|
113
|
+
)
|
|
114
|
+
from trilogy.core.models.datasource import (
|
|
115
|
+
Address,
|
|
116
|
+
ColumnAssignment,
|
|
117
|
+
Datasource,
|
|
118
|
+
File,
|
|
119
|
+
Query,
|
|
120
|
+
RawColumnExpr,
|
|
121
|
+
)
|
|
122
|
+
from trilogy.core.models.environment import (
|
|
123
|
+
DictImportResolver,
|
|
124
|
+
Environment,
|
|
125
|
+
FileSystemImportResolver,
|
|
126
|
+
Import,
|
|
127
|
+
)
|
|
128
|
+
from trilogy.core.statements.author import (
|
|
129
|
+
ConceptDeclarationStatement,
|
|
130
|
+
ConceptDerivationStatement,
|
|
131
|
+
ConceptTransform,
|
|
132
|
+
CopyStatement,
|
|
133
|
+
CreateStatement,
|
|
134
|
+
FunctionDeclaration,
|
|
135
|
+
ImportStatement,
|
|
136
|
+
Limit,
|
|
137
|
+
MergeStatementV2,
|
|
138
|
+
MockStatement,
|
|
139
|
+
MultiSelectStatement,
|
|
140
|
+
PersistStatement,
|
|
141
|
+
PublishStatement,
|
|
142
|
+
RawSQLStatement,
|
|
143
|
+
RowsetDerivationStatement,
|
|
144
|
+
SelectItem,
|
|
145
|
+
SelectStatement,
|
|
146
|
+
ShowStatement,
|
|
147
|
+
TypeDeclaration,
|
|
148
|
+
ValidateStatement,
|
|
149
|
+
)
|
|
150
|
+
from trilogy.parsing.common import (
|
|
151
|
+
align_item_to_concept,
|
|
152
|
+
arbitrary_to_concept,
|
|
153
|
+
constant_to_concept,
|
|
154
|
+
derive_item_to_concept,
|
|
155
|
+
process_function_args,
|
|
156
|
+
rowset_to_concepts,
|
|
157
|
+
)
|
|
158
|
+
from trilogy.parsing.exceptions import NameShadowError, ParseError
|
|
159
|
+
|
|
160
|
+
# Dedicated logger for parse-timing / performance diagnostics.
perf_logger = getLogger("trilogy.parse.performance")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class ParsePass(Enum):
    """Which of the two parse passes is currently running.

    The engine parses twice: an INITIAL pass that tolerates missing
    references, then a VALIDATION pass that re-resolves everything
    (picking up circular/forward dependencies).
    """

    # First pass: declarations collected, missing concepts tolerated.
    INITIAL = 1
    # Second pass: strict re-resolution after all units are parsed.
    VALIDATION = 2
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# Python literal types that may appear directly as constant values.
CONSTANT_TYPES = (int, float, str, bool, ListWrapper, TupleWrapper, MapWrapper)

# Label used for the root (entry-point) parse unit when none is given.
SELF_LABEL = "root"

# Upper bound on datatype-rehydration passes; guarantees termination.
MAX_PARSE_DEPTH = 10

# Datatypes accepted for a datasource incremental column.
# (Idiom fix: set literal instead of set([...]) — same contents.)
SUPPORTED_INCREMENTAL_TYPES: set[DataType] = {DataType.DATE, DataType.TIMESTAMP}

# Package root — presumably used to resolve std-library `.preql` imports;
# confirm against callers outside this chunk.
STDLIB_ROOT = Path(__file__).parent.parent
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@dataclass
class WholeGrainWrapper:
    """Parse-intermediate wrapper marking a WHERE clause as whole-grain."""

    where: WhereClause
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@dataclass
class FunctionBindingType:
    """Optional declared datatype for a function argument binding."""

    type: DataType | TraitDataType | None = None
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@dataclass
class DropOn:
    """Parse-intermediate: function types attached to a 'drop on' clause."""

    functions: List[FunctionType]
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@dataclass
class AddOn:
    """Parse-intermediate: function types attached to an 'add on' clause."""

    functions: List[FunctionType]
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
@dataclass
class DatasourcePartitionClause:
    """Concepts a datasource declares as its partition columns."""

    columns: List[ConceptRef]
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@dataclass
class DatasourceIncrementalClause:
    """Concepts a datasource declares as its incremental-load columns."""

    columns: List[ConceptRef]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# Load the Trilogy grammar once at import time and build a cached LALR parser.
# Fix: pass encoding="utf-8" explicitly — the grammar file's encoding should
# not depend on the platform's locale default.
with open(join(dirname(__file__), "trilogy.lark"), "r", encoding="utf-8") as f:
    PARSER = Lark(
        f.read(),
        start="start",
        propagate_positions=True,
        g_regex_flags=IGNORECASE,
        parser="lalr",
        cache=True,
    )
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def parse_concept_reference(
    name: str, environment: Environment, purpose: Optional[Purpose] = None
) -> Tuple[str, str, str, str | None]:
    """Split a (possibly dotted) concept name into its parts.

    Returns (lookup key, namespace, bare name, property parent or None).
    For PROPERTY declarations the dotted prefix is the *parent concept*
    (whose namespace is inherited); otherwise the prefix is a namespace.
    """
    parent = None
    if "." not in name:
        # Bare name: resolve against the environment's own namespace.
        namespace = environment.namespace or DEFAULT_NAMESPACE
        return name, namespace, name, parent
    if purpose == Purpose.PROPERTY:
        # Prefix is the owning concept, not a namespace.
        parent, name = name.rsplit(".", 1)
        namespace = environment.concepts[parent].namespace or DEFAULT_NAMESPACE
    else:
        namespace, name = name.rsplit(".", 1)
    return f"{namespace}.{name}", namespace, name, parent
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def expr_to_boolean(
    root,
    function_factory: FunctionFactory,
) -> Union[Comparison, SubselectComparison, Conditional]:
    """Coerce an arbitrary expression into a boolean condition.

    Existing comparisons/conditionals pass through; BOOL values become
    `x = true`; INTEGER values are cast via BOOL() then compared to true;
    everything else becomes an `IS NOT NULL` check.
    """
    if isinstance(root, (Comparison, SubselectComparison, Conditional)):
        return root
    datatype = arg_to_datatype(root)
    if datatype == DataType.BOOL:
        return Comparison(left=root, right=True, operator=ComparisonOperator.EQ)
    if datatype == DataType.INTEGER:
        wrapped = function_factory.create_function(
            [root],
            FunctionType.BOOL,
        )
        return Comparison(left=wrapped, right=True, operator=ComparisonOperator.EQ)
    # Fallback: any non-null value is treated as truthy.
    return Comparison(left=root, right=NULL_VALUE, operator=ComparisonOperator.IS_NOT)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def unwrap_transformation(
    input: Expr,
    environment: Environment,
) -> (
    Function
    | FilterItem
    | WindowItem
    | AggregateWrapper
    | FunctionCallWrapper
    | Parenthetical
):
    """Normalize an expression into a derivable transformation object.

    Shapes that are already transformations pass through unchanged; a bare
    concept reference is wrapped in an ALIAS function; any remaining value
    is treated as a literal and wrapped in a CONSTANT function.
    """
    if isinstance(input, (Function, AggregateWrapper)):
        return input
    if isinstance(input, ConceptRef):
        # A bare reference derives via ALIAS over the resolved concept.
        target = environment.concepts[input.address]
        return Function(
            operator=FunctionType.ALIAS,
            output_datatype=target.datatype,
            output_purpose=target.purpose,
            arguments=[input],
        )
    if isinstance(input, (FilterItem, WindowItem, FunctionCallWrapper, Parenthetical)):
        return input
    # Literal fallback: wrap as a constant.
    return Function.model_construct(
        operator=FunctionType.CONSTANT,
        output_datatype=arg_to_datatype(input),
        output_purpose=Purpose.CONSTANT,
        arguments=[input],
    )
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def rehydrate_lineage(
    lineage: Any, environment: Environment, function_factory: FunctionFactory
) -> Any:
    """Fix datatype propagation. This is a hack to fix the fact that we don't know the datatypes of functions until we've parsed all concepts"""
    if isinstance(lineage, Function):
        # Rebuild bottom-up so the factory recomputes output datatypes.
        children = [
            rehydrate_lineage(arg, environment, function_factory)
            for arg in lineage.arguments
        ]
        return function_factory.create_function(
            children,
            operator=lineage.operator,
        )
    if isinstance(lineage, Parenthetical):
        lineage.content = rehydrate_lineage(
            lineage.content, environment, function_factory
        )
        return lineage
    if isinstance(lineage, WindowItem):
        # this is temporarily guaranteed until we do some upstream work
        assert isinstance(lineage.content, ConceptRef)
        lineage.content.datatype = environment.concepts[
            lineage.content.address
        ].datatype
        return lineage
    if isinstance(lineage, AggregateWrapper):
        lineage.function = rehydrate_lineage(
            lineage.function, environment, function_factory
        )
        return lineage
    if isinstance(lineage, RowsetItem):
        lineage.content.datatype = environment.concepts[
            lineage.content.address
        ].datatype
        return lineage
    # Anything else carries no derivable datatype — return unchanged.
    return lineage
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def rehydrate_concept_lineage(
    concept: Concept, environment: Environment, function_factory: FunctionFactory
) -> Concept:
    """Recompute a concept's lineage and refresh its datatype from the result."""
    refreshed = rehydrate_lineage(concept.lineage, environment, function_factory)
    concept.lineage = refreshed
    if isinstance(refreshed, DataTyped):
        concept.datatype = refreshed.output_datatype
    return concept
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
class ParseToObjects(Transformer):
|
|
351
|
+
def __init__(
    self,
    environment: Environment,
    parse_address: str | None = None,
    token_address: Path | str | None = None,
    parsed: dict[str, "ParseToObjects"] | None = None,
    tokens: dict[Path | str, ParseTree] | None = None,
    text_lookup: dict[Path | str, str] | None = None,
    environment_lookup: dict[str, Environment] | None = None,
    import_keys: list[str] | None = None,
    parse_config: Parsing | None = None,
):
    """Create a transformer bound to *environment*.

    The dict arguments (parsed/tokens/text_lookup/environment_lookup)
    are shared caches passed between parent and child parsers so that
    imports are parsed once; fresh dicts are created when omitted.
    """
    Transformer.__init__(self, True)
    self.environment: Environment = environment
    # Identity of this parse unit; SELF_LABEL ("root") when top-level.
    self.parse_address: str = parse_address or SELF_LABEL
    self.token_address: Path | str = token_address or SELF_LABEL
    self.parsed: dict[str, ParseToObjects] = parsed if parsed is not None else {}
    self.tokens: dict[Path | str, ParseTree] = tokens if tokens is not None else {}
    self.environments: dict[str, Environment] = environment_lookup or {}
    self.text_lookup: dict[Path | str, str] = (
        text_lookup if text_lookup is not None else {}
    )
    # we do a second pass to pick up circular dependencies
    # after initial parsing
    self.parse_pass = ParsePass.INITIAL
    self.function_factory = FunctionFactory(self.environment)
    self.import_keys: list[str] = import_keys or ["root"]
    self.parse_config: Parsing = parse_config or CONFIG.parsing
|
380
|
+
def set_text(self, text: str):
    # Cache the raw source text for this parse unit (used for error context).
    self.text_lookup[self.token_address] = text
|
|
382
|
+
|
|
383
|
+
def transform(self, tree: Tree):
    """Run the lark transformation, caching the token tree for re-parsing."""
    output = super().transform(tree)
    # Stash the tree so run_second_parse_pass can replay it later.
    self.tokens[self.token_address] = tree
    return output
|
|
387
|
+
|
|
388
|
+
def prepare_parse(self):
    """Reset to the INITIAL pass and relax missing-concept errors,
    recursively preparing every child parser the same way."""
    self.parse_pass = ParsePass.INITIAL
    self.environment.concepts.fail_on_missing = False
    # Idiom fix: iterate .values() — the keys were discarded before.
    for child in self.parsed.values():
        child.prepare_parse()
|
|
393
|
+
|
|
394
|
+
def run_second_parse_pass(self, force: bool = False):
    """Replay the cached token tree in VALIDATION mode.

    After re-transforming, repeatedly rehydrates concept lineage until
    no concept is left with an UNKNOWN datatype (bounded by
    MAX_PARSE_DEPTH to guarantee termination).

    NOTE(review): the ``force`` parameter is unused in this body —
    confirm whether external callers rely on it before removing.
    """
    # Nothing was tokenized for this unit; nothing to replay.
    if self.token_address not in self.tokens:
        return []
    self.parse_pass = ParsePass.VALIDATION
    # Snapshot via list(): child passes may register new parsers while
    # we iterate.
    for _, v in list(self.parsed.items()):
        if v.parse_pass == ParsePass.VALIDATION:
            continue
        v.run_second_parse_pass()
    reparsed = self.transform(self.tokens[self.token_address])
    self.environment.concepts.undefined = {}
    passed = False
    passes = 0
    # output datatypes for functions may have been wrong
    # as they were derived from not fully understood upstream types
    # so loop through to recreate function lineage until all datatypes are known
    while not passed:
        new_passed = True
        for x, y in self.environment.concepts.items():
            if y.datatype == DataType.UNKNOWN and y.lineage:
                self.environment.concepts[x] = rehydrate_concept_lineage(
                    y, self.environment, self.function_factory
                )
                new_passed = False
        passes += 1
        # Safety valve: stop even if some datatypes remain UNKNOWN.
        if passes > MAX_PARSE_DEPTH:
            break
        passed = new_passed
    return reparsed
|
|
424
|
+
|
|
425
|
+
def start(self, args):
    # Grammar entry point: pass through the list of parsed statements.
    return args
|
|
427
|
+
|
|
428
|
+
def LINE_SEPARATOR(self, args):
    # Terminal: normalize to the shared sentinel so rules can filter
    # separators out of their argument lists.
    return MagicConstants.LINE_SEPARATOR
|
|
430
|
+
|
|
431
|
+
def block(self, args):
    """A statement plus any trailing comments/separators.

    Trailing comments after a concept declaration are folded into that
    concept's description metadata; all other statement types pass
    through unchanged.
    """
    output = args[0]
    if isinstance(output, ConceptDeclarationStatement):
        # Only attach comments that follow directly (not a bare separator).
        if len(args) > 1 and args[1] != MagicConstants.LINE_SEPARATOR:
            comments = [x for x in args[1:] if isinstance(x, Comment)]
            # Keep only the text after the first '#'.
            merged = "\n".join([x.text.split("#")[1].rstrip() for x in comments])
            output.concept.metadata.description = merged
    # this is a bad plan for now;
    # because a comment after an import statement is very common
    # and it's not intuitive that it modifies the import description
    # if isinstance(output, ImportStatement):
    #     if len(args) > 1 and isinstance(args[1], Comment):
    #         comment = args[1].text.split("#")[1].strip()
    #         namespace = output.alias
    #         for _, v in self.environment.concepts.items():
    #             if v.namespace == namespace:
    #                 if v.metadata.description:
    #                     v.metadata.description = (
    #                         f"{comment}: {v.metadata.description}"
    #                     )
    #                 else:
    #                     v.metadata.description = comment

    return args[0]
|
|
455
|
+
|
|
456
|
+
def metadata(self, args):
    """Fold alternating key/value tokens into a Metadata object."""
    keys = args[::2]
    values = args[1::2]
    return Metadata(**dict(zip(keys, values)))
|
|
459
|
+
|
|
460
|
+
def IDENTIFIER(self, args) -> str:
    # Terminal: bare identifier -> raw string value.
    return args.value
|
|
462
|
+
|
|
463
|
+
def ORDER_IDENTIFIER(self, args) -> ConceptRef:
    # Terminal used in ordering clauses: resolve the stripped name to a
    # concept reference in the current environment.
    return self.environment.concepts[args.value.strip()].reference
|
|
465
|
+
|
|
466
|
+
def WILDCARD_IDENTIFIER(self, args) -> str:
    # Terminal: wildcard identifier -> raw string value.
    return args.value
|
|
468
|
+
|
|
469
|
+
def QUOTED_IDENTIFIER(self, args) -> str:
    # Terminal: strip the surrounding quote characters.
    return args.value[1:-1]
|
|
471
|
+
|
|
472
|
+
@v_args(meta=True)
def concept_lit(self, meta: Meta, args) -> ConceptRef:
    """Resolve a concept literal into a ConceptRef with line metadata."""
    address = args[0]
    # Bare names in the default namespace are fully qualified up front.
    if "." not in address and self.environment.namespace == DEFAULT_NAMESPACE:
        address = f"{DEFAULT_NAMESPACE}.{address}"
    mapping = self.environment.concepts[address]
    datatype = mapping.output_datatype
    return ConceptRef(
        # this is load-bearing to handle pseudonyms
        address=mapping.address,
        metadata=Metadata(line_number=meta.line),
        datatype=datatype,
    )
|
|
485
|
+
|
|
486
|
+
def ADDRESS(self, args) -> Address:
    # Terminal: unquoted physical address for a datasource.
    return Address(location=args.value, quoted=False)
|
|
488
|
+
|
|
489
|
+
def QUOTED_ADDRESS(self, args) -> Address:
    # Terminal: quoted address — strip the surrounding quote characters.
    return Address(location=args.value[1:-1], quoted=True)
|
|
491
|
+
|
|
492
|
+
def STRING_CHARS(self, args) -> str:
    # Terminal: string body -> raw string value.
    return args.value
|
|
494
|
+
|
|
495
|
+
def SINGLE_STRING_CHARS(self, args) -> str:
    # Terminal: single-quoted string body -> raw string value.
    return args.value
|
|
497
|
+
|
|
498
|
+
def DOUBLE_STRING_CHARS(self, args) -> str:
    # Terminal: double-quoted string body -> raw string value.
    return args.value
|
|
500
|
+
|
|
501
|
+
def MINUS(self, args) -> str:
    # Terminal: normalize the minus token to a literal "-".
    return "-"
|
|
503
|
+
|
|
504
|
+
@v_args(meta=True)
def struct_component(self, meta: Meta, args) -> StructComponent:
    """Build one struct field from (name, type, *modifiers)."""
    flags = [item for item in args if isinstance(item, Modifier)]
    return StructComponent(name=args[0], type=args[1], modifiers=flags)
|
|
511
|
+
|
|
512
|
+
@v_args(meta=True)
def struct_type(self, meta: Meta, args) -> StructType:
    """Assemble a struct type; non-component args are concept lookups."""
    resolved: list[
        DataType
        | MapType
        | ArrayType
        | NumericType
        | StructType
        | StructComponent
        | Concept
    ] = []
    for item in args:
        if isinstance(item, StructComponent):
            resolved.append(item)
        else:
            # Field referenced by name: resolve against the environment,
            # reporting the declaration line on failure.
            resolved.append(
                self.environment.concepts.__getitem__(  # type: ignore
                    key=item, line_no=meta.line
                )
            )
    fields_map = {
        field.name: field
        for field in resolved
        if isinstance(field, (Concept, StructComponent))
    }
    return StructType(fields=resolved, fields_map=fields_map)
|
|
538
|
+
|
|
539
|
+
def list_type(self, args) -> ArrayType:
    """Array type; a bare string element is resolved as a concept name."""
    inner = args[0]
    if isinstance(inner, str):
        inner = self.environment.concepts[inner]
    return ArrayType(type=inner)
|
|
544
|
+
|
|
545
|
+
def numeric_type(self, args) -> NumericType:
    # numeric(precision, scale)
    return NumericType(precision=args[0], scale=args[1])
|
|
547
|
+
|
|
548
|
+
def map_type(self, args) -> MapType:
    """Map type; string key/value args are concept names to resolve.

    Bug fix: the two resolutions were previously chained with ``elif``,
    so when the key was a string the value was never resolved even if it
    was also a string. They are independent checks.
    """
    key = args[0]
    value = args[1]
    if isinstance(key, str):
        key = self.environment.concepts[key]
    if isinstance(value, str):
        value = self.environment.concepts[value]
    return MapType(key_type=key, value_type=value)
|
|
556
|
+
|
|
557
|
+
@v_args(meta=True)
def data_type(
    self, meta: Meta, args
) -> DataType | TraitDataType | ArrayType | StructType | MapType | NumericType:
    """Resolve a type expression, validating and applying declared traits."""
    resolved = args[0]
    traits = args[2:]
    base: DataType | TraitDataType | ArrayType | StructType | MapType | NumericType
    # Composite types arrive pre-built; scalars arrive as raw strings.
    if isinstance(resolved, (StructType, ArrayType, NumericType, MapType)):
        base = resolved
    else:
        base = DataType(args[0].lower())
    if not traits:
        return base
    for trait in traits:
        if trait not in self.environment.data_types:
            raise ParseError(
                f"Invalid trait (type) {trait} for {base}, line {meta.line}."
            )
        matched = self.environment.data_types[trait]
        if not is_compatible_datatype(matched.type, base):
            raise ParseError(
                f"Invalid trait (type) {trait} for {base}, line {meta.line}. Trait expects type {matched.type}, has {base}"
            )
    return TraitDataType(type=base, traits=traits)
|
|
588
|
+
|
|
589
|
+
def array_comparison(self, args) -> ComparisonOperator:
    # Multi-token operator (e.g. "not in"): join lowercased token values.
    return ComparisonOperator([x.value.lower() for x in args])
|
|
591
|
+
|
|
592
|
+
def COMPARISON_OPERATOR(self, args) -> ComparisonOperator:
    # Terminal: whitespace-trimmed operator token -> enum member.
    return ComparisonOperator(args.strip())
|
|
594
|
+
|
|
595
|
+
def LOGICAL_OPERATOR(self, args) -> BooleanOperator:
    # Terminal: case-insensitive AND/OR -> enum member.
    return BooleanOperator(args.lower())
|
|
597
|
+
|
|
598
|
+
def concept_assignment(self, args):
    # Pass through [*modifiers, concept_name] for column_assignment.
    return args
|
|
600
|
+
|
|
601
|
+
@v_args(meta=True)
def column_assignment(self, meta: Meta, args):
    """Bind a physical column alias to a concept.

    Two grammar shapes arrive:
    - [alias, concept_assignment]: explicit ``alias: concept`` form
    - [concept_assignment]: shorthand — the concept name (last list
      element) doubles as the column alias.
    """
    modifiers = []
    if len(args) == 2:
        alias = args[0]
        concept_list = args[1]
    else:
        alias = args[0][-1]
        concept_list = args[0]
    # recursively collect modifiers
    if len(concept_list) > 1:
        modifiers += concept_list[:-1]
    concept = concept_list[-1]
    resolved = self.environment.concepts.__getitem__(  # type: ignore
        key=concept, line_no=meta.line, file=self.token_address
    )
    return ColumnAssignment(
        alias=alias, modifiers=modifiers, concept=resolved.reference
    )
|
|
620
|
+
|
|
621
|
+
def _TERMINATOR(self, args):
|
|
622
|
+
return None
|
|
623
|
+
|
|
624
|
+
def _static_functions(self, args):
|
|
625
|
+
return args[0]
|
|
626
|
+
|
|
627
|
+
def MODIFIER(self, args) -> Modifier:
    """Convert a modifier keyword token into a Modifier enum member."""
    raw = args.value
    return Modifier(raw)
|
|
629
|
+
|
|
630
|
+
def SHORTHAND_MODIFIER(self, args) -> Modifier:
    """Convert a shorthand modifier token into a Modifier enum member."""
    raw = args.value
    return Modifier(raw)
|
|
632
|
+
|
|
633
|
+
def PURPOSE(self, args) -> Purpose:
    """Convert a purpose keyword token into a Purpose enum member."""
    raw = args.value
    return Purpose(raw)
|
|
635
|
+
|
|
636
|
+
def AUTO(self, args) -> Purpose:
    """The 'auto' keyword maps directly to Purpose.AUTO."""
    return Purpose.AUTO
|
|
638
|
+
|
|
639
|
+
def CONST(self, args) -> Purpose:
    """The abbreviated 'const' keyword maps to Purpose.CONSTANT."""
    return Purpose.CONSTANT
|
|
641
|
+
|
|
642
|
+
def CONSTANT(self, args) -> Purpose:
    """The full 'constant' keyword maps to Purpose.CONSTANT."""
    return Purpose.CONSTANT
|
|
644
|
+
|
|
645
|
+
def PROPERTY(self, args):
    """The 'property' keyword maps to Purpose.PROPERTY."""
    return Purpose.PROPERTY
|
|
647
|
+
|
|
648
|
+
def HASH_TYPE(self, args):
    """Return the raw text of a hash-type token."""
    return args.value
|
|
650
|
+
|
|
651
|
+
@v_args(meta=True)
def prop_ident(self, meta: Meta, args) -> Tuple[List[Concept], str]:
    """Resolve a <key1,key2>.name property identifier into (parent concepts, name)."""
    *grain_refs, prop_name = args
    parents = [self.environment.concepts[ref] for ref in grain_refs]
    return parents, prop_name
|
|
654
|
+
|
|
655
|
+
@v_args(meta=True)
def concept_property_declaration(self, meta: Meta, args) -> Concept:
    """Register a property (or unique property) concept bound to parent key(s).

    Accepts either a tuple form (parents, name) from prop_ident, or a
    dotted string 'parent.name'. Registers the new Concept in the
    environment and returns it.
    """
    unique = False
    # a leading token other than the PROPERTY purpose marks a UNIQUE property;
    # slice it off so the remaining indices line up with the plain form
    if not args[0] == Purpose.PROPERTY:
        unique = True
        args = args[1:]
    metadata = Metadata()
    modifiers = []
    for arg in args:
        if isinstance(arg, Metadata):
            metadata = arg
        if isinstance(arg, Modifier):
            modifiers.append(arg)

    # args[1] is the declaration; args[2] the datatype (post-slice indices)
    declaration = args[1]
    if isinstance(declaration, (tuple)):
        # <key1,key2>.name form: parents already resolved by prop_ident
        parents, name = declaration
        if "." in name:
            namespace, name = name.split(".", 1)
        else:
            namespace = self.environment.namespace or DEFAULT_NAMESPACE
    else:
        # dotted-string form must include the parent key
        if "." not in declaration:
            raise ParseError(
                f"Property declaration {args[1]} must be fully qualified with a parent key"
            )
        grain, name = declaration.rsplit(".", 1)
        parent = self.environment.concepts[grain]
        parents = [parent]
        namespace = parent.namespace
    concept = Concept(
        name=name,
        datatype=args[2],
        purpose=Purpose.PROPERTY if not unique else Purpose.UNIQUE_PROPERTY,
        metadata=metadata,
        grain=Grain(components={x.address for x in parents}),
        namespace=namespace,
        keys=set([x.address for x in parents]),
        modifiers=modifiers,
    )

    self.environment.add_concept(concept, meta)
    return concept
|
|
698
|
+
|
|
699
|
+
@v_args(meta=True)
def concept_declaration(self, meta: Meta, args) -> ConceptDeclarationStatement:
    """Handle a concept declaration statement.

    Collects any Metadata/Modifier arguments, resolves the declared name
    into its namespace, and registers a new root Concept in the
    environment. PARAMETER-purpose declarations are special-cased: the
    value is pulled from environment parameters, coerced to the declared
    datatype, and rewritten as a constant derivation.
    """
    metadata = Metadata()
    modifiers = []
    purpose = args[0]
    datatype = args[2]
    for arg in args:
        if isinstance(arg, Metadata):
            metadata = arg
        if isinstance(arg, Modifier):
            modifiers.append(arg)
    name = args[1]
    _, namespace, name, _ = parse_concept_reference(name, self.environment)
    if purpose == Purpose.PARAMETER:
        value = self.environment.parameters.get(name, None)
        # bug fix: check for None explicitly so valid falsy parameter
        # values (0, 0.0, False, "") are not rejected as missing
        if value is None:
            raise MissingParameterException(
                f'This script requires parameter "{name}" to be set in environment.'
            )
        value = self._coerce_parameter_value(value, datatype, name)
        # NOTE(review): this branch returns the Concept produced by
        # constant_derivation rather than a ConceptDeclarationStatement;
        # preserved as-is since callers may depend on it.
        rval = self.constant_derivation(
            meta, [Purpose.CONSTANT, name, value, metadata]
        )
        return rval

    concept = Concept(
        name=name,
        datatype=datatype,
        purpose=purpose,
        metadata=metadata,
        namespace=namespace,
        modifiers=modifiers,
        derivation=Derivation.ROOT,
        granularity=Granularity.MULTI_ROW,
    )
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta=meta)
    return ConceptDeclarationStatement(concept=concept)

def _coerce_parameter_value(self, value, datatype, name: str):
    """Coerce an externally supplied parameter value to the declared datatype.

    Raises:
        ParseError: if the datatype is unsupported, or a string cannot be
            interpreted as a boolean.
    """
    if datatype == DataType.INTEGER:
        return int(value)
    if datatype == DataType.FLOAT:
        return float(value)
    if datatype == DataType.BOOL:
        # bug fix: bool("false") is True in Python; interpret common
        # textual booleans instead of raw string truthiness
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            lowered = value.strip().lower()
            if lowered in ("true", "1", "yes"):
                return True
            if lowered in ("false", "0", "no"):
                return False
            raise ParseError(
                f"Cannot interpret value {value!r} as boolean for parameter {name}."
            )
        return bool(value)
    if datatype == DataType.STRING:
        return str(value)
    if datatype == DataType.DATE:
        # datetime is a subclass of date, so a datetime value passes
        # through here unchanged — presumably acceptable; TODO confirm
        if isinstance(value, date):
            return value
        return date.fromisoformat(value)
    if datatype == DataType.DATETIME:
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(value)
    raise ParseError(
        f"Unsupported datatype {datatype} for parameter {name}."
    )
|
|
759
|
+
|
|
760
|
+
@v_args(meta=True)
def concept_derivation(self, meta: Meta, args) -> ConceptDerivationStatement:
    """Derive a new concept from an expression (filter, window, aggregate, etc.).

    args: [purpose, name, source_expression, optional metadata]. Resolves
    the target namespace, converts the expression to a Concept, validates
    the declared purpose against the derived one, and registers the
    concept in the environment.
    """
    if len(args) > 3:
        metadata = args[3]
    else:
        metadata = None
    purpose = args[0]
    raw_name = args[1]
    # abc.def.property pattern
    if isinstance(raw_name, str):
        lookup, namespace, name, parent_concept = parse_concept_reference(
            raw_name, self.environment, purpose
        )
    # <abc.def,zef.gf>.property pattern
    else:
        keys, name = raw_name
        keys = [x.address for x in keys]
        namespaces = set([x.rsplit(".", 1)[0] for x in keys])
        # mixed parent namespaces fall back to the environment default
        if not len(namespaces) == 1:
            namespace = self.environment.namespace or DEFAULT_NAMESPACE
        else:
            namespace = namespaces.pop()
    source_value = args[2]
    # we need to strip off every parenthetical to see what is being assigned.
    while isinstance(source_value, Parenthetical):
        source_value = source_value.content

    if isinstance(
        source_value,
        (
            FilterItem,
            WindowItem,
            AggregateWrapper,
            Function,
            FunctionCallWrapper,
            Comparison,
        ),
    ):
        concept = arbitrary_to_concept(
            source_value,
            name=name,
            namespace=namespace,
            environment=self.environment,
            metadata=metadata,
        )

        # let constant purposes exist to support round-tripping
        # as a build concept may end up with a constant based on constant inlining happening recursively
        if purpose == Purpose.KEY and concept.purpose != Purpose.KEY:
            concept.purpose = Purpose.KEY
        elif (
            purpose
            and purpose != Purpose.AUTO
            and concept.purpose != purpose
            and purpose != Purpose.CONSTANT
        ):
            raise SyntaxError(
                f'Concept {name} purpose {concept.purpose} does not match declared purpose {purpose}. Suggest defaulting to "auto"'
            )

        if concept.metadata:
            concept.metadata.line_number = meta.line
        self.environment.add_concept(concept, meta=meta)
        return ConceptDerivationStatement(concept=concept)

    elif isinstance(source_value, CONSTANT_TYPES):
        # literal constants get a dedicated conversion path
        concept = constant_to_concept(
            source_value,
            name=name,
            namespace=namespace,
            metadata=metadata,
        )
        if concept.metadata:
            concept.metadata.line_number = meta.line
        self.environment.add_concept(concept, meta=meta)
        return ConceptDerivationStatement(concept=concept)

    # include the offending source text in the error for easier debugging
    raise SyntaxError(
        f"Received invalid type {type(args[2])} {args[2]} as input to concept derivation: `{self.text_lookup[self.token_address][meta.start_pos:meta.end_pos]}`"
    )
|
|
841
|
+
|
|
842
|
+
@v_args(meta=True)
def rowset_derivation_statement(
    self, meta: Meta, args
) -> RowsetDerivationStatement:
    """Build a named rowset from a select, registering its derived concepts.

    args: [name, select statement]. Each concept produced from the rowset
    is force-added (overwriting any prior definition), and the rowset's
    lineage is recorded on the environment.
    """
    name = args[0]
    select: SelectStatement | MultiSelectStatement = args[1]
    output = RowsetDerivationStatement(
        name=name,
        select=select,
        namespace=self.environment.namespace or DEFAULT_NAMESPACE,
    )

    for new_concept in rowset_to_concepts(output, self.environment):
        if new_concept.metadata:
            new_concept.metadata.line_number = meta.line
        # force=True: rowset concepts may legitimately replace earlier definitions
        self.environment.add_concept(new_concept, force=True)

    self.environment.add_rowset(
        output.name, output.select.as_lineage(self.environment)
    )
    return output
|
|
863
|
+
|
|
864
|
+
@v_args(meta=True)
def constant_derivation(
    self, meta: Meta, args: tuple[Purpose, str, Any, Optional[Metadata]]
) -> Concept:
    """Register a constant concept from a literal value.

    args: (purpose, name, constant value, optional metadata). The concept
    has an empty grain and single-row granularity, with a CONSTANT
    function lineage wrapping the literal.
    """
    if len(args) > 3:
        metadata = args[3]
    else:
        metadata = None
    name = args[1]
    constant: Union[str, float, int, bool, MapWrapper, ListWrapper] = args[2]
    lookup, namespace, name, parent = parse_concept_reference(
        name, self.environment
    )
    concept = Concept(
        name=name,
        datatype=arg_to_datatype(constant),
        purpose=Purpose.CONSTANT,
        metadata=Metadata(line_number=meta.line) if not metadata else metadata,
        lineage=Function(
            operator=FunctionType.CONSTANT,
            output_datatype=arg_to_datatype(constant),
            output_purpose=Purpose.CONSTANT,
            arguments=[constant],
        ),
        # constants have no grain and exist at a single row
        grain=Grain(components=set()),
        namespace=namespace,
        granularity=Granularity.SINGLE_ROW,
    )
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta)
    return concept
|
|
897
|
+
|
|
898
|
+
@v_args(meta=True)
def concept(self, meta: Meta, args) -> ConceptDeclarationStatement:
    """Wrap a parsed concept node in a declaration statement, stamping the line number."""
    first = args[0]
    resolved = first if isinstance(first, Concept) else first.concept
    if resolved.metadata:
        resolved.metadata.line_number = meta.line
    return ConceptDeclarationStatement(concept=resolved)
|
|
907
|
+
|
|
908
|
+
def column_assignment_list(self, args):
    """Pass through the list of parsed column assignments."""
    return args
|
|
910
|
+
|
|
911
|
+
def column_list(self, args) -> List:
|
|
912
|
+
return args
|
|
913
|
+
|
|
914
|
+
def grain_clause(self, args) -> Grain:
    """Build a Grain from the addresses of the referenced concepts."""
    addresses = {self.environment.concepts[item].address for item in args[0]}
    return Grain(components=addresses)
|
|
918
|
+
|
|
919
|
+
@v_args(meta=True)
def aggregate_by(self, meta: Meta, args):
    """Build a GROUP function from an 'aggregate ... by' clause.

    The first arg is a token whose text ends with the base concept name;
    remaining args are the grouping concept names.
    """
    base = args[0]
    # the token text contains leading keyword(s); the concept name is the
    # final whitespace-separated word — assumes the name has no spaces
    b_concept = base.value.split(" ")[-1]
    args = [self.environment.concepts[a] for a in [b_concept] + args[1:]]
    return self.function_factory.create_function(args, FunctionType.GROUP, meta)
|
|
925
|
+
|
|
926
|
+
def whole_grain_clause(self, args) -> WholeGrainWrapper:
    """Wrap a where clause marking rows for which the datasource is complete."""
    condition = args[0]
    return WholeGrainWrapper(where=condition)
|
|
928
|
+
|
|
929
|
+
def MULTILINE_STRING(self, args) -> str:
    """Strip the three-character quote delimiters from a multiline string token."""
    inner = args[3:-3]
    return inner
|
|
931
|
+
|
|
932
|
+
def raw_column_assignment(self, args):
    """Wrap raw SQL column text in a RawColumnExpr."""
    raw_text = args[1]
    return RawColumnExpr(text=raw_text)
|
|
934
|
+
|
|
935
|
+
def DATASOURCE_STATUS(self, args) -> DatasourceState:
    """Convert a status keyword token (case-insensitive) into a DatasourceState."""
    normalized = args.value.lower()
    return DatasourceState(normalized)
|
|
937
|
+
|
|
938
|
+
@v_args(meta=True)
|
|
939
|
+
def datasource_status_clause(self, meta: Meta, args):
|
|
940
|
+
return args[1]
|
|
941
|
+
|
|
942
|
+
@v_args(meta=True)
def datasource_partition_clause(self, meta: Meta, args):
    """Build a partition clause from a list of concept addresses."""
    refs = [ConceptRef(address=addr) for addr in args[0]]
    return DatasourcePartitionClause(refs)
|
|
945
|
+
|
|
946
|
+
@v_args(meta=True)
def datasource_increment_clause(self, meta: Meta, args):
    """Build an incremental clause from a list of concept addresses."""
    refs = [ConceptRef(address=addr) for addr in args[0]]
    return DatasourceIncrementalClause(refs)
|
|
949
|
+
|
|
950
|
+
@v_args(meta=True)
def datasource(self, meta: Meta, args):
    """Assemble a Datasource from its parsed clauses.

    Dispatches each clause by type (address/query/file, grain, where,
    status, incremental/partition columns), validates the where clause
    only references columns on the datasource, registers it on the
    validation pass, and narrows foreign-key concepts to the grain.
    """
    is_root = False
    # optional leading 'root' token marks a root datasource
    if isinstance(args[0], Token) and args[0].lower() == "root":
        is_root = True
        args = args[1:]
    name = args[0]
    columns: List[ColumnAssignment] = args[1]
    grain: Optional[Grain] = None
    address: Optional[Address] = None
    where: Optional[WhereClause] = None
    non_partial_for: Optional[WhereClause] = None
    incremental_by: List[ConceptRef] = []
    partition_by: List[ConceptRef] = []
    datasource_status: DatasourceState = DatasourceState.PUBLISHED
    # clauses may appear in any order; dispatch on type
    for val in args[1:]:
        if isinstance(val, Address):
            address = val
        elif isinstance(val, Grain):
            grain = val
        elif isinstance(val, WholeGrainWrapper):
            non_partial_for = val.where
        elif isinstance(val, Query):
            address = Address(location=val.text, type=AddressType.QUERY)
        elif isinstance(val, File):
            address = Address(location=val.path, type=val.type)
        elif isinstance(val, WhereClause):
            where = val
        elif isinstance(val, DatasourceState):
            datasource_status = val
        elif isinstance(val, DatasourceIncrementalClause):
            incremental_by = val.columns
        elif isinstance(val, DatasourcePartitionClause):
            partition_by = val.columns
    if not address:
        raise ValueError(
            "Malformed datasource, missing address or query declaration"
        )

    datasource = Datasource(
        name=name,
        columns=columns,
        # grain will be set by default from args
        # TODO: move to factory
        grain=grain,  # type: ignore
        address=address,
        namespace=self.environment.namespace,
        where=where,
        non_partial_for=non_partial_for,
        status=datasource_status,
        incremental_by=incremental_by,
        partition_by=partition_by,
        is_root=is_root,
    )
    # a where clause can only reference concepts bound on this datasource
    if datasource.where:
        for x in datasource.where.concept_arguments:
            if x.address not in datasource.output_concepts:
                raise ValueError(
                    f"Datasource {name} where condition depends on concept {x.address} that does not exist on the datasource, line {meta.line}."
                )
    # only register on the validation (second) pass
    if self.parse_pass == ParsePass.VALIDATION:
        self.environment.add_datasource(datasource, meta=meta)
    # if we have any foreign keys on the datasource, we can
    # at this point optimize them to properties if they do not have other usage.
    for column in columns:
        # skip partial for now
        if not grain:
            continue
        if column.concept.address in grain.components:
            continue
        target_c = self.environment.concepts[column.concept.address]
        if target_c.purpose != Purpose.KEY:
            continue

        key_inputs = grain.components
        eligible = True
        for key in key_inputs:
            # never overwrite a key with a dependency on a property
            # for example - binding a datasource with a grain of <x>.fun should
            # never override the grain of x to <fun>
            if column.concept.address in (
                self.environment.concepts[key].keys or set()
            ):
                eligible = False
        if not eligible:
            continue
        keys = [self.environment.concepts[grain] for grain in key_inputs]
        # target_c.purpose = Purpose.PROPERTY
        target_c.keys = set([x.address for x in keys])
        # target_c.grain = Grain(components={x.address for x in keys})

    return datasource
|
|
1042
|
+
|
|
1043
|
+
@v_args(meta=True)
def comment(self, meta: Meta, args):
    """Wrap a single comment token in a Comment node."""
    assert len(args) == 1
    token = args[0]
    return Comment(text=token.value)
|
|
1047
|
+
|
|
1048
|
+
def PARSE_COMMENT(self, args):
    """Wrap a comment token in a Comment node, trimming trailing whitespace."""
    trimmed = args.value.rstrip()
    return Comment(text=trimmed)
|
|
1050
|
+
|
|
1051
|
+
@v_args(meta=True)
def select_transform(self, meta: Meta, args) -> ConceptTransform:
    """Handle an 'expr -> name' transform inside a select.

    args: [expression, output name]. The expression is unwrapped into a
    transformation, converted to a concept in the target namespace, and
    wrapped as a ConceptTransform. Note the concept is created but not
    added to the environment here — presumably registered downstream by
    the select handling; TODO confirm.
    """
    output: str = args[1]
    transformation = unwrap_transformation(args[0], self.environment)
    lookup, namespace, output, parent = parse_concept_reference(
        output, self.environment
    )

    metadata = Metadata(line_number=meta.line, concept_source=ConceptSource.SELECT)
    concept = arbitrary_to_concept(
        transformation,
        environment=self.environment,
        namespace=namespace,
        name=output,
        metadata=metadata,
    )
    return ConceptTransform(function=transformation, output=concept)
|
|
1068
|
+
|
|
1069
|
+
@v_args(meta=True)
def concept_nullable_modifier(self, meta: Meta, args) -> Modifier:
    """A nullable marker always maps to Modifier.NULLABLE."""
    return Modifier.NULLABLE
|
|
1072
|
+
|
|
1073
|
+
@v_args(meta=True)
def select_hide_modifier(self, meta: Meta, args) -> Modifier:
    """A hide marker in a select always maps to Modifier.HIDDEN."""
    return Modifier.HIDDEN
|
|
1076
|
+
|
|
1077
|
+
@v_args(meta=True)
def select_partial_modifier(self, meta: Meta, args) -> Modifier:
    """A partial marker in a select always maps to Modifier.PARTIAL."""
    return Modifier.PARTIAL
|
|
1080
|
+
|
|
1081
|
+
@v_args(meta=True)
def select_item(self, meta: Meta, args) -> Optional[SelectItem]:
    """Build a SelectItem from a select clause entry.

    Separates modifiers from content, drops comments, and wraps the
    single remaining content node. Returns None when the entry was only
    comments/modifiers (e.g. a trailing comment line).

    Raises:
        ParseError: if more than one content node remains (malformed select).
    """
    modifiers = [arg for arg in args if isinstance(arg, Modifier)]
    args = [arg for arg in args if not isinstance(arg, (Modifier, Comment))]

    if not args:
        return None
    if len(args) != 1:
        # NOTE(review): this uses self.parse_address where sibling error
        # paths use self.token_address — confirm which lookup is intended
        raise ParseError(
            "Malformed select statement"
            f" {args} {self.text_lookup[self.parse_address][meta.start_pos:meta.end_pos]}"
        )
    content = args[0]
    # fix: the original special-cased ConceptTransform content but built an
    # identical SelectItem in both branches; collapsed to a single return
    return SelectItem(content=content, modifiers=modifiers)
|
|
1100
|
+
|
|
1101
|
+
def select_list(self, args):
    """Drop empty entries (e.g. comment-only select items) from the select list."""
    return [item for item in args if item]
|
|
1103
|
+
|
|
1104
|
+
def limit(self, args):
    """Build a Limit from an integer count token."""
    count = int(args[0].value)
    return Limit(count=count)
|
|
1106
|
+
|
|
1107
|
+
def ordering(self, args: list[str]):
    """Build an Ordering, folding an optional NULLS FIRST/LAST suffix into the value."""
    direction = args[0].lower()
    if len(args) <= 1:
        return Ordering(direction)
    null_placement = args[-1].lower()
    return Ordering(f"{direction} nulls {null_placement}")
|
|
1113
|
+
|
|
1114
|
+
def order_list(self, args) -> List[OrderItem]:
    """Pair up alternating (expression, ordering) children into OrderItems."""
    exprs = args[::2]
    orders = args[1::2]
    return [OrderItem(expr=e, order=o) for e, o in zip(exprs, orders)]
|
|
1122
|
+
|
|
1123
|
+
def order_by(self, args):
    """Wrap the parsed order list in an OrderBy node."""
    items = args[0]
    return OrderBy(items=items)
|
|
1125
|
+
|
|
1126
|
+
def over_component(self, args):
    """Build a ConceptRef from an over-clause token, dropping a leading comma and whitespace."""
    cleaned = args[0].value.lstrip(",").strip()
    return ConceptRef(address=cleaned)
|
|
1128
|
+
|
|
1129
|
+
def over_list(self, args):
    """Materialize the over-clause children as a fresh list."""
    return list(args)
|
|
1131
|
+
|
|
1132
|
+
def PUBLISH_ACTION(self, args) -> PublishAction:
    """Map a publish/unpublish keyword token to its PublishAction.

    Raises:
        SyntaxError: if the token is neither 'publish' nor 'unpublish'.
    """
    action = args.value.lower()
    if action == "publish":
        return PublishAction.PUBLISH
    if action == "unpublish":
        return PublishAction.UNPUBLISH
    raise SyntaxError(f"Unknown publish action: {action}")
|
|
1140
|
+
|
|
1141
|
+
@v_args(meta=True)
def publish_statement(self, meta: Meta, args) -> PublishStatement:
    """Build a PublishStatement from targets, optional scope, and action.

    Only the DATASOURCES scope is supported; any other scope raises.
    Defaults: scope DATASOURCES, action PUBLISH.
    """
    targets = []
    scope = ValidationScope.DATASOURCES
    publish_action = PublishAction.PUBLISH
    # children may arrive in any order; dispatch on type
    for arg in args:
        if isinstance(arg, str):
            targets.append(arg)
        elif isinstance(arg, PublishAction):
            publish_action = arg
        elif isinstance(arg, ValidationScope):
            scope = arg
            if arg != ValidationScope.DATASOURCES:
                raise SyntaxError(
                    f"Publishing is only supported for Datasources, got {arg} on line {meta.line}"
                )
    return PublishStatement(
        scope=scope,
        targets=targets,
        action=publish_action,
    )
|
|
1162
|
+
|
|
1163
|
+
def create_modifier_clause(self, args):
    """Map a create-modifier token to its CreateMode.

    NOTE(review): falls through and implicitly returns None for any other
    token type — presumably the grammar only produces these two; confirm.
    """
    token = args[0]
    if token.type == "CREATE_IF_NOT_EXISTS":
        return CreateMode.CREATE_IF_NOT_EXISTS
    elif token.type == "CREATE_OR_REPLACE":
        return CreateMode.CREATE_OR_REPLACE
|
|
1169
|
+
|
|
1170
|
+
@v_args(meta=True)
def create_statement(self, meta: Meta, args) -> CreateStatement:
    """Build a CreateStatement from targets, optional scope, and create mode.

    Only the DATASOURCES scope is supported; any other scope raises.
    Defaults: scope DATASOURCES, mode CREATE.
    """
    targets = []
    scope = ValidationScope.DATASOURCES
    create_mode = CreateMode.CREATE
    # children may arrive in any order; dispatch on type
    for arg in args:
        if isinstance(arg, str):
            targets.append(arg)
        elif isinstance(arg, ValidationScope):
            scope = arg
            if arg != ValidationScope.DATASOURCES:
                raise SyntaxError(
                    f"Creating is only supported for Datasources, got {arg} on line {meta.line}"
                )
        elif isinstance(arg, CreateMode):
            create_mode = arg

    return CreateStatement(scope=scope, targets=targets, create_mode=create_mode)
|
|
1188
|
+
|
|
1189
|
+
def VALIDATE_SCOPE(self, args) -> ValidationScope:
    """Normalize a scope keyword to its plural form and return the matching enum."""
    normalized: str = args.lower()
    if not normalized.endswith("s"):
        normalized += "s"
    return ValidationScope(normalized)
|
|
1194
|
+
|
|
1195
|
+
@v_args(meta=True)
def validate_statement(self, meta: Meta, args) -> ValidateStatement:
    """Build a ValidateStatement; no args means validate everything."""
    if len(args) == 0:
        scope, targets = ValidationScope.ALL, None
    elif len(args) == 1:
        scope, targets = args[0], None
    else:
        scope, targets = args[0], args[1:]
    return ValidateStatement(
        scope=scope,
        targets=targets,
    )
|
|
1210
|
+
|
|
1211
|
+
@v_args(meta=True)
def mock_statement(self, meta: Meta, args) -> MockStatement:
    """Build a MockStatement: first child is the scope, the rest are targets."""
    scope, *targets = args
    return MockStatement(scope=scope, targets=targets)
|
|
1214
|
+
|
|
1215
|
+
@v_args(meta=True)
def merge_statement(self, meta: Meta, args) -> MergeStatementV2 | None:
    """Handle a merge of one concept (or a namespace wildcard) into another.

    Wildcard form ('ns.*' -> 'other.*') pairs every concept in the source
    namespace with the like-named concept in the target namespace,
    dropping unmatched sources. Concept merging mutates the environment
    only on the validation pass; the first pass returns None.
    """
    modifiers = []
    cargs: list[str] = []
    source_wildcard = None
    target_wildcard = None
    # separate modifiers from the two concept/namespace arguments
    for arg in args:
        if isinstance(arg, Modifier):
            modifiers.append(arg)
        else:
            cargs.append(arg)
    source, target = cargs
    if source.endswith(".*"):
        # wildcard merges must be wildcard on both sides
        if not target.endswith(".*"):
            raise ValueError("Invalid merge, source is wildcard, target is not")
        source_wildcard = source[:-2]
        target_wildcard = target[:-2]
        sources: list[Concept] = [
            v
            for k, v in self.environment.concepts.items()
            if v.namespace == source_wildcard
        ]
        targets: dict[str, Concept] = {}
        for x in sources:
            target = target_wildcard + "." + x.name
            if target in self.environment.concepts:
                targets[x.address] = self.environment.concepts[target]
        # keep only sources with a matching target concept
        sources = [x for x in sources if x.address in targets]
    else:
        sources = [self.environment.concepts[source]]
        targets = {sources[0].address: self.environment.concepts[target]}

    if self.parse_pass == ParsePass.VALIDATION:
        # by the validation pass every source must be fully defined
        for source_c in sources:
            if isinstance(source_c, UndefinedConceptFull):
                raise SyntaxError(
                    f"Cannot merge non-existent source concept {source_c.address} on line: {meta.line}"
                )
        new = MergeStatementV2(
            sources=sources,
            targets=targets,
            modifiers=modifiers,
            source_wildcard=source_wildcard,
            target_wildcard=target_wildcard,
        )
        for source_c in new.sources:
            self.environment.merge_concept(
                source_c, targets[source_c.address], modifiers
            )

        return new
    return None
|
|
1267
|
+
|
|
1268
|
+
@v_args(meta=True)
def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
    """Wrap raw SQL text in a RawSQLStatement, recording the source line."""
    return RawSQLStatement(meta=Metadata(line_number=meta.line), text=args[0])
|
|
1272
|
+
|
|
1273
|
+
def COPY_TYPE(self, args) -> IOType:
    """Convert a copy-target keyword token into an IOType."""
    raw = args.value
    return IOType(raw)
|
|
1275
|
+
|
|
1276
|
+
@v_args(meta=True)
def copy_statement(self, meta: Meta, args) -> CopyStatement:
    """Build a COPY statement: args[0] is the IO type, args[1] the target, args[-1] the select."""
    target_type = args[0]
    destination = args[1]
    select = args[-1]
    return CopyStatement(
        target=destination,
        target_type=target_type,
        meta=Metadata(line_number=meta.line),
        select=select,
    )
|
|
1284
|
+
|
|
1285
|
+
def resolve_import_address(self, address: str, is_stdlib: bool = False) -> str:
    """Load the raw text of an import target.

    Filesystem resolvers (and stdlib imports, which always live on disk)
    read the file directly; dict resolvers look the address up in their
    preloaded content map.

    Raises:
        ImportError: if the address is unresolvable or the resolver type
            is unsupported.
    """
    if (
        isinstance(
            self.environment.config.import_resolver, FileSystemImportResolver
        )
        or is_stdlib
    ):
        with open(address, "r", encoding="utf-8") as f:
            text = f.read()
    elif isinstance(self.environment.config.import_resolver, DictImportResolver):
        lookup = address
        if lookup not in self.environment.config.import_resolver.content:
            raise ImportError(
                f"Unable to import file {lookup}, not resolvable from provided source files."
            )
        text = self.environment.config.import_resolver.content[lookup]
    else:
        raise ImportError(
            f"Unable to import file {address}, resolver type {type(self.environment.config.import_resolver)} not supported"
        )
    return text
|
|
1306
|
+
|
|
1307
|
+
def IMPORT_DOT(self, args) -> str:
    """Collapse an import-path dot token to a literal '.'."""
    return "."
|
|
1309
|
+
|
|
1310
|
+
def import_statement(self, args: list[str]) -> ImportStatement:
    """Resolve and parse an import, with multi-level caching.

    Leading '.' tokens walk up parent directories (first dot is free).
    Three caches are shared across parsers: tokens and text keyed by
    resolved path, and parsed environments keyed by the chain of import
    aliases (so the same file imported via different chains re-parses).
    Recursion is bounded by MAX_PARSE_DEPTH.
    """
    start = datetime.now()
    is_file_resolver = isinstance(
        self.environment.config.import_resolver, FileSystemImportResolver
    )
    # starts at -1: the first dot separates path segments, extra dots climb dirs
    parent_dirs = -1
    parsed_args = []
    for x in args:
        if x == ".":
            parent_dirs += 1
        else:
            parsed_args.append(x)
    parent_dirs = max(parent_dirs, 0)
    args = parsed_args
    # two args means 'import path as alias'
    if len(args) == 2:
        alias = args[-1]
        cache_key = args[-1]
    else:
        alias = self.environment.namespace
        cache_key = args[0]
    input_path = args[0]
    # lstrip off '.' from parent if they exist;
    # each one is an extra directory up after the first

    path = input_path.split(".")
    is_stdlib = False
    if path[0] == "std":
        # std imports resolve against the bundled standard library root
        is_stdlib = True
        target = join(STDLIB_ROOT, *path) + ".preql"
        token_lookup: Path | str = Path(target)
    elif is_file_resolver:
        troot = Path(self.environment.working_path)
        if parent_dirs > 0:
            for _ in range(parent_dirs):
                troot = troot.parent
        target = join(troot, *path) + ".preql"
        # tokens + text are cached by path
        token_lookup = Path(target)
    elif isinstance(self.environment.config.import_resolver, DictImportResolver):
        target = ".".join(path)
        token_lookup = target
    else:
        raise NotImplementedError

    # parser + env has to be cached by prior import path + current key
    key_path = self.import_keys + [cache_key]
    cache_lookup = "-".join(key_path)

    # we don't iterate past the max parse depth
    if len(key_path) > MAX_PARSE_DEPTH:
        return ImportStatement(
            alias=alias, input_path=input_path, path=Path(target)
        )

    if token_lookup in self.tokens:
        perf_logger.debug(f"\tTokens cached for {token_lookup}")
        raw_tokens = self.tokens[token_lookup]
        text = self.text_lookup[token_lookup]
    else:
        perf_logger.debug(f"\tTokens not cached for {token_lookup}, resolving")
        text = self.resolve_import_address(target, is_stdlib)
        self.text_lookup[token_lookup] = text

        try:
            raw_tokens = PARSER.parse(text)
        except Exception as e:
            raise ImportError(
                f"Unable to import '{target}', parsing error: {e}"
            ) from e
        self.tokens[token_lookup] = raw_tokens

    if cache_lookup in self.parsed:
        perf_logger.debug(f"\tEnvironment cached for {token_lookup}")
        nparser = self.parsed[cache_lookup]
        new_env = nparser.environment
        # a cached parser that has not yet run validation needs its second pass
        if nparser.parse_pass != ParsePass.VALIDATION:
            # nparser.transform(raw_tokens)
            second_pass_start = datetime.now()
            nparser.run_second_parse_pass()
            second_pass_end = datetime.now()
            perf_logger.debug(
                f"{second_pass_end - second_pass_start} seconds | Import {alias} key ({cache_key}) second pass took {second_pass_end - second_pass_start} to parse, {len(new_env.concepts)} concepts"
            )
    else:
        perf_logger.debug(f"\tParsing new for {token_lookup}")
        root = None
        if "." in str(token_lookup):
            root = str(token_lookup).rsplit(".", 1)[0]
        try:
            new_env = Environment(
                working_path=dirname(target),
                env_file_path=token_lookup,
                config=self.environment.config.copy_for_root(root=root),
                parameters=self.environment.parameters,
            )
            # missing concepts are tolerated on the first pass
            new_env.concepts.fail_on_missing = False
            # register self before recursing so circular imports hit the cache
            self.parsed[self.parse_address] = self
            nparser = ParseToObjects(
                environment=new_env,
                parse_address=cache_lookup,
                token_address=token_lookup,
                parsed=self.parsed,
                tokens=self.tokens,
                text_lookup=self.text_lookup,
                import_keys=self.import_keys + [cache_key],
                parse_config=self.parse_config,
            )
            nparser.transform(raw_tokens)
            self.parsed[cache_lookup] = nparser
        except Exception as e:
            raise ImportError(
                f"Unable to import file {target}, parsing error: {e}"
            ) from e

    parsed_path = Path(args[0])
    imps = ImportStatement(alias=alias, input_path=input_path, path=parsed_path)

    self.environment.add_import(
        alias,
        new_env,
        Import(
            alias=alias,
            path=parsed_path,
            input_path=Path(target) if is_file_resolver else None,
        ),
    )
    end = datetime.now()
    perf_logger.debug(
        f"{end - start} seconds | Import {alias} key ({cache_key}) took to parse, {len(new_env.concepts)} concepts"
    )
    return imps
|
|
1441
|
+
|
|
1442
|
+
    @v_args(meta=True)
    def show_category(self, meta: Meta, args) -> ShowCategory:
        """Wrap the parsed show-category token in a ShowCategory value."""
        return ShowCategory(args[0])

    @v_args(meta=True)
    def show_statement(self, meta: Meta, args) -> ShowStatement:
        """Build a SHOW statement from its single parsed child."""
        return ShowStatement(content=args[0])

    @v_args(meta=True)
    def persist_partition_clause(self, meta: Meta, args) -> DatasourcePartitionClause:
        """Convert the parsed list of addresses into a partition clause of concept refs."""
        return DatasourcePartitionClause([ConceptRef(address=a) for a in args[0]])
|
|
1453
|
+
|
|
1454
|
+
@v_args(meta=True)
|
|
1455
|
+
def PERSIST_MODE(self, args) -> PersistMode:
|
|
1456
|
+
base = args.value.lower()
|
|
1457
|
+
if base == "persist":
|
|
1458
|
+
return PersistMode.OVERWRITE
|
|
1459
|
+
return PersistMode(base)
|
|
1460
|
+
|
|
1461
|
+
    @v_args(meta=True)
    def auto_persist(self, meta: Meta, args) -> PersistStatement | None:
        """Expand an auto-persist statement into a full PersistStatement.

        Only materialized on the validation pass; the initial pass returns
        None so later definitions can still resolve.
        """
        if self.parse_pass != ParsePass.VALIDATION:
            return None
        persist_mode = args[0]
        target_name = args[1]
        # optional trailing where clause
        where = args[2] if len(args) > 2 else None

        if target_name not in self.environment.datasources:
            raise SyntaxError(
                f"Auto persist target datasource {target_name} does not exist in environment on line {meta.line}. Have {list(self.environment.datasources.keys())}"
            )
        target = self.environment.datasources[target_name]
        # derive the backing select from the datasource's own definition
        select: SelectStatement = target.create_update_statement(
            self.environment, where, line_no=meta.line
        )
        return PersistStatement(
            select=select,
            datasource=target,
            persist_mode=persist_mode,
            partition_by=target.incremental_by,
            meta=Metadata(line_number=meta.line),
        )
|
|
1484
|
+
|
|
1485
|
+
    @v_args(meta=True)
    def full_persist(self, meta: Meta, args) -> PersistStatement | None:
        """Build a PersistStatement from an explicit persist/append statement.

        Only materialized on the validation pass. Resolves the target
        datasource (existing, or created from the select output when a
        physical address is given), validates partitioning for APPEND
        mode, and packages the result.
        """
        if self.parse_pass != ParsePass.VALIDATION:
            return None
        partition_clause = DatasourcePartitionClause([])
        # string children are the identifier and, optionally, a physical address
        labels = [x for x in args if isinstance(x, str)]
        for x in args:
            if isinstance(x, DatasourcePartitionClause):
                partition_clause = x
        if len(labels) == 2:
            identifier = labels[0]
            address = labels[1]
        else:
            identifier = labels[0]
            address = None
        target: Datasource | None = self.environment.datasources.get(identifier)

        if not address and not target:
            raise SyntaxError(
                f'Append statement without concrete table address on line {meta.line} attempts to insert into datasource "{identifier}" that cannot be found in the environment. Add a physical address to create a new datasource, or check the name.'
            )
        elif target:
            # reuse the existing datasource's physical address
            address = target.safe_address

        assert address is not None

        modes = [x for x in args if isinstance(x, PersistMode)]
        mode = modes[0] if modes else PersistMode.OVERWRITE
        select: SelectStatement = [x for x in args if isinstance(x, SelectStatement)][0]

        if mode == PersistMode.APPEND:
            # appends require an existing, compatibly partitioned datasource
            if target is None:
                raise SyntaxError(
                    f"Cannot append to non-existent datasource {identifier} on line {meta.line}."
                )
            new_datasource: Datasource = target
            if not new_datasource.partition_by == partition_clause.columns:
                raise SyntaxError(
                    f"Cannot append to datasource {identifier} with different partitioning scheme then insert on line {meta.line}. Datasource partitioning: {new_datasource.partition_by}, insert partitioning: {partition_clause.columns if partition_clause else '[]'}"
                )
            if len(partition_clause.columns) > 1:
                raise NotImplementedError(
                    "Incremental partition overwrites by more than 1 column are not yet supported."
                )
            for x in partition_clause.columns:
                concept = self.environment.concepts[x.address]
                if concept.output_datatype not in SUPPORTED_INCREMENTAL_TYPES:
                    raise SyntaxError(
                        f"Cannot incremental persist on concept {concept.address} of type {concept.output_datatype} on line {meta.line}."
                    )
        elif target:
            new_datasource = target
        else:
            # no existing datasource: synthesize one from the select output
            new_datasource = select.to_datasource(
                namespace=(
                    self.environment.namespace
                    if self.environment.namespace
                    else DEFAULT_NAMESPACE
                ),
                name=identifier,
                address=Address(location=address),
                grain=select.grain,
                environment=self.environment,
            )
        return PersistStatement(
            select=select,
            datasource=new_datasource,
            persist_mode=mode,
            partition_by=partition_clause.columns if partition_clause else [],
            meta=Metadata(line_number=meta.line),
        )
|
|
1556
|
+
|
|
1557
|
+
@v_args(meta=True)
|
|
1558
|
+
def persist_statement(self, meta: Meta, args) -> PersistStatement:
|
|
1559
|
+
return args[0]
|
|
1560
|
+
|
|
1561
|
+
    @v_args(meta=True)
    def align_item(self, meta: Meta, args) -> AlignItem:
        """Build one alignment pairing: an output alias plus the concepts it unifies."""
        return AlignItem(
            alias=args[0],
            namespace=self.environment.namespace,
            concepts=[self.environment.concepts[arg].reference for arg in args[1:]],
        )

    @v_args(meta=True)
    def align_clause(self, meta: Meta, args) -> AlignClause:
        """Collect all align items of a multi-select into a single clause."""
        return AlignClause(items=args)

    @v_args(meta=True)
    def derive_item(self, meta: Meta, args) -> DeriveItem:
        """Build one derived output: an expression bound to a new name."""
        return DeriveItem(
            expr=args[0], name=args[1], namespace=self.environment.namespace
        )

    @v_args(meta=True)
    def derive_clause(self, meta: Meta, args) -> DeriveClause:
        """Collect all derive items of a multi-select into a single clause."""

        return DeriveClause(items=args)
|
|
1583
|
+
|
|
1584
|
+
@v_args(meta=True)
|
|
1585
|
+
def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
|
|
1586
|
+
|
|
1587
|
+
selects: list[SelectStatement] = []
|
|
1588
|
+
align: AlignClause | None = None
|
|
1589
|
+
limit: int | None = None
|
|
1590
|
+
order_by: OrderBy | None = None
|
|
1591
|
+
where: WhereClause | None = None
|
|
1592
|
+
having: HavingClause | None = None
|
|
1593
|
+
derive: DeriveClause | None = None
|
|
1594
|
+
for arg in args:
|
|
1595
|
+
atype = type(arg)
|
|
1596
|
+
if atype is SelectStatement:
|
|
1597
|
+
selects.append(arg)
|
|
1598
|
+
elif atype is Limit:
|
|
1599
|
+
limit = arg.count
|
|
1600
|
+
elif atype is OrderBy:
|
|
1601
|
+
order_by = arg
|
|
1602
|
+
elif atype is WhereClause:
|
|
1603
|
+
where = arg
|
|
1604
|
+
elif atype is HavingClause:
|
|
1605
|
+
having = arg
|
|
1606
|
+
elif atype is AlignClause:
|
|
1607
|
+
align = arg
|
|
1608
|
+
elif atype is DeriveClause:
|
|
1609
|
+
derive = arg
|
|
1610
|
+
|
|
1611
|
+
assert align
|
|
1612
|
+
assert align is not None
|
|
1613
|
+
|
|
1614
|
+
derived_concepts = []
|
|
1615
|
+
new_selects = [x.as_lineage(self.environment) for x in selects]
|
|
1616
|
+
lineage = MultiSelectLineage(
|
|
1617
|
+
selects=new_selects,
|
|
1618
|
+
align=align,
|
|
1619
|
+
derive=derive,
|
|
1620
|
+
namespace=self.environment.namespace,
|
|
1621
|
+
where_clause=where,
|
|
1622
|
+
having_clause=having,
|
|
1623
|
+
limit=limit,
|
|
1624
|
+
hidden_components=set(y for x in new_selects for y in x.hidden_components),
|
|
1625
|
+
)
|
|
1626
|
+
for x in align.items:
|
|
1627
|
+
concept = align_item_to_concept(
|
|
1628
|
+
x,
|
|
1629
|
+
align,
|
|
1630
|
+
selects,
|
|
1631
|
+
where=where,
|
|
1632
|
+
having=having,
|
|
1633
|
+
limit=limit,
|
|
1634
|
+
environment=self.environment,
|
|
1635
|
+
)
|
|
1636
|
+
derived_concepts.append(concept)
|
|
1637
|
+
self.environment.add_concept(concept, meta=meta)
|
|
1638
|
+
if derive:
|
|
1639
|
+
for derived in derive.items:
|
|
1640
|
+
derivation = derived.expr
|
|
1641
|
+
name = derived.name
|
|
1642
|
+
if not isinstance(derivation, (Function, Comparison, WindowItem)):
|
|
1643
|
+
raise SyntaxError(
|
|
1644
|
+
f"Invalid derive expression {derivation} in {meta.line}, must be a function or conditional"
|
|
1645
|
+
)
|
|
1646
|
+
concept = derive_item_to_concept(
|
|
1647
|
+
derivation, name, lineage, self.environment.namespace
|
|
1648
|
+
)
|
|
1649
|
+
derived_concepts.append(concept)
|
|
1650
|
+
self.environment.add_concept(concept, meta=meta)
|
|
1651
|
+
multi = MultiSelectStatement(
|
|
1652
|
+
selects=selects,
|
|
1653
|
+
align=align,
|
|
1654
|
+
namespace=self.environment.namespace,
|
|
1655
|
+
where_clause=where,
|
|
1656
|
+
order_by=order_by,
|
|
1657
|
+
limit=limit,
|
|
1658
|
+
meta=Metadata(line_number=meta.line),
|
|
1659
|
+
derived_concepts=derived_concepts,
|
|
1660
|
+
derive=derive,
|
|
1661
|
+
)
|
|
1662
|
+
return multi
|
|
1663
|
+
|
|
1664
|
+
    @v_args(meta=True)
    def select_statement(self, meta: Meta, args) -> SelectStatement:
        """Assemble a SELECT statement from its parsed children.

        Children are bucketed by type; duplicate where clauses are
        rejected. On the initial pass, optionally enforces that locally
        derived names do not shadow pre-existing concepts.
        """
        select_items: List[SelectItem] | None = None
        limit: int | None = None
        order_by: OrderBy | None = None
        where = None
        having = None
        for arg in args:
            atype = type(arg)
            if atype is list:
                select_items = arg
            elif atype is Limit:
                limit = arg.count
            elif atype is OrderBy:
                order_by = arg
            elif atype is WhereClause:
                if where is not None:
                    raise ParseError(
                        "Multiple where clauses defined are not supported!"
                    )
                where = arg
            elif atype is HavingClause:
                having = arg
        if not select_items:
            raise ParseError("Malformed select, missing select items")
        # snapshot concept keys so newly introduced names can be detected below
        pre_keys = set(self.environment.concepts.keys())
        base = SelectStatement.from_inputs(
            environment=self.environment,
            selection=select_items,
            order_by=order_by,
            where_clause=where,
            having_clause=having,
            limit=limit,
            meta=Metadata(line_number=meta.line),
        )
        if (
            self.parse_pass == ParsePass.INITIAL
            and self.parse_config.strict_name_shadow_enforcement
        ):
            intersection = base.locally_derived.intersection(pre_keys)
            if intersection:
                for x in intersection:
                    # identical lineage means the select re-derives an existing concept
                    if str(base.local_concepts[x].lineage) == str(
                        self.environment.concepts[x].lineage
                    ):
                        local = base.local_concepts[x]
                        # NOTE(review): non-default namespaces fall back to the
                        # namespace rather than a qualified name — confirm intended
                        friendly_name = (
                            local.name
                            if local.namespace == DEFAULT_NAMESPACE
                            else local.namespace
                        )
                        raise NameShadowError(
                            f"Select statement {base} creates a new concept '{friendly_name}' with identical definition as the existing concept '{friendly_name}'. Replace {base.local_concepts[x].lineage} with a direct reference to {friendly_name}."
                        )
                    else:
                        raise NameShadowError(
                            f"Select statement {base} creates new named concepts from calculations {list(intersection)} with identical name(s) to existing concept(s). Use new unique names for these."
                        )
        return base
|
|
1723
|
+
|
|
1724
|
+
    @v_args(meta=True)
    def address(self, meta: Meta, args):
        """Pass through the raw datasource address child."""
        return args[0]

    @v_args(meta=True)
    def query(self, meta: Meta, args):
        """Wrap a raw query body in a Query container."""
        return Query(text=args[0])
|
|
1731
|
+
|
|
1732
|
+
    @v_args(meta=True)
    def file(self, meta: Meta, args):
        """Resolve a quoted file path into a typed File address.

        Cloud URLs pass through untouched; local relative paths are
        resolved against the environment's working path. Existence is
        enforced only for .sql and .py files.
        """
        # strip the surrounding quote characters
        raw_path = args[0][1:-1]

        # Cloud storage URLs should be used as-is without path resolution
        cloud_prefixes = ("gcs://", "gs://", "s3://", "https://", "http://")
        is_cloud = raw_path.startswith(cloud_prefixes)

        if is_cloud:
            base = raw_path
            suffix = "." + raw_path.rsplit(".", 1)[-1] if "." in raw_path else ""
        else:
            path = Path(raw_path)
            # if it's a relative path, look it up relative to current parsing directory
            if path.is_relative_to("."):
                path = Path(self.environment.working_path) / path
            base = str(path.resolve().absolute())
            suffix = path.suffix

        def check_exists():
            # only meaningful for local files; cloud URLs are trusted
            if not is_cloud and not Path(base).exists():
                raise FileNotFoundError(
                    f"File path {base} does not exist on line {meta.line}"
                )

        if suffix == ".sql":
            check_exists()
            return File(path=base, type=AddressType.SQL)
        elif suffix == ".py":
            check_exists()
            return File(path=base, type=AddressType.PYTHON_SCRIPT)
        elif suffix == ".csv":
            return File(path=base, type=AddressType.CSV)
        elif suffix == ".tsv":
            return File(path=base, type=AddressType.TSV)
        elif suffix == ".parquet":
            return File(path=base, type=AddressType.PARQUET)
        else:
            raise ParseError(
                f"Unsupported file type {suffix} for path {raw_path} on line {meta.line}"
            )
|
|
1773
|
+
|
|
1774
|
+
    def where(self, args):
        """Build a WHERE clause, coercing the raw expression to a boolean condition."""
        root = args[0]
        root = expr_to_boolean(root, self.function_factory)
        return WhereClause(conditional=root)

    def having(self, args):
        """Build a HAVING clause, wrapping non-conditional expressions.

        A bare boolean expression becomes ``expr = true``; any other
        scalar expression becomes an ``IS NOT NULL`` check.
        """
        root = args[0]
        if not isinstance(root, (Comparison, Conditional, Parenthetical)):
            if arg_to_datatype(root) == DataType.BOOL:
                root = Comparison(left=root, right=True, operator=ComparisonOperator.EQ)
            else:
                root = Comparison(
                    left=root,
                    right=MagicConstants.NULL,
                    operator=ComparisonOperator.IS_NOT,
                )
        return HavingClause(conditional=root)
|
|
1791
|
+
|
|
1792
|
+
    @v_args(meta=True)
    def function_binding_list(self, meta: Meta, args) -> list[ArgBinding]:
        """Return the parsed argument bindings of a function declaration."""
        return args

    @v_args(meta=True)
    def function_binding_type(self, meta: Meta, args) -> FunctionBindingType:
        """Wrap a declared argument type annotation."""
        return FunctionBindingType(type=args[0])

    @v_args(meta=True)
    def function_binding_default(self, meta: Meta, args):
        # args[0] is presumably the '=' token and args[1] the default value
        # — TODO(review): confirm against the grammar.
        return args[1]

    @v_args(meta=True)
    def function_binding_item(self, meta: Meta, args) -> ArgBinding:
        """Build a single argument binding from a name plus optional type/default."""
        default = None
        type = None
        for arg in args[1:]:
            if isinstance(arg, FunctionBindingType):
                type = arg.type
            else:
                default = arg
        return ArgBinding.model_construct(name=args[0], datatype=type, default=default)
|
|
1814
|
+
|
|
1815
|
+
    @v_args(meta=True)
    def raw_function(self, meta: Meta, args) -> FunctionDeclaration:
        """Declare a user-defined function.

        Side effect: registers a CustomFunctionFactory under the
        function's name so later calls can expand it inline.
        """
        identity = args[0]
        function_arguments: list[ArgBinding] = args[1]
        output = args[2]

        self.environment.functions[identity] = CustomFunctionFactory(
            function=output,
            namespace=self.environment.namespace,
            function_arguments=function_arguments,
            name=identity,
        )
        return FunctionDeclaration(name=identity, args=function_arguments, expr=output)

    def custom_function(self, args) -> FunctionCallWrapper:
        """Expand a call to a previously declared custom function."""
        name = args[0]
        args = args[1:]
        remapped = FunctionCallWrapper(
            content=self.environment.functions[name](*args), name=name, args=args
        )

        return remapped

    @v_args(meta=True)
    def function(self, meta: Meta, args) -> Function:
        """Pass through the already-built function node."""
        return args[0]
|
|
1841
|
+
|
|
1842
|
+
    @v_args(meta=True)
    def type_drop_clause(self, meta: Meta, args) -> DropOn:
        """Collect function types under a type's drop-on clause."""
        return DropOn([FunctionType(x) for x in args])

    @v_args(meta=True)
    def type_add_clause(self, meta: Meta, args) -> AddOn:
        """Collect function types under a type's add-on clause."""
        return AddOn([FunctionType(x) for x in args])

    @v_args(meta=True)
    def type_declaration(self, meta: Meta, args) -> TypeDeclaration:
        """Declare a named custom type and register it on the environment."""
        key = args[0]
        datatype: list[DataType] = [x for x in args[1:] if isinstance(x, DataType)]
        # a single base type is stored unwrapped; multiple stay a list
        if len(datatype) == 1:
            final_datatype: list[DataType] | DataType = datatype[0]
        else:
            final_datatype = datatype
        add_on = None
        drop_on = None
        for x in args[1:]:
            if isinstance(x, AddOn):
                add_on = x
            elif isinstance(x, DropOn):
                drop_on = x
        new = CustomType(
            name=key,
            type=final_datatype,
            drop_on=drop_on.functions if drop_on else [],
            add_on=add_on.functions if add_on else [],
        )
        self.environment.data_types[key] = new
        return TypeDeclaration(type=new)
|
|
1873
|
+
|
|
1874
|
+
def int_lit(self, args):
|
|
1875
|
+
return int("".join(args))
|
|
1876
|
+
|
|
1877
|
+
def bool_lit(self, args):
|
|
1878
|
+
return args[0].capitalize() == "True"
|
|
1879
|
+
|
|
1880
|
+
    def null_lit(self, args):
        """Return the shared NULL sentinel for a null literal."""
        return NULL_VALUE
|
|
1882
|
+
|
|
1883
|
+
def float_lit(self, args):
|
|
1884
|
+
return float(args[0])
|
|
1885
|
+
|
|
1886
|
+
    def array_lit(self, args):
        """Wrap parsed elements as a typed array literal."""
        return list_to_wrapper(args)

    def tuple_lit(self, args):
        """Wrap parsed elements as a typed tuple literal."""
        return tuple_to_wrapper(args)
|
|
1891
|
+
|
|
1892
|
+
def string_lit(self, args) -> str:
|
|
1893
|
+
if not args:
|
|
1894
|
+
return ""
|
|
1895
|
+
|
|
1896
|
+
return args[0]
|
|
1897
|
+
|
|
1898
|
+
    @v_args(meta=True)
    def struct_lit(self, meta, args):
        """Build a struct literal via the STRUCT function."""
        return self.function_factory.create_function(
            args, operator=FunctionType.STRUCT, meta=meta
        )

    def map_lit(self, args):
        """Pair alternating key/value children into a typed map literal."""
        parsed = dict(zip(args[::2], args[1::2]))
        wrapped = dict_to_map_wrapper(parsed)
        return wrapped
|
|
1908
|
+
|
|
1909
|
+
def literal(self, args):
|
|
1910
|
+
return args[0]
|
|
1911
|
+
|
|
1912
|
+
def product_operator(self, args) -> Function | Any:
|
|
1913
|
+
if len(args) == 1:
|
|
1914
|
+
return args[0]
|
|
1915
|
+
result = args[0]
|
|
1916
|
+
for i in range(1, len(args), 2):
|
|
1917
|
+
new_result = None
|
|
1918
|
+
op = args[i]
|
|
1919
|
+
right = args[i + 1]
|
|
1920
|
+
if op == "*":
|
|
1921
|
+
new_result = self.function_factory.create_function(
|
|
1922
|
+
[result, right], operator=FunctionType.MULTIPLY
|
|
1923
|
+
)
|
|
1924
|
+
elif op == "**":
|
|
1925
|
+
new_result = self.function_factory.create_function(
|
|
1926
|
+
[result, right], operator=FunctionType.POWER
|
|
1927
|
+
)
|
|
1928
|
+
elif op == "/":
|
|
1929
|
+
new_result = self.function_factory.create_function(
|
|
1930
|
+
[result, right], operator=FunctionType.DIVIDE
|
|
1931
|
+
)
|
|
1932
|
+
elif op == "%":
|
|
1933
|
+
new_result = self.function_factory.create_function(
|
|
1934
|
+
[result, right], operator=FunctionType.MOD
|
|
1935
|
+
)
|
|
1936
|
+
else:
|
|
1937
|
+
raise ValueError(f"Unknown operator: {op}")
|
|
1938
|
+
result = new_result
|
|
1939
|
+
return new_result
|
|
1940
|
+
|
|
1941
|
+
    def PLUS_OR_MINUS(self, args) -> str:
        """Return the additive operator token's raw text."""
        return args.value

    def MULTIPLY_DIVIDE_PERCENT(self, args) -> str:
        """Return the multiplicative operator token's raw text."""
        return args.value
|
|
1946
|
+
|
|
1947
|
+
    @v_args(meta=True)
    def sum_operator(self, meta: Meta, args) -> Function | Any:
        """Fold a left-associative chain of + / - / || / like operations.

        ``args`` alternates operand, operator, operand, ...; a single
        operand passes through untouched.
        """
        if len(args) == 1:
            return args[0]
        result = args[0]
        for i in range(1, len(args), 2):
            new_result = None
            # operators may arrive in any casing (e.g. LIKE)
            op = args[i].lower()
            right = args[i + 1]
            if op == "+":
                new_result = self.function_factory.create_function(
                    [result, right], operator=FunctionType.ADD, meta=meta
                )
            elif op == "-":
                new_result = self.function_factory.create_function(
                    [result, right], operator=FunctionType.SUBTRACT, meta=meta
                )
            elif op == "||":
                new_result = self.function_factory.create_function(
                    [result, right], operator=FunctionType.CONCAT, meta=meta
                )
            elif op == "like":
                new_result = self.function_factory.create_function(
                    [result, right], operator=FunctionType.LIKE, meta=meta
                )
            else:
                raise ValueError(f"Unknown operator: {op}")
            result = new_result
        return result
|
|
1976
|
+
|
|
1977
|
+
    def comparison(self, args) -> Comparison:
        """Build a comparison node; IN/NOT IN become subselect comparisons.

        A single child (no operator) passes through untouched.
        """
        if len(args) == 1:
            return args[0]
        left = args[0]
        right = args[2]
        if args[1] in (ComparisonOperator.IN, ComparisonOperator.NOT_IN):
            return SubselectComparison(
                left=left,
                right=right,
                operator=args[1],
            )
        return Comparison(left=left, right=right, operator=args[1])
|
|
1989
|
+
|
|
1990
|
+
def between_comparison(self, args) -> Conditional:
|
|
1991
|
+
left_bound = args[1]
|
|
1992
|
+
right_bound = args[2]
|
|
1993
|
+
return Conditional(
|
|
1994
|
+
left=Comparison(
|
|
1995
|
+
left=args[0], right=left_bound, operator=ComparisonOperator.GTE
|
|
1996
|
+
),
|
|
1997
|
+
right=Comparison(
|
|
1998
|
+
left=args[0], right=right_bound, operator=ComparisonOperator.LTE
|
|
1999
|
+
),
|
|
2000
|
+
operator=BooleanOperator.AND,
|
|
2001
|
+
)
|
|
2002
|
+
|
|
2003
|
+
    @v_args(meta=True)
    def subselect_comparison(self, meta: Meta, args) -> SubselectComparison:
        """Build an IN/NOT IN style comparison against a collection or expression."""
        right = args[2]

        # unwrap nested parentheses around the right-hand side until a
        # concrete expression is reached
        while isinstance(right, Parenthetical) and isinstance(
            right.content,
            (
                Concept,
                Function,
                FilterItem,
                WindowItem,
                AggregateWrapper,
                ListWrapper,
                TupleWrapper,
            ),
        ):
            right = right.content
        if isinstance(right, (Function, FilterItem, WindowItem, AggregateWrapper)):
            # computed right-hand sides are promoted to named concepts,
            # registered on the environment, and referenced by address
            right_concept = arbitrary_to_concept(right, environment=self.environment)
            self.environment.add_concept(right_concept, meta=meta)
            right = right_concept.reference
        return SubselectComparison(
            left=args[0],
            right=right,
            operator=args[1],
        )
|
|
2029
|
+
|
|
2030
|
+
def expr_tuple(self, args):
|
|
2031
|
+
datatypes = set([arg_to_datatype(x) for x in args])
|
|
2032
|
+
if len(datatypes) != 1:
|
|
2033
|
+
raise ParseError("Tuple must have same type for all elements")
|
|
2034
|
+
return TupleWrapper(val=tuple(args), type=datatypes.pop())
|
|
2035
|
+
|
|
2036
|
+
def parenthetical(self, args):
|
|
2037
|
+
return Parenthetical(content=args[0])
|
|
2038
|
+
|
|
2039
|
+
    @v_args(meta=True)
    def condition_parenthetical(self, meta, args):
        """Wrap a parenthesized condition, desugaring a negated group."""
        if len(args) == 2:
            # two children: presumably a leading negation token — the group is
            # rewritten as ``(expr) = false``. TODO(review): confirm vs grammar.
            return Comparison(
                left=Parenthetical(content=args[1]),
                right=False,
                operator=ComparisonOperator.EQ,
            )
        return Parenthetical(content=args[0])
|
|
2048
|
+
|
|
2049
|
+
def conditional(self, args):
|
|
2050
|
+
def munch_args(args):
|
|
2051
|
+
while args:
|
|
2052
|
+
if len(args) == 1:
|
|
2053
|
+
return args[0]
|
|
2054
|
+
else:
|
|
2055
|
+
return Conditional(
|
|
2056
|
+
left=args[0], operator=args[1], right=munch_args(args[2:])
|
|
2057
|
+
)
|
|
2058
|
+
|
|
2059
|
+
return munch_args(args)
|
|
2060
|
+
|
|
2061
|
+
    def window_order(self, args):
        """Wrap the parsed window ordering keyword."""
        return WindowOrder(args[0])

    def window_order_by(self, args):
        # flatten tree
        return args[0]

    def window(self, args):
        """Build a window spec from an ordering plus a numeric bound token."""

        return Window(count=args[1].value, window_order=args[0])

    def WINDOW_TYPE(self, args):
        """Map the raw window keyword token (whitespace-stripped) to a WindowType."""
        return WindowType(args.strip())

    def window_item_over(self, args):
        """Wrap the OVER partition contents."""

        return WindowItemOver(contents=args[0])

    def window_item_order(self, args):
        """Wrap the window ORDER contents."""
        return WindowItemOrder(contents=args[0])

    def logical_operator(self, args):
        """Map an and/or token (case-insensitive) to a BooleanOperator."""
        return BooleanOperator(args[0].value.lower())

    def DATE_PART(self, args):
        """Map a date-part token to the DatePart enum."""
        return DatePart(args.value)
|
|
2087
|
+
|
|
2088
|
+
    @v_args(meta=True)
    def window_item(self, meta: Meta, args) -> WindowItem:
        """Build a window expression over a concept.

        Children are recognized by type; non-concept expressions are
        promoted to named concepts first. Constants are rejected.
        """
        type: WindowType = args[0]
        order_by = []
        over = []
        index = None
        concept: Concept | None = None
        for item in args:
            if isinstance(item, int):
                index = item
            elif isinstance(item, WindowItemOrder):
                order_by = item.contents
            elif isinstance(item, WindowItemOver):
                over = item.contents
            elif isinstance(item, str):
                # bare identifier: resolve through the environment
                concept = self.environment.concepts[item]
            elif isinstance(item, ConceptRef):
                concept = self.environment.concepts[item.address]
            elif isinstance(item, WindowType):
                type = item
            else:
                # arbitrary expression: promote to a registered concept
                concept = arbitrary_to_concept(item, environment=self.environment)
                self.environment.add_concept(concept, meta=meta)
        if not concept:
            raise ParseError(
                f"Window statements must be on fields, not constants - error in: `{self.text_lookup[self.parse_address][meta.start_pos:meta.end_pos]}`"
            )
        return WindowItem(
            type=type,
            content=concept.reference,
            over=over,
            order_by=order_by,
            index=index,
        )
|
|
2122
|
+
|
|
2123
|
+
    def filter_item(self, args) -> FilterItem:
        """Build a FilterItem pairing an expression with a filter condition."""
        where: WhereClause
        expr, raw = args
        if isinstance(raw, WhereClause):
            where = raw
        else:
            # bare expressions are coerced to a boolean condition first
            where = WhereClause.model_construct(
                conditional=expr_to_boolean(raw, self.function_factory)
            )
        if isinstance(expr, str):
            # bare identifier: resolve through the environment
            expr = self.environment.concepts[expr].reference
        return FilterItem(content=expr, where=where)
|
|
2135
|
+
|
|
2136
|
+
    # BEGIN FUNCTIONS
    @v_args(meta=True)
    def expr_reference(self, meta, args) -> Concept:
        """Resolve a concept reference, passing the line number for error reporting."""
        return self.environment.concepts.__getitem__(args[0], meta.line)

    def expr(self, args):
        """Unwrap a single-child expression; multiple children are malformed."""
        if len(args) > 1:
            raise ParseError("Expression should have one child only.")
        return args[0]
|
|
2145
|
+
|
|
2146
|
+
    def aggregate_over(self, args):
        """Pass through the grouping list of an aggregate's BY clause."""
        return args[0]

    def aggregate_all(self, args):
        """Grouping target for an all-rows aggregate: the internal all-rows concept."""
        return [
            ConceptRef(
                address=f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}",
                datatype=DataType.INTEGER,
            )
        ]

    def aggregate_functions(self, args):
        """Attach an optional grouping clause to an aggregate function."""
        if len(args) == 2:
            return AggregateWrapper(function=args[0], by=args[1])
        return AggregateWrapper(function=args[0])
|
|
2161
|
+
|
|
2162
|
+
    @v_args(meta=True)
    def index_access(self, meta, args):
        """Dispatch bracket access: MAP_ACCESS for map bases, INDEX_ACCESS otherwise."""
        args = process_function_args(args, meta=meta, environment=self.environment)
        base = args[0]
        if base.datatype == DataType.MAP or isinstance(base.datatype, MapType):
            return self.function_factory.create_function(
                args, FunctionType.MAP_ACCESS, meta
            )
        return self.function_factory.create_function(
            args, FunctionType.INDEX_ACCESS, meta
        )
|
|
2173
|
+
|
|
2174
|
+
    # --- thin wrappers mapping grammar rules onto FunctionType constructors ---

    @v_args(meta=True)
    def map_key_access(self, meta, args):
        """Map key lookup (MAP_ACCESS)."""
        return self.function_factory.create_function(
            args, FunctionType.MAP_ACCESS, meta
        )

    @v_args(meta=True)
    def attr_access(self, meta, args):
        """Struct attribute access (ATTR_ACCESS)."""
        return self.function_factory.create_function(
            args, FunctionType.ATTR_ACCESS, meta
        )

    @v_args(meta=True)
    def fcoalesce(self, meta, args):
        """COALESCE function."""
        return self.function_factory.create_function(args, FunctionType.COALESCE, meta)

    @v_args(meta=True)
    def fnullif(self, meta, args):
        """NULLIF function."""
        return self.function_factory.create_function(args, FunctionType.NULLIF, meta)

    @v_args(meta=True)
    def frecurse_edge(self, meta, args):
        """RECURSE_EDGE function."""
        return self.function_factory.create_function(
            args, FunctionType.RECURSE_EDGE, meta
        )

    @v_args(meta=True)
    def unnest(self, meta, args):
        """UNNEST function."""

        return self.function_factory.create_function(args, FunctionType.UNNEST, meta)

    @v_args(meta=True)
    def count(self, meta, args):
        """COUNT function."""
        return self.function_factory.create_function(args, FunctionType.COUNT, meta)

    @v_args(meta=True)
    def fgroup(self, meta, args):
        """GROUP function; an optional second child carries the grouping fields."""
        if len(args) == 2:
            fargs = [args[0]] + list(args[1])
        else:
            fargs = [args[0]]
        return self.function_factory.create_function(fargs, FunctionType.GROUP, meta)

    @v_args(meta=True)
    def fabs(self, meta, args):
        """ABS function."""
        return self.function_factory.create_function(args, FunctionType.ABS, meta)

    @v_args(meta=True)
    def count_distinct(self, meta, args):
        """COUNT_DISTINCT function."""
        return self.function_factory.create_function(
            args, FunctionType.COUNT_DISTINCT, meta
        )
|
|
2226
|
+
|
|
2227
|
+
    @v_args(meta=True)
    def sum(self, meta, args):
        """SUM function."""
        return self.function_factory.create_function(args, FunctionType.SUM, meta)

    @v_args(meta=True)
    def array_agg(self, meta, args):
        """ARRAY_AGG function."""
        return self.function_factory.create_function(args, FunctionType.ARRAY_AGG, meta)

    @v_args(meta=True)
    def any(self, meta, args):
        """ANY function."""
        return self.function_factory.create_function(args, FunctionType.ANY, meta)

    @v_args(meta=True)
    def bool_and(self, meta, args):
        """BOOL_AND function."""
        return self.function_factory.create_function(args, FunctionType.BOOL_AND, meta)

    @v_args(meta=True)
    def bool_or(self, meta, args):
        """BOOL_OR function."""
        return self.function_factory.create_function(args, FunctionType.BOOL_OR, meta)

    @v_args(meta=True)
    def avg(self, meta, args):
        """AVG function."""
        return self.function_factory.create_function(args, FunctionType.AVG, meta)

    @v_args(meta=True)
    def max(self, meta, args):
        """MAX function."""
        return self.function_factory.create_function(args, FunctionType.MAX, meta)

    @v_args(meta=True)
    def min(self, meta, args):
        """MIN function."""
        return self.function_factory.create_function(args, FunctionType.MIN, meta)

    @v_args(meta=True)
    def len(self, meta, args):
        """LENGTH function."""
        return self.function_factory.create_function(args, FunctionType.LENGTH, meta)

    @v_args(meta=True)
    def fsplit(self, meta, args):
        """SPLIT function."""
        return self.function_factory.create_function(args, FunctionType.SPLIT, meta)

    @v_args(meta=True)
    def concat(self, meta, args):
        """CONCAT function."""
        return self.function_factory.create_function(args, FunctionType.CONCAT, meta)

    @v_args(meta=True)
    def union(self, meta, args):
        """UNION function."""
        return self.function_factory.create_function(args, FunctionType.UNION, meta)

    @v_args(meta=True)
    def like(self, meta, args):
        """LIKE function."""
        return self.function_factory.create_function(args, FunctionType.LIKE, meta)

    @v_args(meta=True)
    def alt_like(self, meta, args):
        """Alternate spelling for the LIKE function."""
        return self.function_factory.create_function(args, FunctionType.LIKE, meta)

    @v_args(meta=True)
    def ilike(self, meta, args):
        # NOTE(review): ilike maps to plain LIKE rather than a distinct
        # case-insensitive function type — confirm this is intended.
        return self.function_factory.create_function(args, FunctionType.LIKE, meta)

    @v_args(meta=True)
    def upper(self, meta, args):
        """UPPER function."""
        return self.function_factory.create_function(args, FunctionType.UPPER, meta)

    @v_args(meta=True)
    def fstrpos(self, meta, args):
        """STRPOS function."""
        return self.function_factory.create_function(args, FunctionType.STRPOS, meta)
|
|
2294
|
+
|
|
2295
|
+
@v_args(meta=True)
|
|
2296
|
+
def freplace(self, meta, args):
|
|
2297
|
+
return self.function_factory.create_function(args, FunctionType.REPLACE, meta)
|
|
2298
|
+
|
|
2299
|
+
@v_args(meta=True)
|
|
2300
|
+
def fcontains(self, meta, args):
|
|
2301
|
+
return self.function_factory.create_function(args, FunctionType.CONTAINS, meta)
|
|
2302
|
+
|
|
2303
|
+
@v_args(meta=True)
|
|
2304
|
+
def ftrim(self, meta, args):
|
|
2305
|
+
return self.function_factory.create_function(args, FunctionType.TRIM, meta)
|
|
2306
|
+
|
|
2307
|
+
@v_args(meta=True)
|
|
2308
|
+
def fhash(self, meta, args):
|
|
2309
|
+
return self.function_factory.create_function(args, FunctionType.HASH, meta)
|
|
2310
|
+
|
|
2311
|
+
@v_args(meta=True)
|
|
2312
|
+
def fsubstring(self, meta, args):
|
|
2313
|
+
return self.function_factory.create_function(args, FunctionType.SUBSTRING, meta)
|
|
2314
|
+
|
|
2315
|
+
@v_args(meta=True)
|
|
2316
|
+
def flower(self, meta, args):
|
|
2317
|
+
return self.function_factory.create_function(args, FunctionType.LOWER, meta)
|
|
2318
|
+
|
|
2319
|
+
@v_args(meta=True)
|
|
2320
|
+
def fregexp_contains(self, meta, args):
|
|
2321
|
+
return self.function_factory.create_function(
|
|
2322
|
+
args, FunctionType.REGEXP_CONTAINS, meta
|
|
2323
|
+
)
|
|
2324
|
+
|
|
2325
|
+
@v_args(meta=True)
|
|
2326
|
+
def fregexp_extract(self, meta, args):
|
|
2327
|
+
if len(args) == 2:
|
|
2328
|
+
# this is a magic value to represent the default behavior
|
|
2329
|
+
args.append(-1)
|
|
2330
|
+
return self.function_factory.create_function(
|
|
2331
|
+
args, FunctionType.REGEXP_EXTRACT, meta
|
|
2332
|
+
)
|
|
2333
|
+
|
|
2334
|
+
@v_args(meta=True)
|
|
2335
|
+
def fregexp_replace(self, meta, args):
|
|
2336
|
+
return self.function_factory.create_function(
|
|
2337
|
+
args, FunctionType.REGEXP_REPLACE, meta
|
|
2338
|
+
)
|
|
2339
|
+
|
|
2340
|
+
    # date functions
    # All of these are direct rule -> FunctionType delegations.
    @v_args(meta=True)
    def fdate(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.DATE, meta)

    @v_args(meta=True)
    def fdate_trunc(self, meta, args):
        return self.function_factory.create_function(
            args, FunctionType.DATE_TRUNCATE, meta
        )

    @v_args(meta=True)
    def fdate_part(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.DATE_PART, meta)

    @v_args(meta=True)
    def fdate_add(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.DATE_ADD, meta)

    @v_args(meta=True)
    def fdate_sub(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.DATE_SUB, meta)

    @v_args(meta=True)
    def fdate_diff(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.DATE_DIFF, meta)

    @v_args(meta=True)
    def fdatetime(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.DATETIME, meta)

    @v_args(meta=True)
    def ftimestamp(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.TIMESTAMP, meta)

    @v_args(meta=True)
    def fsecond(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.SECOND, meta)

    @v_args(meta=True)
    def fminute(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.MINUTE, meta)

    @v_args(meta=True)
    def fhour(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.HOUR, meta)

    @v_args(meta=True)
    def fday(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.DAY, meta)

    @v_args(meta=True)
    def fday_name(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.DAY_NAME, meta)

    @v_args(meta=True)
    def fday_of_week(self, meta, args):
        return self.function_factory.create_function(
            args, FunctionType.DAY_OF_WEEK, meta
        )

    @v_args(meta=True)
    def fweek(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.WEEK, meta)

    @v_args(meta=True)
    def fmonth(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.MONTH, meta)

    @v_args(meta=True)
    def fmonth_name(self, meta, args):
        return self.function_factory.create_function(
            args, FunctionType.MONTH_NAME, meta
        )

    @v_args(meta=True)
    def fquarter(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.QUARTER, meta)

    @v_args(meta=True)
    def fyear(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.YEAR, meta)
|
|
2422
|
+
|
|
2423
|
+
    def internal_fcast(self, meta, args) -> Function:
        """Shared implementation for CAST parsing.

        Not a grammar callback itself (no @v_args); `fcast` delegates here,
        passing lark's meta through. String literals are folded to typed
        constants at parse time; anything else becomes a runtime CAST node.
        """
        args = process_function_args(args, meta=meta, environment=self.environment)

        # Destructure for readability
        value, dtype = args[0], args[1]
        processed: Any
        if isinstance(value, str):
            # Literal string input: convert eagerly so downstream sees a
            # constant of the target type instead of a runtime cast.
            match dtype:
                case DataType.DATE:
                    processed = date.fromisoformat(value)
                case DataType.DATETIME | DataType.TIMESTAMP:
                    processed = datetime.fromisoformat(value)
                case DataType.INTEGER:
                    processed = int(value)
                case DataType.FLOAT:
                    processed = float(value)
                case DataType.BOOL:
                    # NOTE(review): only the (case-insensitive) literal "true"
                    # yields True; any other string silently becomes False.
                    processed = value.capitalize() == "True"
                case DataType.STRING:
                    processed = value
                case _:
                    raise SyntaxError(f"Invalid cast type {dtype}")

            # Determine function type and arguments
            if isinstance(dtype, TraitDataType):
                # Trait-typed constants carry the trait alongside the value.
                return self.function_factory.create_function(
                    [processed, dtype], FunctionType.TYPED_CONSTANT, meta
                )

            return self.function_factory.create_function(
                [processed], FunctionType.CONSTANT, meta
            )

        # Non-literal input: emit a genuine runtime CAST.
        return self.function_factory.create_function(args, FunctionType.CAST, meta)
|
|
2457
|
+
|
|
2458
|
+
    @v_args(meta=True)
    def fdate_spine(self, meta, args) -> Function:
        return self.function_factory.create_function(
            args, FunctionType.DATE_SPINE, meta
        )

    # utility functions
    @v_args(meta=True)
    def fcast(self, meta, args) -> Function:
        # Grammar entry point for CAST; all logic lives in internal_fcast.
        return self.internal_fcast(meta, args)
|
|
2468
|
+
|
|
2469
|
+
    # math functions
    @v_args(meta=True)
    def fadd(self, meta, args) -> Function:
        return self.function_factory.create_function(args, FunctionType.ADD, meta)

    @v_args(meta=True)
    def fsub(self, meta, args) -> Function:
        return self.function_factory.create_function(args, FunctionType.SUBTRACT, meta)

    @v_args(meta=True)
    def fmul(self, meta, args) -> Function:
        return self.function_factory.create_function(args, FunctionType.MULTIPLY, meta)

    @v_args(meta=True)
    def fdiv(self, meta: Meta, args) -> Function:
        return self.function_factory.create_function(args, FunctionType.DIVIDE, meta)

    @v_args(meta=True)
    def fmod(self, meta: Meta, args) -> Function:
        return self.function_factory.create_function(args, FunctionType.MOD, meta)

    @v_args(meta=True)
    def fsqrt(self, meta: Meta, args) -> Function:
        return self.function_factory.create_function(args, FunctionType.SQRT, meta)

    @v_args(meta=True)
    def frandom(self, meta: Meta, args) -> Function:
        return self.function_factory.create_function(args, FunctionType.RANDOM, meta)
|
|
2498
|
+
|
|
2499
|
+
    @v_args(meta=True)
    def fround(self, meta, args) -> Function:
        # Default to 0 decimal places when no precision is supplied.
        if len(args) == 1:
            args.append(0)
        return self.function_factory.create_function(args, FunctionType.ROUND, meta)

    @v_args(meta=True)
    def flog(self, meta, args) -> Function:
        # Default to base 10 when no base is supplied.
        if len(args) == 1:
            args.append(10)
        return self.function_factory.create_function(args, FunctionType.LOG, meta)
|
|
2510
|
+
|
|
2511
|
+
    @v_args(meta=True)
    def ffloor(self, meta, args) -> Function:
        return self.function_factory.create_function(args, FunctionType.FLOOR, meta)

    @v_args(meta=True)
    def fceil(self, meta, args) -> Function:
        return self.function_factory.create_function(args, FunctionType.CEIL, meta)

    @v_args(meta=True)
    def fcase(self, meta, args: List[Union[CaseWhen, CaseElse]]) -> Function:
        # args is the ordered list of WHEN branches plus an optional ELSE.
        return self.function_factory.create_function(args, FunctionType.CASE, meta)

    @v_args(meta=True)
    def fcase_when(self, meta, args) -> CaseWhen:
        # args[0] is the condition expression, args[1] the branch result;
        # the condition is normalized to a boolean comparison first.
        args = process_function_args(args, meta=meta, environment=self.environment)
        root = expr_to_boolean(args[0], self.function_factory)
        return CaseWhen(comparison=root, expr=args[1])

    @v_args(meta=True)
    def fcase_else(self, meta, args) -> CaseElse:
        args = process_function_args(args, meta=meta, environment=self.environment)
        return CaseElse(expr=args[0])

    @v_args(meta=True)
    def fcurrent_date(self, meta, args):
        # Dedicated node type rather than a factory-built function;
        # `meta` and `args` are ignored.
        return CurrentDate([])
|
|
2537
|
+
|
|
2538
|
+
    @v_args(meta=True)
    def fcurrent_datetime(self, meta, args):
        # Zero-argument function node; parsed args are ignored.
        return self.function_factory.create_function(
            args=[], operator=FunctionType.CURRENT_DATETIME, meta=meta
        )

    @v_args(meta=True)
    def fcurrent_timestamp(self, meta, args):
        # Zero-argument function node; parsed args are ignored.
        return self.function_factory.create_function(
            args=[], operator=FunctionType.CURRENT_TIMESTAMP, meta=meta
        )
|
|
2549
|
+
|
|
2550
|
+
    @v_args(meta=True)
    def fnot(self, meta, args):
        # Boolean negation: rewritten as `coalesce(x, false) = false` so a
        # NULL operand is treated as false instead of propagating NULL
        # (SQL three-valued logic would make `not NULL` -> NULL otherwise).
        if arg_to_datatype(args[0]) == DataType.BOOL:
            return Comparison(
                left=self.function_factory.create_function(
                    [args[0], False], FunctionType.COALESCE, meta
                ),
                operator=ComparisonOperator.EQ,
                right=False,
                meta=meta,
            )
        # Non-boolean operand: `!x` becomes an IS_NULL check.
        # NOTE(review): presumably mirrors "truthy = is not null" semantics;
        # confirm against the language spec.
        return self.function_factory.create_function(args, FunctionType.IS_NULL, meta)
|
|
2562
|
+
|
|
2563
|
+
    @v_args(meta=True)
    def fbool(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.BOOL, meta)

    @v_args(meta=True)
    def fmap_keys(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.MAP_KEYS, meta)

    @v_args(meta=True)
    def fmap_values(self, meta, args):
        return self.function_factory.create_function(
            args, FunctionType.MAP_VALUES, meta
        )

    @v_args(meta=True)
    def farray_sum(self, meta, args):
        return self.function_factory.create_function(args, FunctionType.ARRAY_SUM, meta)

    @v_args(meta=True)
    def fgenerate_array(self, meta, args):
        return self.function_factory.create_function(
            args, FunctionType.GENERATE_ARRAY, meta
        )

    @v_args(meta=True)
    def farray_distinct(self, meta, args):
        return self.function_factory.create_function(
            args, FunctionType.ARRAY_DISTINCT, meta
        )

    @v_args(meta=True)
    def farray_to_string(self, meta, args):
        return self.function_factory.create_function(
            args, FunctionType.ARRAY_TO_STRING, meta
        )

    @v_args(meta=True)
    def farray_sort(self, meta, args):
        # Default to ascending order when no direction is supplied.
        if len(args) == 1:
            # this is a magic value to represent the default behavior
            args = [args[0], Ordering.ASCENDING]
        return self.function_factory.create_function(
            args, FunctionType.ARRAY_SORT, meta
        )

    @v_args(meta=True)
    def transform_lambda(self, meta, args):
        # Resolve a previously-declared custom function by name; used as the
        # lambda argument of array transform/filter rules.
        return self.environment.functions[args[0]]
|
|
2611
|
+
|
|
2612
|
+
    @v_args(meta=True)
    def farray_transform(self, meta, args) -> Function:
        """Build ARRAY_TRANSFORM(array, binding, transformed-expr).

        args[0] is the array expression; args[1] is a one-argument lambda
        (a CustomFunctionFactory produced by transform_lambda).
        """
        factory: CustomFunctionFactory = args[1]
        if not len(factory.function_arguments) == 1:
            raise InvalidSyntaxException(
                "Array transform function must have exactly one argument;"
            )
        array_type = arg_to_datatype(args[0])
        if not isinstance(array_type, ArrayType):
            raise InvalidSyntaxException(
                f"Array transform function must be applied to an array, not {array_type}"
            )
        # Bind the lambda's parameter to the array's element type, then
        # instantiate the lambda body with that binding.
        return self.function_factory.create_function(
            [
                args[0],
                factory.function_arguments[0],
                factory(
                    ArgBinding(
                        name=factory.function_arguments[0].name,
                        datatype=array_type.value_data_type,
                    )
                ),
            ],
            FunctionType.ARRAY_TRANSFORM,
            meta,
        )
|
|
2638
|
+
|
|
2639
|
+
    @v_args(meta=True)
    def farray_filter(self, meta, args) -> Function:
        """Build ARRAY_FILTER(array, binding, predicate-expr).

        Mirrors farray_transform: args[0] is the array expression and
        args[1] a one-argument lambda used as the filter predicate.
        """
        factory: CustomFunctionFactory = args[1]
        if not len(factory.function_arguments) == 1:
            raise InvalidSyntaxException(
                "Array filter function must have exactly one argument;"
            )
        array_type = arg_to_datatype(args[0])
        if not isinstance(array_type, ArrayType):
            raise InvalidSyntaxException(
                f"Array filter function must be applied to an array, not {array_type}"
            )
        # Bind the lambda's parameter to the array's element type, then
        # instantiate the lambda body with that binding.
        return self.function_factory.create_function(
            [
                args[0],
                factory.function_arguments[0],
                factory(
                    ArgBinding(
                        name=factory.function_arguments[0].name,
                        datatype=array_type.value_data_type,
                    )
                ),
            ],
            FunctionType.ARRAY_FILTER,
            meta,
        )
|
|
2665
|
+
|
|
2666
|
+
|
|
2667
|
+
def unpack_visit_error(e: VisitError, text: str | None = None):
    """Unwrap a lark VisitError and re-raise the root-cause exception.

    Imports can produce nested VisitErrors, so recurse until the original
    exception is found. SyntaxError/TypeError causes are wrapped in
    InvalidSyntaxException with rule/line context where available; known
    project exceptions are re-raised as-is.
    """
    if isinstance(e.orig_exc, VisitError):
        unpack_visit_error(e.orig_exc, text)
    elif isinstance(e.orig_exc, (UndefinedConceptException, ImportError)):
        raise e.orig_exc
    elif isinstance(e.orig_exc, InvalidSyntaxException):
        raise e.orig_exc
    elif isinstance(e.orig_exc, (SyntaxError, TypeError)):
        if isinstance(e.obj, Tree):
            if text:
                # Include a small excerpt of the offending source.
                extract = text[e.obj.meta.start_pos - 5 : e.obj.meta.end_pos + 5]
                raise InvalidSyntaxException(
                    str(e.orig_exc)
                    + " Raised when parsing rule: "
                    + str(e.rule)
                    + f' Line: {e.obj.meta.line} "...{extract}..."'
                )
            # BUG FIX: this exception was previously constructed but never
            # raised, so the rule/line context was silently discarded and a
            # less informative error was raised below instead.
            raise InvalidSyntaxException(
                str(e.orig_exc) + " in " + str(e.rule) + f" Line: {e.obj.meta.line}"
            )
        raise InvalidSyntaxException(str(e.orig_exc)).with_traceback(
            e.orig_exc.__traceback__
        )
    raise e.orig_exc
|
|
2693
|
+
|
|
2694
|
+
|
|
2695
|
+
def parse_text_raw(text: str, environment: Optional[Environment] = None):
    """Parse `text` to a raw lark tree without transforming it to objects.

    FIX: previously the parse result was computed and discarded, making the
    function useful only as a syntax check; returning the tree is strictly
    more useful and backward compatible.

    Note: `environment` is accepted for interface symmetry with parse_text
    but is not used here.
    """
    return PARSER.parse(text)
|
|
2697
|
+
|
|
2698
|
+
|
|
2699
|
+
# Numbered, user-facing explanations for common syntax mistakes; keyed by
# error code and surfaced by parse_text's error handlers.
ERROR_CODES: dict[int, str] = {
    # 100 codes are SQL compatibility errors
    101: "Using FROM keyword? Trilogy does not have a FROM clause (Datasource resolution is automatic).",
    # 200 codes relate to required explicit syntax (we could loosen these?)
    201: 'Missing alias? Alias must be specified with "AS" - e.g. `SELECT x+1 AS y`',
    202: "Missing closing semicolon? Statements must be terminated with a semicolon `;`.",
    210: "Missing order direction? Order by must be explicit about direction - specify `asc` or `desc`.",
}

# Characters of context shown on each side of an error location.
DEFAULT_ERROR_SPAN: int = 30
|
|
2709
|
+
|
|
2710
|
+
|
|
2711
|
+
def inject_context_maker(pos: int, text: str, span: int = 40) -> str:
|
|
2712
|
+
"""Returns a pretty string pinpointing the error in the text,
|
|
2713
|
+
with span amount of context characters around it.
|
|
2714
|
+
|
|
2715
|
+
Note:
|
|
2716
|
+
The parser doesn't hold a copy of the text it has to parse,
|
|
2717
|
+
so you have to provide it again
|
|
2718
|
+
"""
|
|
2719
|
+
|
|
2720
|
+
start = max(pos - span, 0)
|
|
2721
|
+
end = pos + span
|
|
2722
|
+
if not isinstance(text, bytes):
|
|
2723
|
+
|
|
2724
|
+
before = text[start:pos].rsplit("\n", 1)[-1]
|
|
2725
|
+
after = text[pos:end].split("\n", 1)[0]
|
|
2726
|
+
rcap = ""
|
|
2727
|
+
# if it goes beyond the end of text, no ...
|
|
2728
|
+
# if it terminates on a space, no need for ...
|
|
2729
|
+
if after and not after[-1].isspace() and not (end > len(text)):
|
|
2730
|
+
rcap = "..."
|
|
2731
|
+
lcap = ""
|
|
2732
|
+
if start > 0 and not before[0].isspace():
|
|
2733
|
+
lcap = "..."
|
|
2734
|
+
lpad = " "
|
|
2735
|
+
rpad = " "
|
|
2736
|
+
if before.endswith(" "):
|
|
2737
|
+
lpad = ""
|
|
2738
|
+
if after.startswith(" "):
|
|
2739
|
+
rpad = ""
|
|
2740
|
+
return f"{lcap}{before}{lpad}???{rpad}{after}{rcap}"
|
|
2741
|
+
else:
|
|
2742
|
+
before = text[start:pos].rsplit(b"\n", 1)[-1]
|
|
2743
|
+
after = text[pos:end].split(b"\n", 1)[0]
|
|
2744
|
+
return (before + b" ??? " + after).decode("ascii", "backslashreplace")
|
|
2745
|
+
|
|
2746
|
+
|
|
2747
|
+
def parse_text(
    text: str,
    environment: Optional[Environment] = None,
    root: Path | None = None,
    parse_config: Parsing | None = None,
) -> Tuple[
    Environment,
    List[
        Datasource
        | ImportStatement
        | SelectStatement
        | PersistStatement
        | ShowStatement
        | RawSQLStatement
        | ValidateStatement
        | None
    ],
]:
    """Parse Trilogy source into an Environment plus parsed statements.

    Runs a two-pass parse (first pass tolerates missing concepts to allow
    circular dependencies) and converts low-level lark errors into
    user-friendly InvalidSyntaxException messages with location context.
    """

    def _create_syntax_error(code: int, pos: int, text: str) -> InvalidSyntaxException:
        """Helper to create standardized syntax error with context."""
        return InvalidSyntaxException(
            f"Syntax [{code}]: "
            + ERROR_CODES[code]
            + "\nLocation:\n"
            + inject_context_maker(pos, text.replace("\n", " "), DEFAULT_ERROR_SPAN)
        )

    def _create_generic_syntax_error(
        message: str, pos: int, text: str
    ) -> InvalidSyntaxException:
        """Helper to create generic syntax error with context."""
        return InvalidSyntaxException(
            message
            + "\nLocation:\n"
            + inject_context_maker(pos, text.replace("\n", " "), DEFAULT_ERROR_SPAN)
        )

    def _handle_unexpected_token(e: UnexpectedToken, text: str) -> None:
        """Handle UnexpectedToken errors to make friendlier error messages.

        Always raises; it probes the interactive parser by feeding candidate
        tokens (`;`, `AS`) to diagnose the most likely user mistake.
        """
        # Handle ordering direction error
        pos = e.pos_in_stream or 0
        if e.interactive_parser.lexer_thread.state:
            last_token = e.interactive_parser.lexer_thread.state.last_token
        else:
            last_token = None
        if e.expected == {"ORDERING_DIRECTION"}:
            raise _create_syntax_error(210, pos, text)

        # Handle FROM token error
        parsed_tokens = (
            [x.value for x in e.token_history if x] if e.token_history else []
        )

        if parsed_tokens == ["FROM"]:
            raise _create_syntax_error(101, pos, text)
        # check if they are missing a semicolon
        if last_token and e.token.type == "$END":
            try:
                # If feeding `;` is accepted, a terminator was the fix.
                e.interactive_parser.feed_token(Token("_TERMINATOR", ";"))
                state = e.interactive_parser.lexer_thread.state
                if state and state.last_token:
                    new_pos = state.last_token.end_pos or pos
                else:
                    new_pos = pos
                raise _create_syntax_error(202, new_pos, text)
            except UnexpectedToken:
                pass
        # check if they forgot an as
        try:
            # If feeding `AS <identifier>` is accepted, an alias was the fix.
            e.interactive_parser.feed_token(Token("AS", "AS"))
            state = e.interactive_parser.lexer_thread.state
            if state and state.last_token:
                new_pos = state.last_token.end_pos or pos
            else:
                new_pos = pos
            e.interactive_parser.feed_token(Token("IDENTIFIER", e.token.value))
            raise _create_syntax_error(201, new_pos, text)
        except UnexpectedToken:
            pass

        # Default UnexpectedToken handling
        raise _create_generic_syntax_error(str(e), pos, text)

    environment = environment or (
        Environment(working_path=root) if root else Environment()
    )
    parser = ParseToObjects(
        environment=environment, import_keys=["root"], parse_config=parse_config
    )
    start = datetime.now()

    try:
        parser.set_text(text)
        # disable fail on missing to allow for circular dependencies
        parser.prepare_parse()
        parser.transform(PARSER.parse(text))
        # this will reset fail on missing
        pass_two = parser.run_second_parse_pass()
        output = [v for v in pass_two if v]
        environment.concepts.fail_on_missing = True
        end = datetime.now()
        perf_logger.debug(
            f"Parse time: {end - start} for {len(text)} characters, {len(output)} objects"
        )
    except VisitError as e:
        unpack_visit_error(e, text)
        # this will never be reached
        raise e
    except UnexpectedToken as e:
        # _handle_unexpected_token always raises, so `output` is bound
        # whenever the return below is reached.
        _handle_unexpected_token(e, text)
    except (UnexpectedCharacters, UnexpectedEOF, UnexpectedInput) as e:
        raise _create_generic_syntax_error(str(e), e.pos_in_stream or 0, text)
    except (ValidationError, TypeError) as e:
        raise InvalidSyntaxException(str(e))

    return environment, output
|