pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
@@ -0,0 +1,2813 @@
1
+ from dataclasses import dataclass
2
+ from datetime import date, datetime
3
+ from enum import Enum
4
+ from logging import getLogger
5
+ from os.path import dirname, join
6
+ from pathlib import Path
7
+ from re import IGNORECASE
8
+ from typing import Any, List, Optional, Tuple, Union
9
+
10
+ from lark import Lark, ParseTree, Token, Transformer, Tree, v_args
11
+ from lark.exceptions import (
12
+ UnexpectedCharacters,
13
+ UnexpectedEOF,
14
+ UnexpectedInput,
15
+ UnexpectedToken,
16
+ VisitError,
17
+ )
18
+ from lark.tree import Meta
19
+ from pydantic import ValidationError
20
+
21
+ from trilogy.constants import (
22
+ CONFIG,
23
+ DEFAULT_NAMESPACE,
24
+ NULL_VALUE,
25
+ MagicConstants,
26
+ Parsing,
27
+ )
28
+ from trilogy.core.enums import (
29
+ BooleanOperator,
30
+ ComparisonOperator,
31
+ ConceptSource,
32
+ CreateMode,
33
+ DatasourceState,
34
+ DatePart,
35
+ Derivation,
36
+ FunctionType,
37
+ Granularity,
38
+ IOType,
39
+ Modifier,
40
+ Ordering,
41
+ PersistMode,
42
+ PublishAction,
43
+ Purpose,
44
+ ShowCategory,
45
+ ValidationScope,
46
+ WindowOrder,
47
+ WindowType,
48
+ )
49
+ from trilogy.core.exceptions import (
50
+ InvalidSyntaxException,
51
+ MissingParameterException,
52
+ UndefinedConceptException,
53
+ )
54
+ from trilogy.core.functions import (
55
+ CurrentDate,
56
+ FunctionFactory,
57
+ )
58
+ from trilogy.core.internal import ALL_ROWS_CONCEPT, INTERNAL_NAMESPACE
59
+ from trilogy.core.models.author import (
60
+ AggregateWrapper,
61
+ AlignClause,
62
+ AlignItem,
63
+ ArgBinding,
64
+ CaseElse,
65
+ CaseWhen,
66
+ Comment,
67
+ Comparison,
68
+ Concept,
69
+ ConceptRef,
70
+ Conditional,
71
+ CustomFunctionFactory,
72
+ CustomType,
73
+ DeriveClause,
74
+ DeriveItem,
75
+ Expr,
76
+ FilterItem,
77
+ Function,
78
+ FunctionCallWrapper,
79
+ Grain,
80
+ HavingClause,
81
+ Metadata,
82
+ MultiSelectLineage,
83
+ OrderBy,
84
+ OrderItem,
85
+ Parenthetical,
86
+ RowsetItem,
87
+ SubselectComparison,
88
+ UndefinedConceptFull,
89
+ WhereClause,
90
+ Window,
91
+ WindowItem,
92
+ WindowItemOrder,
93
+ WindowItemOver,
94
+ )
95
+ from trilogy.core.models.core import (
96
+ ArrayType,
97
+ DataType,
98
+ DataTyped,
99
+ ListWrapper,
100
+ MapType,
101
+ MapWrapper,
102
+ NumericType,
103
+ StructComponent,
104
+ StructType,
105
+ TraitDataType,
106
+ TupleWrapper,
107
+ arg_to_datatype,
108
+ dict_to_map_wrapper,
109
+ is_compatible_datatype,
110
+ list_to_wrapper,
111
+ tuple_to_wrapper,
112
+ )
113
+ from trilogy.core.models.datasource import (
114
+ Address,
115
+ ColumnAssignment,
116
+ Datasource,
117
+ Query,
118
+ RawColumnExpr,
119
+ )
120
+ from trilogy.core.models.environment import (
121
+ DictImportResolver,
122
+ Environment,
123
+ FileSystemImportResolver,
124
+ Import,
125
+ )
126
+ from trilogy.core.statements.author import (
127
+ ConceptDeclarationStatement,
128
+ ConceptDerivationStatement,
129
+ ConceptTransform,
130
+ CopyStatement,
131
+ CreateStatement,
132
+ FunctionDeclaration,
133
+ ImportStatement,
134
+ Limit,
135
+ MergeStatementV2,
136
+ MockStatement,
137
+ MultiSelectStatement,
138
+ PersistStatement,
139
+ PublishStatement,
140
+ RawSQLStatement,
141
+ RowsetDerivationStatement,
142
+ SelectItem,
143
+ SelectStatement,
144
+ ShowStatement,
145
+ TypeDeclaration,
146
+ ValidateStatement,
147
+ )
148
+ from trilogy.parsing.common import (
149
+ align_item_to_concept,
150
+ arbitrary_to_concept,
151
+ constant_to_concept,
152
+ derive_item_to_concept,
153
+ process_function_args,
154
+ rowset_to_concepts,
155
+ )
156
+ from trilogy.parsing.exceptions import NameShadowError, ParseError
157
+
158
+ perf_logger = getLogger("trilogy.parse.performance")
159
+
160
+
161
class ParsePass(Enum):
    """Marks which of the two parse passes a ParseToObjects instance is in."""

    # assigned in ParseToObjects.__init__ and prepare_parse
    INITIAL = 1
    # assigned at the start of ParseToObjects.run_second_parse_pass
    VALIDATION = 2
164
+
165
+
166
# Python-side value types grouped as "constant" types.
# NOTE(review): not referenced in the visible part of this file - confirm usage.
CONSTANT_TYPES = (int, float, str, bool, ListWrapper, TupleWrapper, MapWrapper)

# Fallback parse/token address used by ParseToObjects when none is supplied.
SELF_LABEL = "root"

# Cap on the number of lineage-rehydration sweeps in run_second_parse_pass.
MAX_PARSE_DEPTH = 10

# NOTE(review): presumably the datatypes allowed for incremental datasource
# columns - confirm against the code that consumes this set.
SUPPORTED_INCREMENTAL_TYPES: set[DataType] = {DataType.DATE, DataType.TIMESTAMP}

# Directory two levels above this module's file.
STDLIB_ROOT = Path(__file__).parent.parent
175
+
176
+
177
@dataclass
class WholeGrainWrapper:
    """Thin container holding a single where clause."""

    # the wrapped clause
    where: WhereClause
180
+
181
+
182
@dataclass
class FunctionBindingType:
    """Optional datatype attached to a function binding."""

    # None when no type was given
    type: DataType | TraitDataType | None = None
185
+
186
+
187
@dataclass
class DropOn:
    """Container for a list of function types.

    NOTE(review): presumably the functions on which something is dropped -
    confirm against the grammar rule that produces it.
    """

    functions: list[FunctionType]
190
+
191
+
192
@dataclass
class AddOn:
    """Container for a list of function types.

    NOTE(review): presumably the functions on which something is added -
    confirm against the grammar rule that produces it.
    """

    functions: list[FunctionType]
195
+
196
+
197
@dataclass
class DatasourcePartitionClause:
    """Concept references named by a datasource partition clause."""

    columns: list[ConceptRef]
200
+
201
+
202
@dataclass
class DatasourceIncrementalClause:
    """Concept references named by a datasource incremental clause."""

    columns: list[ConceptRef]
205
+
206
+
207
# Build the module-level LALR parser from the grammar file that sits next to
# this module. The grammar is decoded as UTF-8 explicitly so that loading it
# does not depend on the platform's default text encoding.
with open(join(dirname(__file__), "trilogy.lark"), "r", encoding="utf-8") as f:
    PARSER = Lark(
        f.read(),
        start="start",
        # keep line/column info on tree nodes (the transformer reads meta.line)
        propagate_positions=True,
        # grammar terminals are matched case-insensitively
        g_regex_flags=IGNORECASE,
        parser="lalr",
        cache=True,
    )
216
+
217
+
218
def parse_concept_reference(
    name: str, environment: Environment, purpose: Optional[Purpose] = None
) -> Tuple[str, str, str, str | None]:
    """Break a concept name into ``(lookup, namespace, name, parent)``.

    An undotted name keeps itself as the lookup and takes the environment's
    namespace (or the default namespace). A dotted name is split on its last
    dot; for properties the prefix is treated as the parent concept's address
    and the namespace is taken from that parent, otherwise the prefix is the
    namespace itself and ``parent`` is ``None``.
    """
    if "." not in name:
        return name, environment.namespace or DEFAULT_NAMESPACE, name, None

    prefix, short_name = name.rsplit(".", 1)
    if purpose == Purpose.PROPERTY:
        # prefix names the owning concept; inherit that concept's namespace
        owner_namespace = environment.concepts[prefix].namespace or DEFAULT_NAMESPACE
        return f"{owner_namespace}.{short_name}", owner_namespace, short_name, prefix
    return f"{prefix}.{short_name}", prefix, short_name, None
234
+
235
+
236
def expr_to_boolean(
    root,
    function_factory: FunctionFactory,
) -> Union[Comparison, SubselectComparison, Conditional]:
    """Coerce an arbitrary expression into a boolean condition.

    Existing comparisons/conditionals pass through untouched. Boolean
    expressions become ``expr = True``, integer expressions are wrapped in a
    BOOL function and compared to ``True``, and everything else becomes an
    ``expr IS NOT NULL`` check.
    """
    if isinstance(root, (Comparison, SubselectComparison, Conditional)):
        return root

    root_type = arg_to_datatype(root)
    if root_type == DataType.BOOL:
        return Comparison(left=root, right=True, operator=ComparisonOperator.EQ)
    if root_type == DataType.INTEGER:
        as_bool = function_factory.create_function(
            [root],
            FunctionType.BOOL,
        )
        return Comparison(left=as_bool, right=True, operator=ComparisonOperator.EQ)
    return Comparison(left=root, right=NULL_VALUE, operator=ComparisonOperator.IS_NOT)
258
+
259
+
260
def unwrap_transformation(
    input: Expr,
    environment: Environment,
) -> (
    Function
    | FilterItem
    | WindowItem
    | AggregateWrapper
    | FunctionCallWrapper
    | Parenthetical
):
    """Return ``input`` in a form usable as a transformation.

    Values that are already transformations are returned unchanged. A concept
    reference is wrapped in an ALIAS function carrying the referenced
    concept's datatype and purpose. Anything else is wrapped in a CONSTANT
    function built without validation (``model_construct``).
    """
    if isinstance(input, (Function, AggregateWrapper)):
        return input
    if isinstance(input, ConceptRef):
        referenced = environment.concepts[input.address]
        return Function(
            operator=FunctionType.ALIAS,
            output_datatype=referenced.datatype,
            output_purpose=referenced.purpose,
            arguments=[input],
        )
    if isinstance(input, (FilterItem, WindowItem, FunctionCallWrapper, Parenthetical)):
        return input
    # fall-through: treat the value as a literal constant
    return Function.model_construct(
        operator=FunctionType.CONSTANT,
        output_datatype=arg_to_datatype(input),
        output_purpose=Purpose.CONSTANT,
        arguments=[input],
    )
298
+
299
+
300
def rehydrate_lineage(
    lineage: Any, environment: Environment, function_factory: FunctionFactory
) -> Any:
    """Re-derive datatypes inside a lineage object.

    This is a workaround: function datatypes cannot be known until every
    concept has been parsed, so lineage is rebuilt/refreshed afterwards.
    Functions are recreated through the factory from rehydrated arguments;
    parentheticals and aggregate wrappers are updated in place; window and
    rowset items get their content's datatype refreshed from the environment.
    Any other value is returned untouched.
    """
    if isinstance(lineage, Function):
        fresh_args = [
            rehydrate_lineage(argument, environment, function_factory)
            for argument in lineage.arguments
        ]
        return function_factory.create_function(
            fresh_args,
            operator=lineage.operator,
        )
    if isinstance(lineage, Parenthetical):
        lineage.content = rehydrate_lineage(
            lineage.content, environment, function_factory
        )
        return lineage
    if isinstance(lineage, WindowItem):
        # this is temporarily guaranteed until we do some upstream work
        assert isinstance(lineage.content, ConceptRef)
        source = environment.concepts[lineage.content.address]
        lineage.content.datatype = source.datatype
        return lineage
    if isinstance(lineage, AggregateWrapper):
        lineage.function = rehydrate_lineage(
            lineage.function, environment, function_factory
        )
        return lineage
    if isinstance(lineage, RowsetItem):
        source = environment.concepts[lineage.content.address]
        lineage.content.datatype = source.datatype
        return lineage
    return lineage
337
+
338
+
339
def rehydrate_concept_lineage(
    concept: Concept, environment: Environment, function_factory: FunctionFactory
) -> Concept:
    """Refresh ``concept.lineage`` and, when typed, copy its datatype onto the concept.

    The concept is mutated in place and also returned.
    """
    refreshed = rehydrate_lineage(concept.lineage, environment, function_factory)
    concept.lineage = refreshed
    if isinstance(concept.lineage, DataTyped):
        concept.datatype = concept.lineage.output_datatype
    return concept
346
+
347
+
348
+ class ParseToObjects(Transformer):
349
def __init__(
    self,
    environment: Environment,
    parse_address: str | None = None,
    token_address: Path | str | None = None,
    parsed: dict[str, "ParseToObjects"] | None = None,
    tokens: dict[Path | str, ParseTree] | None = None,
    text_lookup: dict[Path | str, str] | None = None,
    environment_lookup: dict[str, Environment] | None = None,
    import_keys: list[str] | None = None,
    parse_config: Parsing | None = None,
):
    """Create a transformer bound to ``environment``.

    The dictionary arguments (``parsed``, ``tokens``, ``text_lookup``,
    ``environment_lookup``) are stored by reference when provided - even when
    empty - so that a caller can share one lookup between several parsers.
    A fresh dictionary is created only when the argument is ``None``.

    Args:
        environment: environment that parsed objects are registered in.
        parse_address: label for this parser; defaults to ``SELF_LABEL``.
        token_address: key under which this parser's tree and text are
            cached; defaults to ``SELF_LABEL``.
        parsed: other parsers keyed by address.
        tokens: cache of parse trees keyed by token address.
        text_lookup: cache of raw text keyed by token address.
        environment_lookup: environments keyed by name.
        import_keys: import key chain; defaults to ``[SELF_LABEL]`` when
            missing or empty.
        parse_config: parsing configuration; defaults to ``CONFIG.parsing``.
    """
    # positional True is the Transformer's token-visiting flag
    Transformer.__init__(self, True)
    self.environment: Environment = environment
    self.parse_address: str = parse_address or SELF_LABEL
    self.token_address: Path | str = token_address or SELF_LABEL
    self.parsed: dict[str, ParseToObjects] = parsed if parsed is not None else {}
    self.tokens: dict[Path | str, ParseTree] = tokens if tokens is not None else {}
    # previously `environment_lookup or {}`, which silently replaced a
    # caller-supplied empty dict; now handled like the other shared lookups
    self.environments: dict[str, Environment] = (
        environment_lookup if environment_lookup is not None else {}
    )
    self.text_lookup: dict[Path | str, str] = (
        text_lookup if text_lookup is not None else {}
    )
    # we do a second pass to pick up circular dependencies
    # after initial parsing
    self.parse_pass = ParsePass.INITIAL
    self.function_factory = FunctionFactory(self.environment)
    self.import_keys: list[str] = import_keys or [SELF_LABEL]
    self.parse_config: Parsing = parse_config or CONFIG.parsing
377
+
378
def set_text(self, text: str):
    """Record ``text`` in the text lookup under this parser's token address."""
    key = self.token_address
    self.text_lookup[key] = text
380
+
381
def transform(self, tree: Tree):
    """Transform ``tree`` and cache the tree under this parser's token address."""
    transformed = super().transform(tree)
    # keep the raw tree so run_second_parse_pass can walk it again
    self.tokens[self.token_address] = tree
    return transformed
385
+
386
def prepare_parse(self):
    """Reset this parser, and every parser in ``self.parsed``, to the initial pass.

    Also switches off ``fail_on_missing`` on the environment's concept lookup.
    """
    self.parse_pass = ParsePass.INITIAL
    self.environment.concepts.fail_on_missing = False
    for child in self.parsed.values():
        child.prepare_parse()
391
+
392
def run_second_parse_pass(self, force: bool = False):
    """Re-run the transform over the cached tree in validation mode.

    Returns an empty list when no tree has been cached for this parser's
    token address. Otherwise parsers in ``self.parsed`` that are not yet in
    the validation pass are run first, the cached tree is transformed again,
    the undefined-concept record is cleared, and concept lineage is
    rehydrated repeatedly until no concept with lineage has an UNKNOWN
    datatype or ``MAX_PARSE_DEPTH`` sweeps have been exceeded.

    ``force`` is accepted but not used by this method.
    """
    if self.token_address not in self.tokens:
        return []
    self.parse_pass = ParsePass.VALIDATION
    # snapshot the values: child passes may add entries to the shared dict
    for child in list(self.parsed.values()):
        if child.parse_pass != ParsePass.VALIDATION:
            child.run_second_parse_pass()
    reparsed = self.transform(self.tokens[self.token_address])
    self.environment.concepts.undefined = {}

    # output datatypes for functions may have been wrong
    # as they were derived from not fully understood upstream types
    # so loop through to recreate function lineage until all datatypes are known
    sweeps = 0
    while True:
        found_unknown = False
        for address, concept in self.environment.concepts.items():
            if concept.datatype == DataType.UNKNOWN and concept.lineage:
                self.environment.concepts[address] = rehydrate_concept_lineage(
                    concept, self.environment, self.function_factory
                )
                found_unknown = True
        sweeps += 1
        if sweeps > MAX_PARSE_DEPTH or not found_unknown:
            break

    return reparsed
422
+
423
def start(self, args):
    """Handler for the ``start`` rule: hands back its children unchanged."""
    children = args
    return children
425
+
426
def LINE_SEPARATOR(self, args):
    """Replace a LINE_SEPARATOR token with the matching magic constant."""
    marker = MagicConstants.LINE_SEPARATOR
    return marker
428
+
429
def block(self, args):
    """Handler for the ``block`` rule; returns the block's first child.

    When the first child is a concept declaration and it is followed by
    something other than a line separator, the text of every trailing
    ``Comment`` is joined with newlines and stored as the concept's
    description. Only the leading ``#`` marker is removed from each comment,
    so a comment that itself contains ``#`` (e.g. ``# see issue #12``) is
    kept whole rather than being cut at the second ``#``.
    """
    output = args[0]
    if isinstance(output, ConceptDeclarationStatement):
        if len(args) > 1 and args[1] != MagicConstants.LINE_SEPARATOR:
            comments = [x for x in args[1:] if isinstance(x, Comment)]
            # maxsplit=1: strip only the comment marker, keep later '#'
            merged = "\n".join(
                [x.text.split("#", 1)[1].rstrip() for x in comments]
            )
            output.concept.metadata.description = merged
    # this is a bad plan for now;
    # because a comment after an import statement is very common
    # and it's not intuitive that it modifies the import description
    # if isinstance(output, ImportStatement):
    #     if len(args) > 1 and isinstance(args[1], Comment):
    #         comment = args[1].text.split("#")[1].strip()
    #         namespace = output.alias
    #         for _, v in self.environment.concepts.items():
    #             if v.namespace == namespace:
    #                 if v.metadata.description:
    #                     v.metadata.description = (
    #                         f"{comment}: {v.metadata.description}"
    #                     )
    #                 else:
    #                     v.metadata.description = comment

    return args[0]
453
+
454
def metadata(self, args):
    """Build a Metadata object from alternating key, value children."""
    keys = args[::2]
    values = args[1::2]
    return Metadata(**dict(zip(keys, values)))
457
+
458
def IDENTIFIER(self, args) -> str:
    """Reduce an IDENTIFIER token to its text."""
    text = args.value
    return text
460
+
461
def ORDER_IDENTIFIER(self, args) -> ConceptRef:
    """Resolve an ORDER_IDENTIFIER token to a reference to the named concept.

    Surrounding whitespace in the token text is ignored.
    """
    address = args.value.strip()
    concept = self.environment.concepts[address]
    return concept.reference
463
+
464
def WILDCARD_IDENTIFIER(self, args) -> str:
    """Reduce a WILDCARD_IDENTIFIER token to its text."""
    text = args.value
    return text
466
+
467
def QUOTED_IDENTIFIER(self, args) -> str:
    """Return the token text without its first and last character.

    The dropped characters are presumably the surrounding quote marks.
    """
    raw = args.value
    return raw[1:-1]
469
+
470
@v_args(meta=True)
def concept_lit(self, meta: Meta, args) -> ConceptRef:
    """Turn a concept literal into a ConceptRef with datatype and line number.

    An undotted name is prefixed with the default namespace when the
    environment itself is in the default namespace.
    """
    lookup_key = args[0]
    if "." not in lookup_key and self.environment.namespace == DEFAULT_NAMESPACE:
        lookup_key = f"{DEFAULT_NAMESPACE}.{lookup_key}"
    resolved = self.environment.concepts[lookup_key]
    resolved_type = resolved.output_datatype
    return ConceptRef(
        # this is load-bearing to handle pseudonyms
        address=resolved.address,
        metadata=Metadata(line_number=meta.line),
        datatype=resolved_type,
    )
483
+
484
def ADDRESS(self, args) -> Address:
    """Wrap an ADDRESS token's text in an unquoted Address."""
    location = args.value
    return Address(location=location, quoted=False)
486
+
487
def QUOTED_ADDRESS(self, args) -> Address:
    """Wrap a QUOTED_ADDRESS token in a quoted Address.

    The first and last character of the token text are dropped.
    """
    location = args.value[1:-1]
    return Address(location=location, quoted=True)
489
+
490
def STRING_CHARS(self, args) -> str:
    """Reduce a STRING_CHARS token to its text."""
    text = args.value
    return text
492
+
493
def SINGLE_STRING_CHARS(self, args) -> str:
    """Reduce a SINGLE_STRING_CHARS token to its text."""
    text = args.value
    return text
495
+
496
def DOUBLE_STRING_CHARS(self, args) -> str:
    """Reduce a DOUBLE_STRING_CHARS token to its text."""
    text = args.value
    return text
498
+
499
def MINUS(self, args) -> str:
    """Replace a MINUS token with the literal ``"-"``; the token is ignored."""
    sign = "-"
    return sign
501
+
502
@v_args(meta=True)
def struct_component(self, meta: Meta, args) -> StructComponent:
    """Build a struct field from ``[name, type, *rest]``.

    Every ``Modifier`` found among the children is attached to the field.
    """
    found_modifiers = [child for child in args if isinstance(child, Modifier)]
    field_name, field_type = args[0], args[1]
    return StructComponent(name=field_name, type=field_type, modifiers=found_modifiers)
509
+
510
@v_args(meta=True)
def struct_type(self, meta: Meta, args) -> StructType:
    """Assemble a StructType from its children.

    ``StructComponent`` children are used directly; any other child is
    looked up in the environment's concepts (with the current line number
    passed along to the lookup).
    """
    members: list[
        DataType
        | MapType
        | ArrayType
        | NumericType
        | StructType
        | StructComponent
        | Concept
    ] = []
    for child in args:
        if not isinstance(child, StructComponent):
            child = self.environment.concepts.__getitem__(  # type: ignore
                key=child, line_no=meta.line
            )
        members.append(child)

    by_name = {
        member.name: member
        for member in members
        if isinstance(member, (Concept, StructComponent))
    }
    return StructType(fields=members, fields_map=by_name)
536
+
537
def list_type(self, args) -> ArrayType:
    """Build an ArrayType; a string element type is resolved as a concept name."""
    element = args[0]
    element = (
        self.environment.concepts[element] if isinstance(element, str) else element
    )
    return ArrayType(type=element)
542
+
543
def numeric_type(self, args) -> NumericType:
    """Build a NumericType from ``[precision, scale]``."""
    precision, scale = args[0], args[1]
    return NumericType(precision=precision, scale=scale)
545
+
546
def map_type(self, args) -> MapType:
    """Build a MapType from ``[key_type, value_type]``.

    A key or value given as a string is treated as a concept name and
    resolved through the environment. The two sides are resolved
    independently, so a map whose key and value are both concept names has
    both resolved (previously the value was skipped whenever the key was a
    string).
    """
    key = args[0]
    value = args[1]
    if isinstance(key, str):
        key = self.environment.concepts[key]
    # deliberately a separate `if`, not `elif`: both sides may need resolving
    if isinstance(value, str):
        value = self.environment.concepts[value]
    return MapType(key_type=key, value_type=value)
554
+
555
@v_args(meta=True)
def data_type(
    self, meta: Meta, args
) -> DataType | TraitDataType | ArrayType | StructType | MapType | NumericType:
    """Resolve a datatype expression, optionally decorated with traits.

    Composite types arrive already built and are used as-is; anything else
    is lower-cased and converted to a ``DataType``. Each trait must be
    registered in ``environment.data_types`` and be compatible with the base
    type.

    Raises:
        ParseError: for an unknown trait or one incompatible with the base.
    """
    resolved = args[0]
    # NOTE(review): traits start at index 2, so args[1] is skipped -
    # presumably a separator token; confirm against the grammar.
    traits = args[2:]
    base: DataType | TraitDataType | ArrayType | StructType | MapType | NumericType
    if isinstance(resolved, (StructType, ArrayType, NumericType, MapType)):
        base = resolved
    else:
        base = DataType(args[0].lower())

    if not traits:
        return base

    for trait in traits:
        if trait not in self.environment.data_types:
            raise ParseError(
                f"Invalid trait (type) {trait} for {base}, line {meta.line}."
            )
        matched = self.environment.data_types[trait]
        if not is_compatible_datatype(matched.type, base):
            raise ParseError(
                f"Invalid trait (type) {trait} for {base}, line {meta.line}. Trait expects type {matched.type}, has {base}"
            )
    return TraitDataType(type=base, traits=traits)
586
+
587
def array_comparison(self, args) -> ComparisonOperator:
    """Build a ComparisonOperator from the lower-cased text of each child token.

    The operator is constructed from the list of words, not a single string.
    """
    words = [token.value.lower() for token in args]
    return ComparisonOperator(words)
589
+
590
def COMPARISON_OPERATOR(self, args) -> ComparisonOperator:
    """Convert a COMPARISON_OPERATOR token (whitespace-stripped) to the enum."""
    symbol = args.strip()
    return ComparisonOperator(symbol)
592
+
593
def LOGICAL_OPERATOR(self, args) -> BooleanOperator:
    """Convert a LOGICAL_OPERATOR token (lower-cased) to the enum."""
    keyword = args.lower()
    return BooleanOperator(keyword)
595
+
596
def concept_assignment(self, args):
    """Handler for ``concept_assignment``: hands back its children unchanged."""
    children = args
    return children
598
+
599
@v_args(meta=True)
def column_assignment(self, meta: Meta, args):
    """Build a ColumnAssignment binding a column alias to a concept.

    With two children the first is the alias and the second the concept
    assignment list. Otherwise the single child is the concept assignment
    list and its last entry doubles as the alias. In the list, every entry
    before the last is a modifier and the last is the concept name.
    """
    if len(args) == 2:
        alias, concept_list = args[0], args[1]
    else:
        concept_list = args[0]
        alias = concept_list[-1]
    # everything ahead of the concept name is a modifier
    modifiers = list(concept_list[:-1]) if len(concept_list) > 1 else []
    concept_name = concept_list[-1]
    resolved = self.environment.concepts.__getitem__(  # type: ignore
        key=concept_name, line_no=meta.line, file=self.token_address
    )
    return ColumnAssignment(
        alias=alias, modifiers=modifiers, concept=resolved.reference
    )
618
+
619
+ def _TERMINATOR(self, args):
620
+ return None
621
+
622
+ def _static_functions(self, args):
623
+ return args[0]
624
+
625
def MODIFIER(self, args) -> Modifier:
    """Convert a MODIFIER token's text to the Modifier enum."""
    text = args.value
    return Modifier(text)
627
+
628
def SHORTHAND_MODIFIER(self, args) -> Modifier:
    """Convert a SHORTHAND_MODIFIER token's text to the Modifier enum."""
    text = args.value
    return Modifier(text)
630
+
631
def PURPOSE(self, args) -> Purpose:
    """Convert a PURPOSE token's text to the Purpose enum."""
    text = args.value
    return Purpose(text)
633
+
634
def AUTO(self, args) -> Purpose:
    """An AUTO token always maps to ``Purpose.AUTO``."""
    purpose = Purpose.AUTO
    return purpose
636
+
637
def CONST(self, args) -> Purpose:
    """A CONST token always maps to ``Purpose.CONSTANT``."""
    purpose = Purpose.CONSTANT
    return purpose
639
+
640
def CONSTANT(self, args) -> Purpose:
    """A CONSTANT token always maps to ``Purpose.CONSTANT``."""
    purpose = Purpose.CONSTANT
    return purpose
642
+
643
def PROPERTY(self, args):
    """A PROPERTY token always maps to ``Purpose.PROPERTY``."""
    purpose = Purpose.PROPERTY
    return purpose
645
+
646
def HASH_TYPE(self, args):
    """Reduce a HASH_TYPE token to its text."""
    text = args.value
    return text
648
+
649
@v_args(meta=True)
def prop_ident(self, meta: Meta, args) -> Tuple[List[Concept], str]:
    """Split children into ``(parent concepts, property name)``.

    Every child but the last is resolved through the environment's concepts;
    the last child is returned as-is as the property name.
    """
    parents = [self.environment.concepts[key] for key in args[:-1]]
    return parents, args[-1]
652
+
653
@v_args(meta=True)
def concept_property_declaration(self, meta: Meta, args) -> Concept:
    """Declare a property concept and register it in the environment.

    When the first child is not ``Purpose.PROPERTY`` the declaration is
    treated as a unique property and that leading child is discarded. The
    declaration itself is either a ``(parents, name)`` tuple or a dotted
    string ``parent.name``.

    Raises:
        ParseError: if a string declaration has no parent prefix.
    """
    unique = not (args[0] == Purpose.PROPERTY)
    if unique:
        # drop the leading marker so the remaining layout matches the plain form
        args = args[1:]

    metadata = Metadata()
    modifiers = []
    for child in args:
        if isinstance(child, Metadata):
            metadata = child
        if isinstance(child, Modifier):
            modifiers.append(child)

    declaration = args[1]
    if isinstance(declaration, tuple):
        parents, name = declaration
        if "." in name:
            # NOTE(review): splits on the FIRST dot, unlike the rsplit used
            # for string declarations below - confirm this is intended.
            namespace, name = name.split(".", 1)
        else:
            namespace = self.environment.namespace or DEFAULT_NAMESPACE
    else:
        if "." not in declaration:
            raise ParseError(
                f"Property declaration {args[1]} must be fully qualified with a parent key"
            )
        grain, name = declaration.rsplit(".", 1)
        parent = self.environment.concepts[grain]
        parents = [parent]
        namespace = parent.namespace

    chosen_purpose = Purpose.UNIQUE_PROPERTY if unique else Purpose.PROPERTY
    concept = Concept(
        name=name,
        datatype=args[2],
        purpose=chosen_purpose,
        metadata=metadata,
        grain=Grain(components={p.address for p in parents}),
        namespace=namespace,
        keys={p.address for p in parents},
        modifiers=modifiers,
    )

    self.environment.add_concept(concept, meta)
    return concept
696
+
697
@v_args(meta=True)
def concept_declaration(self, meta: Meta, args) -> ConceptDeclarationStatement:
    """Declare a root concept, or bind a parameter to its supplied value.

    Children are ``[purpose, name, datatype, ...]``; any ``Metadata`` or
    ``Modifier`` among them is picked up as well.

    For ``Purpose.PARAMETER`` the value is read from
    ``environment.parameters``, coerced to the declared datatype and handed
    to ``constant_derivation``, whose result is returned. A parameter counts
    as missing only when it is absent (``None``); falsy values such as ``0``,
    ``False`` or ``""`` are accepted.

    Otherwise a multi-row root concept is created, registered in the
    environment and wrapped in a ``ConceptDeclarationStatement``.

    Raises:
        MissingParameterException: a required parameter was not supplied.
        ParseError: the parameter's datatype is not supported.
    """
    metadata = Metadata()
    modifiers = []
    purpose = args[0]
    datatype = args[2]
    for arg in args:
        if isinstance(arg, Metadata):
            metadata = arg
        if isinstance(arg, Modifier):
            modifiers.append(arg)
    name = args[1]
    _, namespace, name, _ = parse_concept_reference(name, self.environment)
    if purpose == Purpose.PARAMETER:
        value = self.environment.parameters.get(name, None)
        # `is None`, not truthiness: 0 / False / "" are valid parameter values
        if value is None:
            raise MissingParameterException(
                f'This script requires parameter "{name}" to be set in environment.'
            )
        if datatype == DataType.INTEGER:
            value = int(value)
        elif datatype == DataType.FLOAT:
            value = float(value)
        elif datatype == DataType.BOOL:
            value = bool(value)
        elif datatype == DataType.STRING:
            value = str(value)
        elif datatype == DataType.DATE:
            # already-typed values pass through; strings are parsed as ISO dates
            if not isinstance(value, date):
                value = date.fromisoformat(value)
        elif datatype == DataType.DATETIME:
            if not isinstance(value, datetime):
                value = datetime.fromisoformat(value)
        else:
            raise ParseError(
                f"Unsupported datatype {datatype} for parameter {name}."
            )
        rval = self.constant_derivation(
            meta, [Purpose.CONSTANT, name, value, metadata]
        )
        return rval

    concept = Concept(
        name=name,
        datatype=datatype,
        purpose=purpose,
        metadata=metadata,
        namespace=namespace,
        modifiers=modifiers,
        derivation=Derivation.ROOT,
        granularity=Granularity.MULTI_ROW,
    )
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta=meta)
    return ConceptDeclarationStatement(concept=concept)
757
+
758
@v_args(meta=True)
def concept_derivation(self, meta: Meta, args) -> ConceptDerivationStatement:
    """Create and register a concept derived from an expression or constant.

    ``args`` holds ``(purpose, raw_name, source_value[, metadata])``.
    ``raw_name`` is either a string reference or a ``(keys, name)`` pair
    for the ``<abc.def,zef.gf>.property`` form.

    Raises:
        SyntaxError: the derived purpose conflicts with the declared purpose,
            or the source value is of a type this method does not handle.
    """
    metadata = args[3] if len(args) > 3 else None
    purpose = args[0]
    raw_name = args[1]

    if isinstance(raw_name, str):
        # abc.def.property pattern
        _lookup, namespace, name, _parent = parse_concept_reference(
            raw_name, self.environment, purpose
        )
    else:
        # <abc.def,zef.gf>.property pattern
        key_concepts, name = raw_name
        key_addresses = [k.address for k in key_concepts]
        namespaces = {addr.rsplit(".", 1)[0] for addr in key_addresses}
        if len(namespaces) == 1:
            namespace = namespaces.pop()
        else:
            namespace = self.environment.namespace or DEFAULT_NAMESPACE

    # strip off every parenthetical to see what is actually being assigned
    source_value = args[2]
    while isinstance(source_value, Parenthetical):
        source_value = source_value.content

    derivable_types = (
        FilterItem,
        WindowItem,
        AggregateWrapper,
        Function,
        FunctionCallWrapper,
        Comparison,
    )
    if isinstance(source_value, derivable_types):
        concept = arbitrary_to_concept(
            source_value,
            name=name,
            namespace=namespace,
            environment=self.environment,
            metadata=metadata,
        )
        # let constant purposes exist to support round-tripping, as a build
        # concept may end up with a constant based on constant inlining
        # happening recursively
        if purpose == Purpose.KEY and concept.purpose != Purpose.KEY:
            concept.purpose = Purpose.KEY
        elif (
            purpose
            and purpose != Purpose.AUTO
            and concept.purpose != purpose
            and purpose != Purpose.CONSTANT
        ):
            raise SyntaxError(
                f'Concept {name} purpose {concept.purpose} does not match declared purpose {purpose}. Suggest defaulting to "auto"'
            )
    elif isinstance(source_value, CONSTANT_TYPES):
        concept = constant_to_concept(
            source_value,
            name=name,
            namespace=namespace,
            metadata=metadata,
        )
    else:
        raise SyntaxError(
            f"Received invalid type {type(args[2])} {args[2]} as input to concept derivation: `{self.text_lookup[self.token_address][meta.start_pos:meta.end_pos]}`"
        )

    # shared tail for both successful branches
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta=meta)
    return ConceptDerivationStatement(concept=concept)
839
+
840
@v_args(meta=True)
def rowset_derivation_statement(
    self, meta: Meta, args
) -> RowsetDerivationStatement:
    """Build a rowset statement and register its concepts and lineage.

    ``args[0]`` is the rowset name and ``args[1]`` the select (or multi
    select) statement it is derived from.
    """
    rowset_name = args[0]
    source_select: SelectStatement | MultiSelectStatement = args[1]
    statement = RowsetDerivationStatement(
        name=rowset_name,
        select=source_select,
        namespace=self.environment.namespace or DEFAULT_NAMESPACE,
    )

    for derived in rowset_to_concepts(statement, self.environment):
        if derived.metadata:
            derived.metadata.line_number = meta.line
        self.environment.add_concept(derived, force=True)

    lineage = statement.select.as_lineage(self.environment)
    self.environment.add_rowset(statement.name, lineage)
    return statement
861
+
862
@v_args(meta=True)
def constant_derivation(
    self, meta: Meta, args: tuple[Purpose, str, Any, Optional[Metadata]]
) -> Concept:
    """Create a single-row constant concept and add it to the environment.

    ``args[1]`` is the raw concept name, ``args[2]`` the constant value and
    the optional ``args[3]`` caller-supplied metadata. ``args[0]`` is not
    read here: the resulting purpose is always ``Purpose.CONSTANT``.
    """
    supplied_metadata = args[3] if len(args) > 3 else None
    raw_name = args[1]
    constant: Union[str, float, int, bool, MapWrapper, ListWrapper] = args[2]
    _lookup, namespace, name, _parent = parse_concept_reference(
        raw_name, self.environment
    )
    if supplied_metadata:
        concept_metadata = supplied_metadata
    else:
        concept_metadata = Metadata(line_number=meta.line)
    constant_function = Function(
        operator=FunctionType.CONSTANT,
        output_datatype=arg_to_datatype(constant),
        output_purpose=Purpose.CONSTANT,
        arguments=[constant],
    )
    concept = Concept(
        name=name,
        datatype=arg_to_datatype(constant),
        purpose=Purpose.CONSTANT,
        metadata=concept_metadata,
        lineage=constant_function,
        grain=Grain(components=set()),
        namespace=namespace,
        granularity=Granularity.SINGLE_ROW,
    )
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta)
    return concept
895
+
896
@v_args(meta=True)
def concept(self, meta: Meta, args) -> ConceptDeclarationStatement:
    """Wrap the child result in a ``ConceptDeclarationStatement``.

    The single child is either a ``Concept`` or an object exposing one via
    its ``.concept`` attribute; the concept's metadata line number is
    stamped when metadata is present.
    """
    child = args[0]
    resolved: Concept = child if isinstance(child, Concept) else child.concept
    if resolved.metadata:
        resolved.metadata.line_number = meta.line
    return ConceptDeclarationStatement(concept=resolved)
905
+
906
def column_assignment_list(self, args):
    """Pass the already-transformed column assignments through unchanged."""
    assignments = args
    return assignments
908
+
909
def column_list(self, args) -> List:
    """Pass the already-transformed column entries through unchanged."""
    columns = args
    return columns
911
+
912
def grain_clause(self, args) -> Grain:
    """Build a ``Grain`` from the concept references listed in ``args[0]``.

    Each reference is looked up in the environment and its address used as
    a grain component.
    """
    addresses = {self.environment.concepts[ref].address for ref in args[0]}
    return Grain(components=addresses)
916
+
917
@v_args(meta=True)
def aggregate_by(self, meta: Meta, args):
    """Create a GROUP function over the leading concept and the rest of ``args``.

    The first child's ``.value`` is split on spaces and its last word taken
    as the leading concept name; the remaining children are looked up as-is.
    """
    leading = args[0]
    leading_name = leading.value.split(" ")[-1]
    lookup_keys = [leading_name] + args[1:]
    resolved = [self.environment.concepts[key] for key in lookup_keys]
    return self.function_factory.create_function(resolved, FunctionType.GROUP, meta)
923
+
924
def whole_grain_clause(self, args) -> WholeGrainWrapper:
    """Wrap the first child as the ``where`` of a ``WholeGrainWrapper``."""
    condition = args[0]
    return WholeGrainWrapper(where=condition)
926
+
927
def MULTILINE_STRING(self, args) -> str:
    """Return the text with its first and last three characters removed."""
    delimiter_width = 3
    return args[delimiter_width:-delimiter_width]
929
+
930
def raw_column_assignment(self, args):
    """Wrap the second child as the text of a ``RawColumnExpr``."""
    raw_text = args[1]
    return RawColumnExpr(text=raw_text)
932
+
933
def DATASOURCE_STATUS(self, args) -> DatasourceState:
    """Convert the token's lower-cased value into a ``DatasourceState``."""
    state_name = args.value.lower()
    return DatasourceState(state_name)
935
+
936
@v_args(meta=True)
def datasource_status_clause(self, meta: Meta, args):
    """Return the second child of the clause unchanged."""
    status = args[1]
    return status
939
+
940
@v_args(meta=True)
def datasource_partition_clause(self, meta: Meta, args):
    """Build a partition clause of ``ConceptRef`` objects from ``args[0]``."""
    partition_refs = [ConceptRef(address=entry) for entry in args[0]]
    return DatasourcePartitionClause(partition_refs)
943
+
944
@v_args(meta=True)
def datasource_increment_clause(self, meta: Meta, args):
    """Build an incremental clause of ``ConceptRef`` objects from ``args[0]``."""
    increment_refs = [ConceptRef(address=entry) for entry in args[0]]
    return DatasourceIncrementalClause(increment_refs)
947
+
948
@v_args(meta=True)
def datasource(self, meta: Meta, args):
    """Assemble a ``Datasource`` from its parsed clauses.

    ``args[0]`` is the name and ``args[1]`` the column assignments; every
    child from ``args[1]`` onward is classified by type (address, grain,
    where clause, status, incremental/partition clauses, ...).

    During the validation pass the datasource is added to the environment
    and non-grain key columns get their ``keys`` set to the grain
    components, unless a grain component already depends on that column.

    Raises:
        ValueError: no address or query was supplied, or the where clause
            references a concept that is not an output of the datasource.
    """
    name = args[0]
    columns: List[ColumnAssignment] = args[1]
    grain: Optional[Grain] = None
    address: Optional[Address] = None
    where: Optional[WhereClause] = None
    non_partial_for: Optional[WhereClause] = None
    incremental_by: List[ConceptRef] = []
    partition_by: List[ConceptRef] = []
    datasource_status: DatasourceState = DatasourceState.PUBLISHED

    # classify each remaining child by its type
    for clause in args[1:]:
        if isinstance(clause, Address):
            address = clause
        elif isinstance(clause, Grain):
            grain = clause
        elif isinstance(clause, WholeGrainWrapper):
            non_partial_for = clause.where
        elif isinstance(clause, Query):
            address = Address(location=f"({clause.text})", is_query=True)
        elif isinstance(clause, WhereClause):
            where = clause
        elif isinstance(clause, DatasourceState):
            datasource_status = clause
        elif isinstance(clause, DatasourceIncrementalClause):
            incremental_by = clause.columns
        elif isinstance(clause, DatasourcePartitionClause):
            partition_by = clause.columns
    if not address:
        raise ValueError(
            "Malformed datasource, missing address or query declaration"
        )

    built = Datasource(
        name=name,
        columns=columns,
        # grain will be set by default from args
        # TODO: move to factory
        grain=grain,  # type: ignore
        address=address,
        namespace=self.environment.namespace,
        where=where,
        non_partial_for=non_partial_for,
        status=datasource_status,
        incremental_by=incremental_by,
        partition_by=partition_by,
    )
    if built.where:
        for referenced in built.where.concept_arguments:
            if referenced.address not in built.output_concepts:
                raise ValueError(
                    f"Datasource {name} where condition depends on concept {referenced.address} that does not exist on the datasource, line {meta.line}."
                )

    if self.parse_pass == ParsePass.VALIDATION:
        self.environment.add_datasource(built, meta=meta)
        # if we have any foreign keys on the datasource, we can at this
        # point optimize them to properties if they do not have other usage.
        for column in columns:
            # skip partial for now
            if not grain or column.concept.address in grain.components:
                continue
            target_c = self.environment.concepts[column.concept.address]
            if target_c.purpose != Purpose.KEY:
                continue

            key_inputs = grain.components
            # never overwrite a key with a dependency on a property
            # for example - binding a datasource with a grain of <x>.fun
            # should never override the grain of x to <fun>
            dependency_flags = [
                column.concept.address
                in (self.environment.concepts[key].keys or set())
                for key in key_inputs
            ]
            if any(dependency_flags):
                continue
            target_c.keys = {
                self.environment.concepts[key].address for key in key_inputs
            }

    return built
1033
+
1034
@v_args(meta=True)
def comment(self, meta: Meta, args):
    """Turn the single child token into a ``Comment`` carrying its value."""
    assert len(args) == 1
    token = args[0]
    return Comment(text=token.value)
1038
+
1039
def PARSE_COMMENT(self, args):
    """Build a ``Comment`` from the token value with trailing whitespace removed."""
    trimmed = args.value.rstrip()
    return Comment(text=trimmed)
1041
+
1042
@v_args(meta=True)
def select_transform(self, meta: Meta, args) -> ConceptTransform:
    """Create a ``ConceptTransform`` for an expression named inside a select.

    ``args[0]`` is the expression and ``args[1]`` the output name. The new
    concept's metadata records the line number and marks its source as
    ``ConceptSource.SELECT``.
    """
    raw_output: str = args[1]
    transformation = unwrap_transformation(args[0], self.environment)
    _lookup, namespace, output_name, _parent = parse_concept_reference(
        raw_output, self.environment
    )

    select_metadata = Metadata(
        line_number=meta.line, concept_source=ConceptSource.SELECT
    )
    derived = arbitrary_to_concept(
        transformation,
        environment=self.environment,
        namespace=namespace,
        name=output_name,
        metadata=select_metadata,
    )
    return ConceptTransform(function=transformation, output=derived)
1059
+
1060
@v_args(meta=True)
def concept_nullable_modifier(self, meta: Meta, args) -> Modifier:
    """Map the nullable modifier rule to ``Modifier.NULLABLE``."""
    modifier = Modifier.NULLABLE
    return modifier
1063
+
1064
@v_args(meta=True)
def select_hide_modifier(self, meta: Meta, args) -> Modifier:
    """Map the hide modifier rule to ``Modifier.HIDDEN``."""
    modifier = Modifier.HIDDEN
    return modifier
1067
+
1068
@v_args(meta=True)
def select_partial_modifier(self, meta: Meta, args) -> Modifier:
    """Map the partial modifier rule to ``Modifier.PARTIAL``."""
    modifier = Modifier.PARTIAL
    return modifier
1071
+
1072
@v_args(meta=True)
def select_item(self, meta: Meta, args) -> Optional[SelectItem]:
    """Build a ``SelectItem`` from one select entry.

    ``Modifier`` children become the item's modifiers; ``Comment`` children
    are dropped. Returns ``None`` when nothing else remains (for example a
    comment-only entry).

    Raises:
        ParseError: more than one content child remains after filtering.
    """
    modifiers = [arg for arg in args if isinstance(arg, Modifier)]
    contents = [arg for arg in args if not isinstance(arg, (Modifier, Comment))]

    if not contents:
        return None
    if len(contents) != 1:
        # Source text is stored under the token address (the same key
        # concept_derivation reads), not the parse address. Use .get so a
        # missing entry can never mask the real ParseError with a KeyError.
        source_text = self.text_lookup.get(self.token_address, "")
        raise ParseError(
            "Malformed select statement"
            f" {contents} {source_text[meta.start_pos:meta.end_pos]}"
        )
    return SelectItem(content=contents[0], modifiers=modifiers)
1091
+
1092
def select_list(self, args):
    """Return the children with every falsy entry (such as ``None``) removed."""
    kept = []
    for entry in args:
        if entry:
            kept.append(entry)
    return kept
1094
+
1095
def limit(self, args):
    """Build a ``Limit`` from the integer value of the first child token."""
    row_count = int(args[0].value)
    return Limit(count=row_count)
1097
+
1098
def ordering(self, args: list[str]):
    """Build an ``Ordering`` from a direction and an optional nulls position.

    With more than one child, the last child is appended as
    ``"<direction> nulls <position>"``; everything is lower-cased.
    """
    direction = args[0].lower()
    if len(args) <= 1:
        return Ordering(direction)
    null_position = args[-1].lower()
    return Ordering(" ".join((direction, "nulls", null_position)))
1104
+
1105
def order_list(self, args) -> List[OrderItem]:
    """Pair up alternating ``expr, order`` children into ``OrderItem`` objects.

    A trailing unpaired child is ignored.
    """
    # zipping one iterator with itself yields consecutive pairs
    children = iter(args)
    items = []
    for expression, direction in zip(children, children):
        items.append(OrderItem(expr=expression, order=direction))
    return items
1113
+
1114
def order_by(self, args):
    """Wrap the first child (the order items) in an ``OrderBy``."""
    order_items = args[0]
    return OrderBy(items=order_items)
1116
+
1117
def over_component(self, args):
    """Build a ``ConceptRef`` from the first child's value.

    Leading commas are removed first, then surrounding whitespace.
    """
    raw_value = args[0].value
    cleaned = raw_value.lstrip(",").strip()
    return ConceptRef(address=cleaned)
1119
+
1120
def over_list(self, args):
    """Return the children as a new list."""
    return list(args)
1122
+
1123
def PUBLISH_ACTION(self, args) -> PublishAction:
    """Map the token's lower-cased value to a ``PublishAction``.

    Raises:
        SyntaxError: the value is neither "publish" nor "unpublish".
    """
    action = args.value.lower()
    if action == "publish":
        return PublishAction.PUBLISH
    if action == "unpublish":
        return PublishAction.UNPUBLISH
    raise SyntaxError(f"Unknown publish action: {action}")
1131
+
1132
@v_args(meta=True)
def publish_statement(self, meta: Meta, args) -> PublishStatement:
    """Build a ``PublishStatement`` from targets, an action and a scope.

    String children are targets; the scope defaults to datasources and the
    action to publish.

    Raises:
        SyntaxError: a scope other than datasources was given.
    """
    targets = []
    scope = ValidationScope.DATASOURCES
    publish_action = PublishAction.PUBLISH
    for child in args:
        if isinstance(child, str):
            targets.append(child)
            continue
        if isinstance(child, PublishAction):
            publish_action = child
            continue
        if isinstance(child, ValidationScope):
            scope = child
            if child != ValidationScope.DATASOURCES:
                raise SyntaxError(
                    f"Publishing is only supported for Datasources, got {child} on line {meta.line}"
                )
    return PublishStatement(scope=scope, targets=targets, action=publish_action)
1153
+
1154
def create_modifier_clause(self, args):
    """Map the first child's token type to a ``CreateMode``.

    Returns ``None`` for any token type other than the two handled here.
    """
    token_type = args[0].type
    if token_type == "CREATE_IF_NOT_EXISTS":
        return CreateMode.CREATE_IF_NOT_EXISTS
    if token_type == "CREATE_OR_REPLACE":
        return CreateMode.CREATE_OR_REPLACE
    return None
1160
+
1161
@v_args(meta=True)
def create_statement(self, meta: Meta, args) -> CreateStatement:
    """Build a ``CreateStatement`` from targets, a scope and a create mode.

    String children are targets; the scope defaults to datasources and the
    mode to ``CreateMode.CREATE``.

    Raises:
        SyntaxError: a scope other than datasources was given.
    """
    targets = []
    scope = ValidationScope.DATASOURCES
    create_mode = CreateMode.CREATE
    for child in args:
        if isinstance(child, str):
            targets.append(child)
            continue
        if isinstance(child, ValidationScope):
            scope = child
            if child != ValidationScope.DATASOURCES:
                raise SyntaxError(
                    f"Creating is only supported for Datasources, got {child} on line {meta.line}"
                )
            continue
        if isinstance(child, CreateMode):
            create_mode = child

    return CreateStatement(scope=scope, targets=targets, create_mode=create_mode)
1179
+
1180
def VALIDATE_SCOPE(self, args) -> ValidationScope:
    """Convert the token text to a ``ValidationScope``.

    The text is lower-cased and an "s" is appended when it does not already
    end in one.
    """
    lowered: str = args.lower()
    plural = lowered if lowered.endswith("s") else lowered + "s"
    return ValidationScope(plural)
1185
+
1186
@v_args(meta=True)
def validate_statement(self, meta: Meta, args) -> ValidateStatement:
    """Build a ``ValidateStatement`` from an optional scope and targets.

    No children means ``ValidationScope.ALL`` with no targets; one child is
    a scope with no targets; more than one is a scope followed by targets.
    """
    if len(args) == 0:
        scope = ValidationScope.ALL
        targets = None
    else:
        scope = args[0]
        targets = args[1:] if len(args) > 1 else None
    return ValidateStatement(scope=scope, targets=targets)
1201
+
1202
@v_args(meta=True)
def mock_statement(self, meta: Meta, args) -> MockStatement:
    """Build a ``MockStatement``: first child is the scope, the rest are targets."""
    scope = args[0]
    targets = args[1:]
    return MockStatement(scope=scope, targets=targets)
1205
+
1206
@v_args(meta=True)
def merge_statement(self, meta: Meta, args) -> MergeStatementV2 | None:
    """Resolve a merge of one concept, or a whole namespace, into another.

    ``Modifier`` children are collected; the two remaining children are the
    source and target. A source ending in ``.*`` merges every concept of
    that namespace whose name also exists under the target namespace.

    The merge is applied only during the validation pass; other passes
    return ``None`` after resolving the sources and targets.

    Raises:
        ValueError: the source is a wildcard but the target is not.
        SyntaxError: a source concept is undefined (validation pass only).
    """
    modifiers = []
    cargs: list[str] = []
    for child in args:
        if isinstance(child, Modifier):
            modifiers.append(child)
        else:
            cargs.append(child)
    source, target = cargs

    source_wildcard = None
    target_wildcard = None
    if source.endswith(".*"):
        if not target.endswith(".*"):
            raise ValueError("Invalid merge, source is wildcard, target is not")
        source_wildcard = source[:-2]
        target_wildcard = target[:-2]
        candidates: list[Concept] = [
            candidate
            for _, candidate in self.environment.concepts.items()
            if candidate.namespace == source_wildcard
        ]
        targets: dict[str, Concept] = {}
        for candidate in candidates:
            counterpart = target_wildcard + "." + candidate.name
            if counterpart in self.environment.concepts:
                targets[candidate.address] = self.environment.concepts[counterpart]
        # only sources with a counterpart in the target namespace are merged
        sources = [c for c in candidates if c.address in targets]
    else:
        sources = [self.environment.concepts[source]]
        targets = {sources[0].address: self.environment.concepts[target]}

    if self.parse_pass != ParsePass.VALIDATION:
        return None

    for source_c in sources:
        if isinstance(source_c, UndefinedConceptFull):
            raise SyntaxError(
                f"Cannot merge non-existent source concept {source_c.address} on line: {meta.line}"
            )
    new = MergeStatementV2(
        sources=sources,
        targets=targets,
        modifiers=modifiers,
        source_wildcard=source_wildcard,
        target_wildcard=target_wildcard,
    )
    for source_c in new.sources:
        self.environment.merge_concept(
            source_c, targets[source_c.address], modifiers
        )
    return new
1258
+
1259
@v_args(meta=True)
def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
    """Build a ``RawSQLStatement`` from the first child, tagged with its line."""
    return RawSQLStatement(meta=Metadata(line_number=meta.line), text=args[0])
1263
+
1264
def COPY_TYPE(self, args) -> IOType:
    """Convert the token's value into an ``IOType``."""
    type_name = args.value
    return IOType(type_name)
1266
+
1267
@v_args(meta=True)
def copy_statement(self, meta: Meta, args) -> CopyStatement:
    """Build a ``CopyStatement``.

    ``args[0]`` is the target type, ``args[1]`` the target and the last
    child the select to copy from.
    """
    target_type = args[0]
    target = args[1]
    source_select = args[-1]
    return CopyStatement(
        target=target,
        target_type=target_type,
        meta=Metadata(line_number=meta.line),
        select=source_select,
    )
1275
+
1276
def resolve_import_address(self, address: str, is_stdlib: bool = False) -> str:
    """Return the source text for an import address.

    Filesystem resolvers and stdlib imports read ``address`` from disk as
    UTF-8; dict resolvers look it up in their in-memory ``content``.

    Raises:
        ImportError: the address is missing from a dict resolver, or the
            resolver type is not one of the two handled here.
    """
    resolver = self.environment.config.import_resolver

    if isinstance(resolver, FileSystemImportResolver) or is_stdlib:
        with open(address, "r", encoding="utf-8") as handle:
            return handle.read()

    if isinstance(resolver, DictImportResolver):
        lookup = address
        if lookup not in resolver.content:
            raise ImportError(
                f"Unable to import file {lookup}, not resolvable from provided source files."
            )
        return resolver.content[lookup]

    raise ImportError(
        f"Unable to import file {address}, resolver type {type(self.environment.config.import_resolver)} not supported"
    )
1297
+
1298
def IMPORT_DOT(self, args) -> str:
    """Return a literal "." regardless of the token received."""
    dot = "."
    return dot
1300
+
1301
def import_statement(self, args: list[str]) -> ImportStatement:
    """Resolve, parse (or reuse from cache) and register an imported file.

    ``"."`` children mark a relative import: the first is free and each
    extra one moves the root one directory up. The remaining children are
    the dotted import path and an optional alias.

    Token trees and source text are cached by file path (or dict key);
    parsers and their environments are cached by the chain of import keys
    leading to this import. Once that chain exceeds ``MAX_PARSE_DEPTH`` an
    ``ImportStatement`` is returned without parsing or registering anything.

    Raises:
        ImportError: the file cannot be resolved or fails to parse.
        NotImplementedError: the import resolver type is not handled.
    """
    started_at = datetime.now()
    resolver = self.environment.config.import_resolver
    is_file_resolver = isinstance(resolver, FileSystemImportResolver)

    # count relative-import dots; the first one does not move the root
    parent_dirs = -1
    path_args = []
    for child in args:
        if child == ".":
            parent_dirs += 1
        else:
            path_args.append(child)
    parent_dirs = max(parent_dirs, 0)
    args = path_args

    if len(args) == 2:
        alias = args[-1]
        cache_key = args[-1]
    else:
        alias = self.environment.namespace
        cache_key = args[0]
    input_path = args[0]

    path = input_path.split(".")
    is_stdlib = path[0] == "std"
    if is_stdlib:
        target = join(STDLIB_ROOT, *path) + ".preql"
        token_lookup: Path | str = Path(target)
    elif is_file_resolver:
        troot = Path(self.environment.working_path)
        for _ in range(parent_dirs):
            troot = troot.parent
        target = join(troot, *path) + ".preql"
        # tokens + text are cached by path
        token_lookup = Path(target)
    elif isinstance(resolver, DictImportResolver):
        target = ".".join(path)
        token_lookup = target
    else:
        raise NotImplementedError

    # parser + env has to be cached by prior import path + current key
    key_path = self.import_keys + [cache_key]
    cache_lookup = "-".join(key_path)

    # we don't iterate past the max parse depth
    if len(key_path) > MAX_PARSE_DEPTH:
        return ImportStatement(
            alias=alias, input_path=input_path, path=Path(target)
        )

    if token_lookup in self.tokens:
        perf_logger.debug(f"\tTokens cached for {token_lookup}")
        raw_tokens = self.tokens[token_lookup]
        text = self.text_lookup[token_lookup]
    else:
        perf_logger.debug(f"\tTokens not cached for {token_lookup}, resolving")
        text = self.resolve_import_address(target, is_stdlib)
        self.text_lookup[token_lookup] = text

        try:
            raw_tokens = PARSER.parse(text)
        except Exception as e:
            raise ImportError(
                f"Unable to import '{target}', parsing error: {e}"
            ) from e
        self.tokens[token_lookup] = raw_tokens

    if cache_lookup in self.parsed:
        perf_logger.debug(f"\tEnvironment cached for {token_lookup}")
        nparser = self.parsed[cache_lookup]
        new_env = nparser.environment
        if nparser.parse_pass != ParsePass.VALIDATION:
            second_pass_start = datetime.now()
            nparser.run_second_parse_pass()
            second_pass_end = datetime.now()
            perf_logger.debug(
                f"{second_pass_end - second_pass_start} seconds | Import {alias} key ({cache_key}) second pass took {second_pass_end - second_pass_start} to parse, {len(new_env.concepts)} concepts"
            )
    else:
        perf_logger.debug(f"\tParsing new for {token_lookup}")
        lookup_text = str(token_lookup)
        root = lookup_text.rsplit(".", 1)[0] if "." in lookup_text else None
        try:
            new_env = Environment(
                working_path=dirname(target),
                env_file_path=token_lookup,
                config=self.environment.config.copy_for_root(root=root),
                parameters=self.environment.parameters,
            )
            new_env.concepts.fail_on_missing = False
            # record this parser in the shared cache before the child runs
            self.parsed[self.parse_address] = self
            nparser = ParseToObjects(
                environment=new_env,
                parse_address=cache_lookup,
                token_address=token_lookup,
                parsed=self.parsed,
                tokens=self.tokens,
                text_lookup=self.text_lookup,
                import_keys=self.import_keys + [cache_key],
                parse_config=self.parse_config,
            )
            nparser.transform(raw_tokens)
            self.parsed[cache_lookup] = nparser
        except Exception as e:
            raise ImportError(
                f"Unable to import file {target}, parsing error: {e}"
            ) from e

    parsed_path = Path(args[0])
    imps = ImportStatement(alias=alias, input_path=input_path, path=parsed_path)

    self.environment.add_import(
        alias,
        new_env,
        Import(
            alias=alias,
            path=parsed_path,
            input_path=Path(target) if is_file_resolver else None,
        ),
    )
    finished_at = datetime.now()
    perf_logger.debug(
        f"{finished_at - started_at} seconds | Import {alias} key ({cache_key}) took to parse, {len(new_env.concepts)} concepts"
    )
    return imps
1432
+
1433
@v_args(meta=True)
def show_category(self, meta: Meta, args) -> ShowCategory:
    """Convert the first child into a ``ShowCategory``."""
    category_value = args[0]
    return ShowCategory(category_value)
1436
+
1437
@v_args(meta=True)
def show_statement(self, meta: Meta, args) -> ShowStatement:
    """Wrap the first child as the content of a ``ShowStatement``."""
    shown = args[0]
    return ShowStatement(content=shown)
1440
+
1441
@v_args(meta=True)
def persist_partition_clause(self, meta: Meta, args) -> DatasourcePartitionClause:
    """Build a partition clause of ``ConceptRef`` objects from ``args[0]``."""
    partition_refs = [ConceptRef(address=entry) for entry in args[0]]
    return DatasourcePartitionClause(partition_refs)
1444
+
1445
@v_args(meta=True)
def PERSIST_MODE(self, args) -> PersistMode:
    """Map the token's lower-cased value to a ``PersistMode``.

    The bare keyword "persist" maps to ``PersistMode.OVERWRITE``; any other
    value is passed to the ``PersistMode`` constructor.
    """
    keyword = args.value.lower()
    return PersistMode.OVERWRITE if keyword == "persist" else PersistMode(keyword)
1451
+
1452
@v_args(meta=True)
def auto_persist(self, meta: Meta, args) -> PersistStatement | None:
    """Build a persist statement that selects every column of an existing datasource.

    ``args`` holds ``(persist_mode, target_name[, where])``. Returns
    ``None`` outside the validation pass.

    Raises:
        SyntaxError: the target datasource is not in the environment.
    """
    if self.parse_pass != ParsePass.VALIDATION:
        return None
    persist_mode = args[0]
    target_name = args[1]
    where = args[2] if len(args) > 2 else None

    known_datasources = self.environment.datasources
    if target_name not in known_datasources:
        raise SyntaxError(
            f"Auto persist target datasource {target_name} does not exist in environment on line {meta.line}. Have {list(self.environment.datasources.keys())}"
        )
    target = known_datasources[target_name]

    # select exactly the concepts bound to the target's columns
    selection = [
        SelectItem(
            content=ConceptRef(address=col.concept.address),
            modifiers=[],
        )
        for col in target.columns
    ]
    select: SelectStatement = SelectStatement.from_inputs(
        environment=self.environment,
        selection=selection,
        where_clause=where,
        meta=Metadata(line_number=meta.line),
    )
    return PersistStatement(
        select=select,
        datasource=target,
        persist_mode=persist_mode,
        partition_by=target.incremental_by,
        meta=Metadata(line_number=meta.line),
    )
1484
+
1485
@v_args(meta=True)
def full_persist(self, meta: Meta, args) -> PersistStatement | None:
    """Build a persist statement from an explicit select.

    String children are the datasource identifier and, when exactly two are
    given, a physical address. An existing datasource with that identifier
    is reused; otherwise a new one is derived from the select. Returns
    ``None`` outside the validation pass.

    Raises:
        SyntaxError: no address and no existing datasource; append to a
            missing datasource; partitioning differs from the datasource;
            or a partition concept has an unsupported type.
        NotImplementedError: appending with more than one partition column.
    """
    if self.parse_pass != ParsePass.VALIDATION:
        return None

    partition_clause = DatasourcePartitionClause([])
    labels = [child for child in args if isinstance(child, str)]
    for child in args:
        if isinstance(child, DatasourcePartitionClause):
            partition_clause = child

    identifier = labels[0]
    address = labels[1] if len(labels) == 2 else None
    target: Datasource | None = self.environment.datasources.get(identifier)

    if not address and not target:
        raise SyntaxError(
            f'Append statement without concrete table address on line {meta.line} attempts to insert into datasource "{identifier}" that cannot be found in the environment. Add a physical address to create a new datasource, or check the name.'
        )
    elif target:
        address = target.safe_address

    assert address is not None

    modes = [child for child in args if isinstance(child, PersistMode)]
    mode = modes[0] if modes else PersistMode.OVERWRITE
    select: SelectStatement = [
        child for child in args if isinstance(child, SelectStatement)
    ][0]

    if mode == PersistMode.APPEND:
        if target is None:
            raise SyntaxError(
                f"Cannot append to non-existent datasource {identifier} on line {meta.line}."
            )
        new_datasource: Datasource = target
        if not new_datasource.partition_by == partition_clause.columns:
            raise SyntaxError(
                f"Cannot append to datasource {identifier} with different partitioning scheme then insert on line {meta.line}. Datasource partitioning: {new_datasource.partition_by}, insert partitioning: {partition_clause.columns if partition_clause else '[]'}"
            )
        if len(partition_clause.columns) > 1:
            raise NotImplementedError(
                "Incremental partition overwrites by more than 1 column are not yet supported."
            )
        for partition_ref in partition_clause.columns:
            concept = self.environment.concepts[partition_ref.address]
            if concept.output_datatype not in SUPPORTED_INCREMENTAL_TYPES:
                raise SyntaxError(
                    f"Cannot incremental persist on concept {concept.address} of type {concept.output_datatype} on line {meta.line}."
                )
    elif target:
        new_datasource = target
    else:
        namespace = (
            self.environment.namespace
            if self.environment.namespace
            else DEFAULT_NAMESPACE
        )
        new_datasource = select.to_datasource(
            namespace=namespace,
            name=identifier,
            address=Address(location=address),
            grain=select.grain,
            environment=self.environment,
        )
    return PersistStatement(
        select=select,
        datasource=new_datasource,
        persist_mode=mode,
        partition_by=partition_clause.columns if partition_clause else [],
        meta=Metadata(line_number=meta.line),
    )
1556
+
1557
@v_args(meta=True)
def persist_statement(self, meta: Meta, args) -> PersistStatement:
    """Return the first child (the already-built persist statement) unchanged."""
    statement = args[0]
    return statement
1560
+
1561
@v_args(meta=True)
def align_item(self, meta: Meta, args) -> AlignItem:
    """Build an ``AlignItem``: first child is the alias, the rest are concepts.

    Each concept is looked up in the environment and stored by reference.
    """
    alias = args[0]
    references = [self.environment.concepts[key].reference for key in args[1:]]
    return AlignItem(
        alias=alias,
        namespace=self.environment.namespace,
        concepts=references,
    )
1568
+
1569
@v_args(meta=True)
def align_clause(self, meta: Meta, args) -> AlignClause:
    """Wrap the child align items in an ``AlignClause``."""
    align_items = args
    return AlignClause(items=align_items)
1572
+
1573
@v_args(meta=True)
def derive_item(self, meta: Meta, args) -> DeriveItem:
    """Build a ``DeriveItem`` from an expression (``args[0]``) and a name (``args[1]``)."""
    expression = args[0]
    derived_name = args[1]
    return DeriveItem(
        expr=expression,
        name=derived_name,
        namespace=self.environment.namespace,
    )
1578
+
1579
@v_args(meta=True)
def derive_clause(self, meta: Meta, args) -> DeriveClause:
    """Wrap the child derive items in a ``DeriveClause``."""
    derive_items = args
    return DeriveClause(items=derive_items)
1583
+
1584
@v_args(meta=True)
def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
    """Combine several selects, an align clause and optional clauses.

    Children are classified by type. One concept is created and registered
    per align item and per derive item.

    Raises:
        AssertionError: no align clause was supplied.
        SyntaxError: a derive expression is not a function, comparison or
            window item.
    """
    selects: list[SelectStatement] = []
    align: AlignClause | None = None
    limit: int | None = None
    order_by: OrderBy | None = None
    where: WhereClause | None = None
    having: HavingClause | None = None
    derive: DeriveClause | None = None
    for arg in args:
        if isinstance(arg, SelectStatement):
            selects.append(arg)
        elif isinstance(arg, Limit):
            limit = arg.count
        elif isinstance(arg, OrderBy):
            order_by = arg
        # HavingClause is tested before WhereClause: select_statement guards
        # its WhereClause branch with `not isinstance(arg, HavingClause)`,
        # which implies a HAVING clause would otherwise match as WHERE here.
        elif isinstance(arg, HavingClause):
            having = arg
        elif isinstance(arg, WhereClause):
            where = arg
        elif isinstance(arg, AlignClause):
            align = arg
        elif isinstance(arg, DeriveClause):
            derive = arg

    assert align

    derived_concepts = []
    new_selects = [x.as_lineage(self.environment) for x in selects]
    lineage = MultiSelectLineage(
        selects=new_selects,
        align=align,
        derive=derive,
        namespace=self.environment.namespace,
        where_clause=where,
        having_clause=having,
        limit=limit,
        hidden_components=set(y for x in new_selects for y in x.hidden_components),
    )
    for x in align.items:
        concept = align_item_to_concept(
            x,
            align,
            selects,
            where=where,
            having=having,
            limit=limit,
            environment=self.environment,
        )
        derived_concepts.append(concept)
        self.environment.add_concept(concept, meta=meta)
    if derive:
        for derived in derive.items:
            derivation = derived.expr
            name = derived.name
            if not isinstance(derivation, (Function, Comparison, WindowItem)):
                raise SyntaxError(
                    f"Invalid derive expression {derivation} in {meta.line}, must be a function or conditional"
                )
            concept = derive_item_to_concept(
                derivation, name, lineage, self.environment.namespace
            )
            derived_concepts.append(concept)
            self.environment.add_concept(concept, meta=meta)
    multi = MultiSelectStatement(
        selects=selects,
        align=align,
        namespace=self.environment.namespace,
        where_clause=where,
        order_by=order_by,
        limit=limit,
        meta=Metadata(line_number=meta.line),
        derived_concepts=derived_concepts,
        derive=derive,
    )
    return multi
1662
+
1663
@v_args(meta=True)
def select_statement(self, meta: Meta, args) -> SelectStatement:
    """Build a ``SelectStatement`` from its parsed clauses.

    Children are classified by type: a list is the select items, plus
    optional limit, order-by, where and having clauses.

    On the initial parse pass with strict name-shadow enforcement enabled,
    a locally derived concept that reuses an existing concept name is
    rejected.

    Raises:
        ParseError: more than one where clause, or no select items.
        NameShadowError: a locally derived concept shadows an existing one.
    """
    select_items: List[SelectItem] | None = None
    limit = None
    order_by = None
    where = None
    having = None
    for arg in args:
        if isinstance(arg, List):
            select_items = arg
        elif isinstance(arg, Limit):
            limit = arg.count
        elif isinstance(arg, OrderBy):
            order_by = arg
        elif isinstance(arg, WhereClause) and not isinstance(arg, HavingClause):
            if where is not None:
                raise ParseError(
                    "Multiple where clauses defined are not supported!"
                )
            where = arg
        elif isinstance(arg, HavingClause):
            having = arg
    if not select_items:
        raise ParseError("Malformed select, missing select items")
    # snapshot existing names before the select registers its own concepts
    pre_keys = set(self.environment.concepts.keys())
    base = SelectStatement.from_inputs(
        environment=self.environment,
        selection=select_items,
        order_by=order_by,
        where_clause=where,
        having_clause=having,
        limit=limit,
        meta=Metadata(line_number=meta.line),
    )
    if (
        self.parse_pass == ParsePass.INITIAL
        and self.parse_config.strict_name_shadow_enforcement
    ):
        intersection = base.locally_derived.intersection(pre_keys)
        if intersection:
            for x in intersection:
                if str(base.local_concepts[x].lineage) == str(
                    self.environment.concepts[x].lineage
                ):
                    local = base.local_concepts[x]
                    # Name the concept itself: the bare name in the default
                    # namespace, otherwise its address. The bare namespace
                    # alone would point the user at the wrong thing.
                    friendly_name = (
                        local.name
                        if local.namespace == DEFAULT_NAMESPACE
                        else local.address
                    )
                    raise NameShadowError(
                        f"Select statement {base} creates a new concept '{friendly_name}' with identical definition as the existing concept '{friendly_name}'. Replace {base.local_concepts[x].lineage} with a direct reference to {friendly_name}."
                    )
                else:
                    raise NameShadowError(
                        f"Select statement {base} creates new named concepts from calculations {list(intersection)} with identical name(s) to existing concept(s). Use new unique names for these."
                    )
    return base
1721
+
1722
@v_args(meta=True)
def address(self, meta: Meta, args):
    """Return the single child of an address rule unchanged."""
    (first, *_) = args
    return first
1725
+
1726
@v_args(meta=True)
def query(self, meta: Meta, args):
    """Wrap the first child in a Query as its text."""
    query_text = args[0]
    return Query(text=query_text)
1729
+
1730
def where(self, args):
    """Build a WhereClause from the first child, coerced via expr_to_boolean."""
    return WhereClause(
        conditional=expr_to_boolean(args[0], self.function_factory)
    )
1734
+
1735
def having(self, args):
    """Build a HavingClause from the first child.

    Comparison, Conditional and Parenthetical children are used directly.
    Anything else is wrapped in a Comparison: `== True` when its datatype
    is BOOL, otherwise `IS_NOT NULL`.
    """
    condition = args[0]
    if isinstance(condition, (Comparison, Conditional, Parenthetical)):
        return HavingClause(conditional=condition)
    if arg_to_datatype(condition) == DataType.BOOL:
        wrapped = Comparison(
            left=condition, right=True, operator=ComparisonOperator.EQ
        )
    else:
        wrapped = Comparison(
            left=condition,
            right=MagicConstants.NULL,
            operator=ComparisonOperator.IS_NOT,
        )
    return HavingClause(conditional=wrapped)
1747
+
1748
@v_args(meta=True)
def function_binding_list(self, meta: Meta, args) -> list[ArgBinding]:
    """Return the parsed argument bindings as received."""
    bindings = args
    return bindings
1751
+
1752
@v_args(meta=True)
def function_binding_type(self, meta: Meta, args) -> FunctionBindingType:
    """Wrap the first child in a FunctionBindingType."""
    declared = args[0]
    return FunctionBindingType(type=declared)
1755
+
1756
@v_args(meta=True)
def function_binding_default(self, meta: Meta, args):
    """Return the second child of the rule (the default value)."""
    default_value = args[1]
    return default_value
1759
+
1760
@v_args(meta=True)
def function_binding_item(self, meta: Meta, args) -> ArgBinding:
    """Build an ArgBinding from a name plus optional type and default.

    The first child is the name. Of the remaining children, a
    FunctionBindingType supplies the datatype and anything else is taken
    as the default; the last occurrence of each wins.
    """
    name, *extras = args
    declared_type = None
    default_value = None
    for extra in extras:
        if not isinstance(extra, FunctionBindingType):
            default_value = extra
            continue
        declared_type = extra.type
    return ArgBinding.model_construct(
        name=name, datatype=declared_type, default=default_value
    )
1770
+
1771
@v_args(meta=True)
def raw_function(self, meta: Meta, args) -> FunctionDeclaration:
    """Register a custom function in the environment and return its declaration.

    Children are, in order: the function name, its argument bindings, and
    the output expression.
    """
    identity, function_arguments, output = args[0], args[1], args[2]
    factory = CustomFunctionFactory(
        function=output,
        namespace=self.environment.namespace,
        function_arguments=function_arguments,
        name=identity,
    )
    self.environment.functions[identity] = factory
    return FunctionDeclaration(name=identity, args=function_arguments, expr=output)
1784
+
1785
def custom_function(self, args) -> FunctionCallWrapper:
    """Call a registered custom function and wrap the result.

    The first child names the function in `self.environment.functions`;
    the remaining children are passed to it as positional arguments.
    """
    name = args[0]
    call_args = args[1:]
    factory = self.environment.functions[name]
    return FunctionCallWrapper(
        content=factory(*call_args), name=name, args=call_args
    )
1793
+
1794
@v_args(meta=True)
def function(self, meta: Meta, args) -> Function:
    """Return the single child of a function rule unchanged."""
    (first, *_) = args
    return first
1797
+
1798
@v_args(meta=True)
def type_drop_clause(self, meta: Meta, args) -> DropOn:
    """Convert each child to a FunctionType and collect them in a DropOn."""
    function_types = list(map(FunctionType, args))
    return DropOn(function_types)
1801
+
1802
@v_args(meta=True)
def type_add_clause(self, meta: Meta, args) -> AddOn:
    """Convert each child to a FunctionType and collect them in an AddOn."""
    function_types = list(map(FunctionType, args))
    return AddOn(function_types)
1805
+
1806
@v_args(meta=True)
def type_declaration(self, meta: Meta, args) -> TypeDeclaration:
    """Create a CustomType, store it in the environment, and return its declaration.

    The first child is the type name. Among the remaining children,
    DataType values form the underlying type (a single value when exactly
    one is present, otherwise the list), and the last AddOn / DropOn seen
    provide the add_on / drop_on function lists.
    """
    key = args[0]
    datatype: list[DataType] = []
    add_on = None
    drop_on = None
    for extra in args[1:]:
        if isinstance(extra, DataType):
            datatype.append(extra)
        if isinstance(extra, AddOn):
            add_on = extra
        elif isinstance(extra, DropOn):
            drop_on = extra
    final_datatype: list[DataType] | DataType = (
        datatype[0] if len(datatype) == 1 else datatype
    )
    new = CustomType(
        name=key,
        type=final_datatype,
        drop_on=drop_on.functions if drop_on else [],
        add_on=add_on.functions if add_on else [],
    )
    self.environment.data_types[key] = new
    return TypeDeclaration(type=new)
1829
+
1830
def int_lit(self, args):
    """Concatenate the string children and parse the result as an int."""
    digits = "".join(args)
    return int(digits)
1832
+
1833
def bool_lit(self, args):
    """Return True when the first child, capitalized, equals "True"."""
    normalized = args[0].capitalize()
    return normalized == "True"
1835
+
1836
def null_lit(self, args):
    """Return the shared NULL_VALUE constant; the children are ignored."""
    null_marker = NULL_VALUE
    return null_marker
1838
+
1839
def float_lit(self, args):
    """Parse the first child as a float."""
    raw = args[0]
    return float(raw)
1841
+
1842
def array_lit(self, args):
    """Convert the children with list_to_wrapper."""
    wrapped = list_to_wrapper(args)
    return wrapped
1844
+
1845
def tuple_lit(self, args):
    """Convert the children with tuple_to_wrapper."""
    wrapped = tuple_to_wrapper(args)
    return wrapped
1847
+
1848
def string_lit(self, args) -> str:
    """Return the first child, or an empty string when there are no children."""
    return args[0] if args else ""
1853
+
1854
@v_args(meta=True)
def struct_lit(self, meta, args):
    """Build a FunctionType.STRUCT function from the parsed children."""
    factory = self.function_factory
    return factory.create_function(args, FunctionType.STRUCT, meta)
1859
+
1860
def map_lit(self, args):
    """Pair alternating children as key/value and wrap them with dict_to_map_wrapper."""
    keys = args[::2]
    values = args[1::2]
    pairs = {key: value for key, value in zip(keys, values)}
    return dict_to_map_wrapper(pairs)
1864
+
1865
def literal(self, args):
    """Return the single child of a literal rule unchanged."""
    (first, *_) = args
    return first
1867
+
1868
def product_operator(self, args) -> Function | Any:
    """Fold `operand (op operand)*` children into nested functions, left to right.

    A single child is returned unchanged. Supported operators are `*`,
    `**`, `/` and `%`.

    Raises:
        ValueError: if an operator is not one of the supported symbols.
    """
    if len(args) == 1:
        return args[0]
    function_types = {
        "*": FunctionType.MULTIPLY,
        "**": FunctionType.POWER,
        "/": FunctionType.DIVIDE,
        "%": FunctionType.MOD,
    }
    accumulated = args[0]
    for position in range(1, len(args), 2):
        op = args[position]
        right = args[position + 1]
        function_type = function_types.get(op)
        if function_type is None:
            raise ValueError(f"Unknown operator: {op}")
        accumulated = self.function_factory.create_function(
            [accumulated, right], operator=function_type
        )
    return accumulated
1896
+
1897
def PLUS_OR_MINUS(self, args) -> str:
    """Return the `value` attribute of the matched token."""
    token = args
    return token.value
1899
+
1900
def MULTIPLY_DIVIDE_PERCENT(self, args) -> str:
    """Return the `value` attribute of the matched token."""
    token = args
    return token.value
1902
+
1903
@v_args(meta=True)
def sum_operator(self, meta: Meta, args) -> Function | Any:
    """Fold `operand (op operand)*` children into nested functions, left to right.

    A single child is returned unchanged. Operators are lower-cased before
    matching; supported values are `+`, `-`, `||` and `like`.

    Raises:
        ValueError: if an operator is not one of the supported values.
    """
    if len(args) == 1:
        return args[0]
    function_types = {
        "+": FunctionType.ADD,
        "-": FunctionType.SUBTRACT,
        "||": FunctionType.CONCAT,
        "like": FunctionType.LIKE,
    }
    accumulated = args[0]
    for position in range(1, len(args), 2):
        op = args[position].lower()
        right = args[position + 1]
        function_type = function_types.get(op)
        if function_type is None:
            raise ValueError(f"Unknown operator: {op}")
        accumulated = self.function_factory.create_function(
            [accumulated, right], operator=function_type, meta=meta
        )
    return accumulated
1932
+
1933
def comparison(self, args) -> Comparison:
    """Build a comparison from `left, operator, right` children.

    A single child is passed through unchanged. IN / NOT_IN operators
    produce a SubselectComparison; every other operator a Comparison.
    """
    if len(args) == 1:
        return args[0]
    left, operator, right = args[0], args[1], args[2]
    membership = (ComparisonOperator.IN, ComparisonOperator.NOT_IN)
    node_type = SubselectComparison if operator in membership else Comparison
    return node_type(left=left, right=right, operator=operator)
1945
+
1946
def between_comparison(self, args) -> Conditional:
    """Expand `value, lower, upper` into `value >= lower AND value <= upper`."""
    lower = args[1]
    upper = args[2]
    at_least = Comparison(
        left=args[0], right=lower, operator=ComparisonOperator.GTE
    )
    at_most = Comparison(
        left=args[0], right=upper, operator=ComparisonOperator.LTE
    )
    return Conditional(left=at_least, right=at_most, operator=BooleanOperator.AND)
1958
+
1959
@v_args(meta=True)
def subselect_comparison(self, meta: Meta, args) -> SubselectComparison:
    """Build a SubselectComparison from `left, operator, right` children.

    Parentheticals around the right-hand side are peeled off while their
    content is one of the unwrappable types. If what remains is a
    Function, FilterItem, WindowItem or AggregateWrapper, it is turned
    into a concept, added to the environment, and replaced by that
    concept's reference.
    """
    unwrappable = (
        Concept,
        Function,
        FilterItem,
        WindowItem,
        AggregateWrapper,
        ListWrapper,
        TupleWrapper,
    )
    candidate = args[2]
    while isinstance(candidate, Parenthetical) and isinstance(
        candidate.content, unwrappable
    ):
        candidate = candidate.content
    if isinstance(candidate, (Function, FilterItem, WindowItem, AggregateWrapper)):
        generated = arbitrary_to_concept(candidate, environment=self.environment)
        self.environment.add_concept(generated, meta=meta)
        candidate = generated.reference
    return SubselectComparison(left=args[0], right=candidate, operator=args[1])
1985
+
1986
def expr_tuple(self, args):
    """Wrap the children in a TupleWrapper typed by their shared datatype.

    Raises:
        ParseError: unless the children resolve to exactly one datatype.
    """
    datatypes = {arg_to_datatype(element) for element in args}
    if len(datatypes) != 1:
        raise ParseError("Tuple must have same type for all elements")
    (shared_type,) = datatypes
    return TupleWrapper(val=tuple(args), type=shared_type)
1991
+
1992
def parenthetical(self, args):
    """Wrap the first child in a Parenthetical."""
    inner = args[0]
    return Parenthetical(content=inner)
1994
+
1995
@v_args(meta=True)
def condition_parenthetical(self, meta, args):
    """Wrap a condition in a Parenthetical.

    With exactly two children, the second one is wrapped and compared
    `== False`; otherwise the first child is wrapped as-is.
    """
    if len(args) != 2:
        return Parenthetical(content=args[0])
    return Comparison(
        left=Parenthetical(content=args[1]),
        right=False,
        operator=ComparisonOperator.EQ,
    )
2004
+
2005
def conditional(self, args):
    """Fold `operand (operator operand)*` children into right-nested Conditionals.

    A single child is returned unchanged and an empty child list yields None.
    """

    def fold(remaining):
        if not remaining:
            return None
        head = remaining[0]
        if len(remaining) == 1:
            return head
        return Conditional(
            left=head, operator=remaining[1], right=fold(remaining[2:])
        )

    return fold(args)
2016
+
2017
def window_order(self, args):
    """Convert the first child to a WindowOrder."""
    raw = args[0]
    return WindowOrder(raw)
2019
+
2020
def window_order_by(self, args):
    """Flatten the tree by returning the first child unchanged."""
    (first, *_) = args
    return first
2023
+
2024
def window(self, args):
    """Build a Window from `window_order, count_token` children."""
    order = args[0]
    count = args[1].value
    return Window(count=count, window_order=order)
2027
+
2028
def WINDOW_TYPE(self, args):
    """Convert the whitespace-stripped token to a WindowType."""
    stripped = args.strip()
    return WindowType(stripped)
2030
+
2031
def window_item_over(self, args):
    """Wrap the first child in a WindowItemOver."""
    contents = args[0]
    return WindowItemOver(contents=contents)
2034
+
2035
def window_item_order(self, args):
    """Wrap the first child in a WindowItemOrder."""
    contents = args[0]
    return WindowItemOrder(contents=contents)
2037
+
2038
def logical_operator(self, args):
    """Convert the lower-cased value of the first token to a BooleanOperator."""
    keyword = args[0].value.lower()
    return BooleanOperator(keyword)
2040
+
2041
def DATE_PART(self, args):
    """Convert the token's value to a DatePart."""
    raw = args.value
    return DatePart(raw)
2043
+
2044
@v_args(meta=True)
def window_item(self, meta: Meta, args) -> WindowItem:
    """Build a WindowItem by classifying each child by type.

    ints set the index, WindowItemOrder / WindowItemOver supply the
    ordering and partitioning, WindowType sets the window type (initially
    the first child), and strings / ConceptRefs are looked up in the
    environment. Any other child is converted to a concept and registered
    in the environment.

    Raises:
        ParseError: if no concept could be determined from the children.
    """
    window_type: WindowType = args[0]
    ordering = []
    partition = []
    index = None
    concept: Concept | None = None
    concepts = self.environment.concepts
    for child in args:
        if isinstance(child, int):
            index = child
        elif isinstance(child, WindowItemOrder):
            ordering = child.contents
        elif isinstance(child, WindowItemOver):
            partition = child.contents
        elif isinstance(child, str):
            concept = concepts[child]
        elif isinstance(child, ConceptRef):
            concept = concepts[child.address]
        elif isinstance(child, WindowType):
            window_type = child
        else:
            concept = arbitrary_to_concept(child, environment=self.environment)
            self.environment.add_concept(concept, meta=meta)
    if not concept:
        raise ParseError(
            f"Window statements must be on fields, not constants - error in: `{self.text_lookup[self.parse_address][meta.start_pos:meta.end_pos]}`"
        )
    return WindowItem(
        type=window_type,
        content=concept.reference,
        over=partition,
        order_by=ordering,
        index=index,
    )
2078
+
2079
def filter_item(self, args) -> FilterItem:
    """Build a FilterItem from `expression, condition` children.

    A condition that is not already a WhereClause is coerced with
    expr_to_boolean and wrapped in one. A string expression is replaced
    by the reference of the environment concept with that key.
    """
    expr, raw = args
    where: WhereClause = (
        raw
        if isinstance(raw, WhereClause)
        else WhereClause(conditional=expr_to_boolean(raw, self.function_factory))
    )
    content = (
        self.environment.concepts[expr].reference
        if isinstance(expr, str)
        else expr
    )
    return FilterItem(content=content, where=where)
2089
+
2090
+ # BEGIN FUNCTIONS
2091
@v_args(meta=True)
def expr_reference(self, meta, args) -> Concept:
    """Look up the first child in the environment's concepts, passing the source line."""
    concepts = self.environment.concepts
    return concepts.__getitem__(args[0], meta.line)
2094
+
2095
def expr(self, args):
    """Return the sole child of an expression.

    Raises:
        ParseError: if there is more than one child.
    """
    if len(args) <= 1:
        return args[0]
    raise ParseError("Expression should have one child only.")
2099
+
2100
def aggregate_over(self, args):
    """Return the first child unchanged."""
    (first, *_) = args
    return first
2102
+
2103
def aggregate_all(self, args):
    """Return a one-element list referencing the internal all-rows concept."""
    all_rows = ConceptRef(
        address=f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}",
        datatype=DataType.INTEGER,
    )
    return [all_rows]
2110
+
2111
def aggregate_functions(self, args):
    """Wrap a function in an AggregateWrapper, with `by` when a second child is given."""
    if len(args) != 2:
        return AggregateWrapper(function=args[0])
    function, grouping = args
    return AggregateWrapper(function=function, by=grouping)
2115
+
2116
@v_args(meta=True)
def index_access(self, meta, args):
    """Build MAP_ACCESS for map-typed bases and INDEX_ACCESS otherwise.

    Arguments are first normalised with process_function_args; the base is
    the first processed argument.
    """
    processed = process_function_args(
        args, meta=meta, environment=self.environment
    )
    base = processed[0]
    is_map = base.datatype == DataType.MAP or isinstance(base.datatype, MapType)
    operator = FunctionType.MAP_ACCESS if is_map else FunctionType.INDEX_ACCESS
    return self.function_factory.create_function(processed, operator, meta)
2127
+
2128
@v_args(meta=True)
def map_key_access(self, meta, args):
    """Build a FunctionType.MAP_ACCESS function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.MAP_ACCESS, meta=meta)
2133
+
2134
@v_args(meta=True)
def attr_access(self, meta, args):
    """Build a FunctionType.ATTR_ACCESS function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.ATTR_ACCESS, meta=meta)
2139
+
2140
@v_args(meta=True)
def fcoalesce(self, meta, args):
    """Build a FunctionType.COALESCE function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.COALESCE, meta=meta
    )
2143
+
2144
@v_args(meta=True)
def fnullif(self, meta, args):
    """Build a FunctionType.NULLIF function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.NULLIF, meta=meta
    )
2147
+
2148
@v_args(meta=True)
def frecurse_edge(self, meta, args):
    """Build a FunctionType.RECURSE_EDGE function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.RECURSE_EDGE, meta=meta)
2153
+
2154
@v_args(meta=True)
def unnest(self, meta, args):
    """Build a FunctionType.UNNEST function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.UNNEST, meta=meta
    )
2158
+
2159
@v_args(meta=True)
def count(self, meta, args):
    """Build a FunctionType.COUNT function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.COUNT, meta=meta
    )
2162
+
2163
@v_args(meta=True)
def fgroup(self, meta, args):
    """Build a FunctionType.GROUP function.

    The first child is always passed; when a second child is present its
    elements are appended as further arguments.
    """
    fargs = [args[0]]
    if len(args) == 2:
        fargs.extend(args[1])
    return self.function_factory.create_function(fargs, FunctionType.GROUP, meta)
2170
+
2171
@v_args(meta=True)
def fabs(self, meta, args):
    """Build a FunctionType.ABS function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.ABS, meta=meta
    )
2174
+
2175
@v_args(meta=True)
def count_distinct(self, meta, args):
    """Build a FunctionType.COUNT_DISTINCT function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.COUNT_DISTINCT, meta=meta)
2180
+
2181
@v_args(meta=True)
def sum(self, meta, args):
    """Build a FunctionType.SUM function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.SUM, meta=meta
    )
2184
+
2185
@v_args(meta=True)
def array_agg(self, meta, args):
    """Build a FunctionType.ARRAY_AGG function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.ARRAY_AGG, meta=meta
    )
2188
+
2189
@v_args(meta=True)
def any(self, meta, args):
    """Build a FunctionType.ANY function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.ANY, meta=meta
    )
2192
+
2193
@v_args(meta=True)
def bool_and(self, meta, args):
    """Build a FunctionType.BOOL_AND function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.BOOL_AND, meta=meta
    )
2196
+
2197
@v_args(meta=True)
def bool_or(self, meta, args):
    """Build a FunctionType.BOOL_OR function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.BOOL_OR, meta=meta
    )
2200
+
2201
@v_args(meta=True)
def avg(self, meta, args):
    """Build a FunctionType.AVG function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.AVG, meta=meta
    )
2204
+
2205
@v_args(meta=True)
def max(self, meta, args):
    """Build a FunctionType.MAX function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.MAX, meta=meta
    )
2208
+
2209
@v_args(meta=True)
def min(self, meta, args):
    """Build a FunctionType.MIN function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.MIN, meta=meta
    )
2212
+
2213
@v_args(meta=True)
def len(self, meta, args):
    """Build a FunctionType.LENGTH function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.LENGTH, meta=meta
    )
2216
+
2217
@v_args(meta=True)
def fsplit(self, meta, args):
    """Build a FunctionType.SPLIT function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.SPLIT, meta=meta
    )
2220
+
2221
@v_args(meta=True)
def concat(self, meta, args):
    """Build a FunctionType.CONCAT function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.CONCAT, meta=meta
    )
2224
+
2225
@v_args(meta=True)
def union(self, meta, args):
    """Build a FunctionType.UNION function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.UNION, meta=meta
    )
2228
+
2229
@v_args(meta=True)
def like(self, meta, args):
    """Build a FunctionType.LIKE function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.LIKE, meta=meta
    )
2232
+
2233
@v_args(meta=True)
def alt_like(self, meta, args):
    """Build a FunctionType.LIKE function from the parsed arguments (same mapping as `like`)."""
    return self.function_factory.create_function(
        args, operator=FunctionType.LIKE, meta=meta
    )
2236
+
2237
@v_args(meta=True)
def ilike(self, meta, args):
    """Build a FunctionType.LIKE function from the parsed arguments."""
    # NOTE(review): this maps to FunctionType.LIKE, exactly as `like` does;
    # confirm whether a case-insensitive function type was intended here.
    return self.function_factory.create_function(
        args, operator=FunctionType.LIKE, meta=meta
    )
2240
+
2241
@v_args(meta=True)
def upper(self, meta, args):
    """Build a FunctionType.UPPER function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.UPPER, meta=meta
    )
2244
+
2245
@v_args(meta=True)
def fstrpos(self, meta, args):
    """Build a FunctionType.STRPOS function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.STRPOS, meta=meta
    )
2248
+
2249
@v_args(meta=True)
def freplace(self, meta, args):
    """Build a FunctionType.REPLACE function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.REPLACE, meta=meta
    )
2252
+
2253
@v_args(meta=True)
def fcontains(self, meta, args):
    """Build a FunctionType.CONTAINS function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.CONTAINS, meta=meta
    )
2256
+
2257
@v_args(meta=True)
def ftrim(self, meta, args):
    """Build a FunctionType.TRIM function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.TRIM, meta=meta
    )
2260
+
2261
@v_args(meta=True)
def fhash(self, meta, args):
    """Build a FunctionType.HASH function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.HASH, meta=meta
    )
2264
+
2265
@v_args(meta=True)
def fsubstring(self, meta, args):
    """Build a FunctionType.SUBSTRING function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.SUBSTRING, meta=meta
    )
2268
+
2269
@v_args(meta=True)
def flower(self, meta, args):
    """Build a FunctionType.LOWER function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.LOWER, meta=meta
    )
2272
+
2273
@v_args(meta=True)
def fregexp_contains(self, meta, args):
    """Build a FunctionType.REGEXP_CONTAINS function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.REGEXP_CONTAINS, meta=meta)
2278
+
2279
@v_args(meta=True)
def fregexp_extract(self, meta, args):
    """Build a FunctionType.REGEXP_EXTRACT function.

    When only two arguments are given, -1 is appended (in place) as a
    magic value representing the default behavior.
    """
    default_marker = -1
    if len(args) == 2:
        args.append(default_marker)
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.REGEXP_EXTRACT, meta=meta)
2287
+
2288
@v_args(meta=True)
def fregexp_replace(self, meta, args):
    """Build a FunctionType.REGEXP_REPLACE function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.REGEXP_REPLACE, meta=meta)
2293
+
2294
+ # date functions
2295
@v_args(meta=True)
def fdate(self, meta, args):
    """Build a FunctionType.DATE function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.DATE, meta=meta
    )
2298
+
2299
@v_args(meta=True)
def fdate_trunc(self, meta, args):
    """Build a FunctionType.DATE_TRUNCATE function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.DATE_TRUNCATE, meta=meta)
2304
+
2305
@v_args(meta=True)
def fdate_part(self, meta, args):
    """Build a FunctionType.DATE_PART function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.DATE_PART, meta=meta
    )
2308
+
2309
@v_args(meta=True)
def fdate_add(self, meta, args):
    """Build a FunctionType.DATE_ADD function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.DATE_ADD, meta=meta
    )
2312
+
2313
@v_args(meta=True)
def fdate_sub(self, meta, args):
    """Build a FunctionType.DATE_SUB function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.DATE_SUB, meta=meta
    )
2316
+
2317
@v_args(meta=True)
def fdate_diff(self, meta, args):
    """Build a FunctionType.DATE_DIFF function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.DATE_DIFF, meta=meta
    )
2320
+
2321
@v_args(meta=True)
def fdatetime(self, meta, args):
    """Build a FunctionType.DATETIME function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.DATETIME, meta=meta
    )
2324
+
2325
@v_args(meta=True)
def ftimestamp(self, meta, args):
    """Build a FunctionType.TIMESTAMP function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.TIMESTAMP, meta=meta
    )
2328
+
2329
@v_args(meta=True)
def fsecond(self, meta, args):
    """Build a FunctionType.SECOND function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.SECOND, meta=meta
    )
2332
+
2333
@v_args(meta=True)
def fminute(self, meta, args):
    """Build a FunctionType.MINUTE function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.MINUTE, meta=meta
    )
2336
+
2337
@v_args(meta=True)
def fhour(self, meta, args):
    """Build a FunctionType.HOUR function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.HOUR, meta=meta
    )
2340
+
2341
@v_args(meta=True)
def fday(self, meta, args):
    """Build a FunctionType.DAY function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.DAY, meta=meta
    )
2344
+
2345
@v_args(meta=True)
def fday_name(self, meta, args):
    """Build a FunctionType.DAY_NAME function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.DAY_NAME, meta=meta
    )
2348
+
2349
@v_args(meta=True)
def fday_of_week(self, meta, args):
    """Build a FunctionType.DAY_OF_WEEK function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.DAY_OF_WEEK, meta=meta)
2354
+
2355
@v_args(meta=True)
def fweek(self, meta, args):
    """Build a FunctionType.WEEK function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.WEEK, meta=meta
    )
2358
+
2359
@v_args(meta=True)
def fmonth(self, meta, args):
    """Build a FunctionType.MONTH function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.MONTH, meta=meta
    )
2362
+
2363
@v_args(meta=True)
def fmonth_name(self, meta, args):
    """Build a FunctionType.MONTH_NAME function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.MONTH_NAME, meta=meta)
2368
+
2369
@v_args(meta=True)
def fquarter(self, meta, args):
    """Build a FunctionType.QUARTER function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.QUARTER, meta=meta
    )
2372
+
2373
@v_args(meta=True)
def fyear(self, meta, args):
    """Build a FunctionType.YEAR function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.YEAR, meta=meta
    )
2376
+
2377
def internal_fcast(self, meta, args) -> Function:
    """Build a cast, folding casts of string literals into constants.

    Arguments are normalised with process_function_args. When the value
    being cast is not a string, a FunctionType.CAST function is returned.
    A string value is converted eagerly to the target type and returned as
    a TYPED_CONSTANT (when the target is a TraitDataType) or a CONSTANT.

    Raises:
        SyntaxError: if a string is cast to a type with no conversion here.
    """
    resolved = process_function_args(args, meta=meta, environment=self.environment)
    raw_value = resolved[0]
    if not isinstance(raw_value, str):
        return self.function_factory.create_function(
            resolved, FunctionType.CAST, meta
        )

    target = resolved[1]
    processed: date | datetime | int | float | bool | str
    if target == DataType.DATE:
        processed = date.fromisoformat(raw_value)
    elif target == DataType.DATETIME:
        processed = datetime.fromisoformat(raw_value)
    elif target == DataType.TIMESTAMP:
        processed = datetime.fromisoformat(raw_value)
    elif target == DataType.INTEGER:
        processed = int(raw_value)
    elif target == DataType.FLOAT:
        processed = float(raw_value)
    elif target == DataType.BOOL:
        processed = raw_value.capitalize() == "True"
    elif target == DataType.STRING:
        processed = raw_value
    else:
        raise SyntaxError(f"Invalid cast type {target}")

    if isinstance(target, TraitDataType):
        # keep the trait type alongside the converted value
        return self.function_factory.create_function(
            [processed, target], FunctionType.TYPED_CONSTANT, meta
        )
    return self.function_factory.create_function(
        [processed], FunctionType.CONSTANT, meta
    )
2407
+
2408
@v_args(meta=True)
def fdate_spine(self, meta, args) -> Function:
    """Build a FunctionType.DATE_SPINE function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.DATE_SPINE, meta=meta)
2413
+
2414
+ # utility functions
2415
@v_args(meta=True)
def fcast(self, meta, args) -> Function:
    """Delegate to internal_fcast with the same meta and arguments."""
    cast_function = self.internal_fcast(meta, args)
    return cast_function
2418
+
2419
+ # math functions
2420
@v_args(meta=True)
def fadd(self, meta, args) -> Function:
    """Build a FunctionType.ADD function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.ADD, meta=meta
    )
2424
+
2425
@v_args(meta=True)
def fsub(self, meta, args) -> Function:
    """Build a FunctionType.SUBTRACT function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.SUBTRACT, meta=meta
    )
2428
+
2429
@v_args(meta=True)
def fmul(self, meta, args) -> Function:
    """Build a FunctionType.MULTIPLY function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.MULTIPLY, meta=meta
    )
2432
+
2433
@v_args(meta=True)
def fdiv(self, meta: Meta, args) -> Function:
    """Build a FunctionType.DIVIDE function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.DIVIDE, meta=meta
    )
2436
+
2437
@v_args(meta=True)
def fmod(self, meta: Meta, args) -> Function:
    """Build a FunctionType.MOD function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.MOD, meta=meta
    )
2440
+
2441
@v_args(meta=True)
def fsqrt(self, meta: Meta, args) -> Function:
    """Build a FunctionType.SQRT function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.SQRT, meta=meta
    )
2444
+
2445
@v_args(meta=True)
def frandom(self, meta: Meta, args) -> Function:
    """Build a FunctionType.RANDOM function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.RANDOM, meta=meta
    )
2448
+
2449
@v_args(meta=True)
def fround(self, meta, args) -> Function:
    """Build a FunctionType.ROUND function; a lone argument gets 0 appended in place."""
    default_second_argument = 0
    if len(args) == 1:
        args.append(default_second_argument)
    return self.function_factory.create_function(
        args, operator=FunctionType.ROUND, meta=meta
    )
2454
+
2455
@v_args(meta=True)
def flog(self, meta, args) -> Function:
    """Build a FunctionType.LOG function; a lone argument gets 10 appended in place."""
    default_second_argument = 10
    if len(args) == 1:
        args.append(default_second_argument)
    return self.function_factory.create_function(
        args, operator=FunctionType.LOG, meta=meta
    )
2460
+
2461
@v_args(meta=True)
def ffloor(self, meta, args) -> Function:
    """Build a FunctionType.FLOOR function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.FLOOR, meta=meta
    )
2464
+
2465
@v_args(meta=True)
def fceil(self, meta, args) -> Function:
    """Build a FunctionType.CEIL function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.CEIL, meta=meta
    )
2468
+
2469
@v_args(meta=True)
def fcase(self, meta, args: List[Union[CaseWhen, CaseElse]]) -> Function:
    """Build a FunctionType.CASE function from the parsed when/else branches."""
    return self.function_factory.create_function(
        args, operator=FunctionType.CASE, meta=meta
    )
2472
+
2473
@v_args(meta=True)
def fcase_when(self, meta, args) -> CaseWhen:
    """Build a CaseWhen from `condition, result` children.

    Arguments are normalised with process_function_args and the condition
    is coerced with expr_to_boolean.
    """
    processed = process_function_args(
        args, meta=meta, environment=self.environment
    )
    condition = expr_to_boolean(processed[0], self.function_factory)
    return CaseWhen(comparison=condition, expr=processed[1])
2478
+
2479
@v_args(meta=True)
def fcase_else(self, meta, args) -> CaseElse:
    """Build a CaseElse from the first argument after process_function_args."""
    processed = process_function_args(
        args, meta=meta, environment=self.environment
    )
    return CaseElse(expr=processed[0])
2483
+
2484
@v_args(meta=True)
def fcurrent_date(self, meta, args):
    """Return CurrentDate called with an empty argument list; children are ignored."""
    no_arguments: list = []
    return CurrentDate(no_arguments)
2487
+
2488
@v_args(meta=True)
def fcurrent_datetime(self, meta, args):
    """Build a FunctionType.CURRENT_DATETIME function with no arguments; children are ignored."""
    factory = self.function_factory
    return factory.create_function([], FunctionType.CURRENT_DATETIME, meta)
2493
+
2494
@v_args(meta=True)
def fcurrent_timestamp(self, meta, args):
    """Build a FunctionType.CURRENT_TIMESTAMP function with no arguments; children are ignored."""
    factory = self.function_factory
    return factory.create_function([], FunctionType.CURRENT_TIMESTAMP, meta)
2499
+
2500
@v_args(meta=True)
def fnot(self, meta, args):
    """Negate the first argument.

    For a BOOL-typed argument this returns `COALESCE(arg, False) == False`
    as a Comparison; for any other type it returns an IS_NULL function
    over the arguments.
    """
    factory = self.function_factory
    if arg_to_datatype(args[0]) != DataType.BOOL:
        return factory.create_function(args, FunctionType.IS_NULL, meta)
    coalesced = factory.create_function(
        [args[0], False], FunctionType.COALESCE, meta
    )
    return Comparison(
        left=coalesced,
        operator=ComparisonOperator.EQ,
        right=False,
        meta=meta,
    )
2512
+
2513
@v_args(meta=True)
def fbool(self, meta, args):
    """Build a FunctionType.BOOL function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.BOOL, meta=meta
    )
2516
+
2517
@v_args(meta=True)
def fmap_keys(self, meta, args):
    """Build a FunctionType.MAP_KEYS function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.MAP_KEYS, meta=meta
    )
2520
+
2521
@v_args(meta=True)
def fmap_values(self, meta, args):
    """Build a FunctionType.MAP_VALUES function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.MAP_VALUES, meta=meta)
2526
+
2527
@v_args(meta=True)
def farray_sum(self, meta, args):
    """Build a FunctionType.ARRAY_SUM function from the parsed arguments."""
    return self.function_factory.create_function(
        args, operator=FunctionType.ARRAY_SUM, meta=meta
    )
2530
+
2531
@v_args(meta=True)
def fgenerate_array(self, meta, args):
    """Build a FunctionType.GENERATE_ARRAY function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.GENERATE_ARRAY, meta=meta)
2536
+
2537
@v_args(meta=True)
def farray_distinct(self, meta, args):
    """Build a FunctionType.ARRAY_DISTINCT function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.ARRAY_DISTINCT, meta=meta)
2542
+
2543
@v_args(meta=True)
def farray_to_string(self, meta, args):
    """Build a FunctionType.ARRAY_TO_STRING function from the parsed arguments."""
    factory = self.function_factory
    return factory.create_function(args, operator=FunctionType.ARRAY_TO_STRING, meta=meta)
2548
+
2549
@v_args(meta=True)
def farray_sort(self, meta, args):
    """Build a FunctionType.ARRAY_SORT function.

    With a single argument, Ordering.ASCENDING is supplied as the second
    argument (a magic value representing the default behavior).
    """
    sort_args = [args[0], Ordering.ASCENDING] if len(args) == 1 else args
    factory = self.function_factory
    return factory.create_function(sort_args, operator=FunctionType.ARRAY_SORT, meta=meta)
2557
+
2558
@v_args(meta=True)
def transform_lambda(self, meta, args):
    """Look up the function registered in the environment under the first child."""
    function_name = args[0]
    return self.environment.functions[function_name]
2561
+
2562
@v_args(meta=True)
def farray_transform(self, meta, args) -> Function:
    """Build a FunctionType.ARRAY_TRANSFORM function from `array, factory` children.

    The factory must declare exactly one argument and the first child must
    be array-typed. The factory is invoked with an ArgBinding that reuses
    its argument's name and carries the array's value datatype.

    Raises:
        InvalidSyntaxException: if the factory does not take exactly one
            argument, or the first child is not an array.
    """
    source = args[0]
    factory: CustomFunctionFactory = args[1]
    if len(factory.function_arguments) != 1:
        raise InvalidSyntaxException(
            "Array transform function must have exactly one argument;"
        )
    array_type = arg_to_datatype(source)
    if not isinstance(array_type, ArrayType):
        raise InvalidSyntaxException(
            f"Array transform function must be applied to an array, not {array_type}"
        )
    parameter = factory.function_arguments[0]
    element_binding = ArgBinding(
        name=factory.function_arguments[0].name,
        datatype=array_type.value_data_type,
    )
    function_arguments = [source, parameter, factory(element_binding)]
    return self.function_factory.create_function(
        function_arguments, FunctionType.ARRAY_TRANSFORM, meta
    )
2588
+
2589
@v_args(meta=True)
def farray_filter(self, meta, args) -> Function:
    """Build a FunctionType.ARRAY_FILTER function from `array, factory` children.

    The factory must declare exactly one argument and the first child must
    be array-typed. The factory is invoked with an ArgBinding that reuses
    its argument's name and carries the array's value datatype.

    Raises:
        InvalidSyntaxException: if the factory does not take exactly one
            argument, or the first child is not an array.
    """
    source = args[0]
    factory: CustomFunctionFactory = args[1]
    if len(factory.function_arguments) != 1:
        raise InvalidSyntaxException(
            "Array filter function must have exactly one argument;"
        )
    array_type = arg_to_datatype(source)
    if not isinstance(array_type, ArrayType):
        raise InvalidSyntaxException(
            f"Array filter function must be applied to an array, not {array_type}"
        )
    parameter = factory.function_arguments[0]
    element_binding = ArgBinding(
        name=factory.function_arguments[0].name,
        datatype=array_type.value_data_type,
    )
    function_arguments = [source, parameter, factory(element_binding)]
    return self.function_factory.create_function(
        function_arguments, FunctionType.ARRAY_FILTER, meta
    )
2615
+
2616
+
2617
def unpack_visit_error(e: VisitError, text: str | None = None):
    """Re-raise the exception wrapped by a (possibly nested) ``VisitError``.

    This is required to get exceptions from imports, which would raise
    nested VisitErrors.

    Args:
        e: The ``VisitError`` to unwrap; ``e.orig_exc`` is inspected.
        text: Optional source text being parsed. When provided, a short
            extract around the failing tree node is added to the message.

    Raises:
        InvalidSyntaxException: for wrapped ``SyntaxError`` / ``TypeError``
            (with rule and line details when ``e.obj`` is a ``Tree``), or
            when the wrapped exception already is one.
        Exception: any other wrapped exception is re-raised unchanged.

    This function never returns normally.
    """
    orig = e.orig_exc
    if isinstance(orig, VisitError):
        # Nested visit errors: recurse until a non-VisitError is found.
        unpack_visit_error(orig, text)
    elif isinstance(orig, (UndefinedConceptException, ImportError)):
        raise orig
    elif isinstance(orig, InvalidSyntaxException):
        raise orig
    elif isinstance(orig, (SyntaxError, TypeError)):
        if isinstance(e.obj, Tree):
            if text:
                # Clamp at 0: a negative slice start would wrap around and
                # index from the end of the text, yielding a wrong extract.
                extract_start = max(e.obj.meta.start_pos - 5, 0)
                extract = text[extract_start : e.obj.meta.end_pos + 5]
                raise InvalidSyntaxException(
                    str(orig)
                    + " Raised when parsing rule: "
                    + str(e.rule)
                    + f' Line: {e.obj.meta.line} "...{extract}..."'
                )
            # Previously this exception was constructed but never raised,
            # silently discarding the rule/line information.
            raise InvalidSyntaxException(
                str(orig) + " in " + str(e.rule) + f" Line: {e.obj.meta.line}"
            )
        raise InvalidSyntaxException(str(orig)).with_traceback(orig.__traceback__)
    raise orig
2643
+
2644
+
2645
def parse_text_raw(text: str, environment: Optional[Environment] = None) -> None:
    """Run ``PARSER.parse`` on ``text`` and discard the result.

    Nothing is returned; any exception raised by the parser propagates to
    the caller. ``environment`` is accepted but not used by this function.
    """
    PARSER.parse(text)
2647
+
2648
+
2649
# Friendly messages for recognised syntax mistakes, keyed by numeric code.
# The code is shown to the user as "Syntax [<code>]: <message>".
ERROR_CODES: dict[int, str] = {
    # 100 codes are SQL compatibility errors
    101: "Using FROM keyword? Trilogy does not have a FROM clause (Datasource resolution is automatic).",
    # 200 codes relate to required explicit syntax (we could loosen these?)
    201: 'Missing alias? Alias must be specified with "AS" - e.g. `SELECT x+1 AS y`',
    202: "Missing closing semicolon? Statements must be terminated with a semicolon `;`.",
    210: "Missing order direction? Order by must be explicit about direction - specify `asc` or `desc`.",
}

# Number of context characters shown on each side of an error location
# when syntax errors are rendered with inject_context_maker.
DEFAULT_ERROR_SPAN: int = 30
2659
+
2660
+
2661
def inject_context_maker(pos: int, text: str, span: int = 40) -> str:
    """Return a one-line string pinpointing ``pos`` in ``text`` with ``???``.

    Up to ``span`` characters of context are kept on each side of ``pos``,
    limited to the line containing ``pos``. For ``str`` input, ``...`` is
    added on a side where the window was cut short of the text boundary and
    the edge character is not whitespace.

    Note:
        The parser doesn't hold a copy of the text it has to parse,
        so you have to provide it again.

    Args:
        pos: Offset of the error within ``text``.
        text: The text being parsed (``bytes`` is also accepted).
        span: Maximum number of context characters on each side.

    Returns:
        The context string with a ``???`` marker at the error position.
    """
    start = max(pos - span, 0)
    end = pos + span

    if isinstance(text, bytes):
        before = text[start:pos].rsplit(b"\n", 1)[-1]
        after = text[pos:end].split(b"\n", 1)[0]
        return (before + b" ??? " + after).decode("ascii", "backslashreplace")

    before = text[start:pos].rsplit("\n", 1)[-1]
    after = text[pos:end].split("\n", 1)[0]

    # No right ellipsis if the window reaches (or passes) the end of the
    # text, or if the context already terminates on whitespace.
    rcap = "..." if after and not after[-1].isspace() and end < len(text) else ""
    # ``before`` can be empty (pos directly after a newline, or span == 0);
    # guard the index so that case does not raise IndexError.
    lcap = "..." if start > 0 and before and not before[0].isspace() else ""

    # Avoid doubling spaces around the marker.
    lpad = "" if before.endswith(" ") else " "
    rpad = "" if after.startswith(" ") else " "
    return f"{lcap}{before}{lpad}???{rpad}{after}{rcap}"
2695
+
2696
+
2697
def parse_text(
    text: str,
    environment: Optional[Environment] = None,
    root: Path | None = None,
    parse_config: Parsing | None = None,
) -> Tuple[
    Environment,
    List[
        Datasource
        | ImportStatement
        | SelectStatement
        | PersistStatement
        | ShowStatement
        | RawSQLStatement
        | ValidateStatement
        | None
    ],
]:
    """Parse ``text`` into an environment and a list of statement objects.

    Args:
        text: Source text to parse.
        environment: Environment to parse into. When falsy, a new one is
            created, using ``root`` as its working path if ``root`` is given.
        root: Working path for a newly created environment.
        parse_config: Passed through to ``ParseToObjects``.

    Returns:
        ``(environment, output)`` where ``output`` holds the truthy results
        of the second parse pass.

    Raises:
        InvalidSyntaxException: for parser errors (with a location excerpt),
            and for ``ValidationError`` / ``TypeError`` raised while parsing.
        Exception: whatever ``unpack_visit_error`` re-raises for a
            ``VisitError``.
    """

    def _create_generic_syntax_error(
        message: str, pos: int, source: str
    ) -> InvalidSyntaxException:
        """Build a syntax error with a one-line location excerpt appended."""
        flattened = source.replace("\n", " ")
        location = inject_context_maker(pos, flattened, DEFAULT_ERROR_SPAN)
        return InvalidSyntaxException(message + "\nLocation:\n" + location)

    def _create_syntax_error(code: int, pos: int, source: str) -> InvalidSyntaxException:
        """Build a syntax error for a known ``ERROR_CODES`` entry."""
        headline = f"Syntax [{code}]: " + ERROR_CODES[code]
        return _create_generic_syntax_error(headline, pos, source)

    def _handle_unexpected_token(e: UnexpectedToken, source: str) -> None:
        """Turn an UnexpectedToken into a friendlier error; always raises."""
        pos = e.pos_in_stream or 0
        lexer_state = e.interactive_parser.lexer_thread.state
        last_token = lexer_state.last_token if lexer_state else None

        def _position_after_feed() -> int:
            """End position of the lexer's last token, else ``pos``."""
            state = e.interactive_parser.lexer_thread.state
            if state and state.last_token:
                return state.last_token.end_pos or pos
            return pos

        # Missing ordering direction.
        if e.expected == {"ORDERING_DIRECTION"}:
            raise _create_syntax_error(210, pos, source)

        # Stray FROM keyword.
        history = e.token_history
        parsed_tokens = [tok.value for tok in history if tok] if history else []
        if parsed_tokens == ["FROM"]:
            raise _create_syntax_error(101, pos, source)

        # Missing semicolon: see whether feeding a terminator is accepted.
        if last_token and e.token.type == "$END":
            try:
                e.interactive_parser.feed_token(Token("_TERMINATOR", ";"))
                raise _create_syntax_error(202, _position_after_feed(), source)
            except UnexpectedToken:
                pass

        # Missing AS: see whether "AS <identifier>" is accepted.
        try:
            e.interactive_parser.feed_token(Token("AS", "AS"))
            alias_pos = _position_after_feed()
            e.interactive_parser.feed_token(Token("IDENTIFIER", e.token.value))
            raise _create_syntax_error(201, alias_pos, source)
        except UnexpectedToken:
            pass

        # Nothing more specific matched: report the parser's own message.
        raise _create_generic_syntax_error(str(e), pos, source)

    if not environment:
        environment = Environment(working_path=root) if root else Environment()
    parser = ParseToObjects(
        environment=environment, import_keys=["root"], parse_config=parse_config
    )
    start = datetime.now()

    try:
        parser.set_text(text)
        # disable fail on missing to allow for circular dependencies
        parser.prepare_parse()
        tree = PARSER.parse(text)
        parser.transform(tree)
        # this will reset fail on missing
        second_pass = parser.run_second_parse_pass()
        output = [item for item in second_pass if item]
        environment.concepts.fail_on_missing = True
        end = datetime.now()
        perf_logger.debug(
            f"Parse time: {end - start} for {len(text)} characters, {len(output)} objects"
        )
    except VisitError as e:
        unpack_visit_error(e, text)
        # this will never be reached
        raise e
    except UnexpectedToken as e:
        _handle_unexpected_token(e, text)
    except (UnexpectedCharacters, UnexpectedEOF, UnexpectedInput) as e:
        raise _create_generic_syntax_error(str(e), e.pos_in_stream or 0, text)
    except (ValidationError, TypeError) as e:
        raise InvalidSyntaxException(str(e))

    return environment, output