pytrilogy-0.3.149-cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.149.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.149.dist-info/RECORD +207 -0
  6. pytrilogy-0.3.149.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2670 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +436 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +846 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1432 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +397 -0
  117. trilogy/dialect/enums.py +151 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/exceptions.py +26 -0
  130. trilogy/execution/state/file_state_store.py +0 -0
  131. trilogy/execution/state/sqllite_state_store.py +0 -0
  132. trilogy/execution/state/state_store.py +406 -0
  133. trilogy/executor.py +692 -0
  134. trilogy/hooks/__init__.py +4 -0
  135. trilogy/hooks/base_hook.py +40 -0
  136. trilogy/hooks/graph_hook.py +135 -0
  137. trilogy/hooks/query_debugger.py +166 -0
  138. trilogy/metadata/__init__.py +0 -0
  139. trilogy/parser.py +10 -0
  140. trilogy/parsing/README.md +21 -0
  141. trilogy/parsing/__init__.py +0 -0
  142. trilogy/parsing/common.py +1069 -0
  143. trilogy/parsing/config.py +5 -0
  144. trilogy/parsing/exceptions.py +8 -0
  145. trilogy/parsing/helpers.py +1 -0
  146. trilogy/parsing/parse_engine.py +2876 -0
  147. trilogy/parsing/render.py +775 -0
  148. trilogy/parsing/trilogy.lark +546 -0
  149. trilogy/py.typed +0 -0
  150. trilogy/render.py +45 -0
  151. trilogy/scripts/README.md +9 -0
  152. trilogy/scripts/__init__.py +0 -0
  153. trilogy/scripts/agent.py +41 -0
  154. trilogy/scripts/agent_info.py +306 -0
  155. trilogy/scripts/common.py +432 -0
  156. trilogy/scripts/dependency/Cargo.lock +617 -0
  157. trilogy/scripts/dependency/Cargo.toml +39 -0
  158. trilogy/scripts/dependency/README.md +131 -0
  159. trilogy/scripts/dependency/build.sh +25 -0
  160. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  161. trilogy/scripts/dependency/src/lib.rs +16 -0
  162. trilogy/scripts/dependency/src/main.rs +770 -0
  163. trilogy/scripts/dependency/src/parser.rs +435 -0
  164. trilogy/scripts/dependency/src/preql.pest +208 -0
  165. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  166. trilogy/scripts/dependency/src/resolver.rs +716 -0
  167. trilogy/scripts/dependency/tests/base.preql +3 -0
  168. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  169. trilogy/scripts/dependency/tests/customer.preql +6 -0
  170. trilogy/scripts/dependency/tests/main.preql +9 -0
  171. trilogy/scripts/dependency/tests/orders.preql +7 -0
  172. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  173. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  174. trilogy/scripts/dependency.py +323 -0
  175. trilogy/scripts/display.py +555 -0
  176. trilogy/scripts/environment.py +59 -0
  177. trilogy/scripts/fmt.py +32 -0
  178. trilogy/scripts/ingest.py +487 -0
  179. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  180. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  181. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  182. trilogy/scripts/ingest_helpers/typing.py +161 -0
  183. trilogy/scripts/init.py +105 -0
  184. trilogy/scripts/parallel_execution.py +762 -0
  185. trilogy/scripts/plan.py +189 -0
  186. trilogy/scripts/refresh.py +161 -0
  187. trilogy/scripts/run.py +79 -0
  188. trilogy/scripts/serve.py +202 -0
  189. trilogy/scripts/serve_helpers/__init__.py +41 -0
  190. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  191. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  192. trilogy/scripts/serve_helpers/models.py +38 -0
  193. trilogy/scripts/single_execution.py +131 -0
  194. trilogy/scripts/testing.py +143 -0
  195. trilogy/scripts/trilogy.py +75 -0
  196. trilogy/std/__init__.py +0 -0
  197. trilogy/std/color.preql +3 -0
  198. trilogy/std/date.preql +13 -0
  199. trilogy/std/display.preql +18 -0
  200. trilogy/std/geography.preql +22 -0
  201. trilogy/std/metric.preql +15 -0
  202. trilogy/std/money.preql +67 -0
  203. trilogy/std/net.preql +14 -0
  204. trilogy/std/ranking.preql +7 -0
  205. trilogy/std/report.preql +5 -0
  206. trilogy/std/semantic.preql +6 -0
  207. trilogy/utility.py +34 -0
@@ -0,0 +1,2876 @@
1
+ from dataclasses import dataclass
2
+ from datetime import date, datetime
3
+ from enum import Enum
4
+ from logging import getLogger
5
+ from os.path import dirname, join
6
+ from pathlib import Path
7
+ from re import IGNORECASE
8
+ from typing import Any, List, Optional, Tuple, Union
9
+
10
+ from lark import Lark, ParseTree, Token, Transformer, Tree, v_args
11
+ from lark.exceptions import (
12
+ UnexpectedCharacters,
13
+ UnexpectedEOF,
14
+ UnexpectedInput,
15
+ UnexpectedToken,
16
+ VisitError,
17
+ )
18
+ from lark.tree import Meta
19
+ from pydantic import ValidationError
20
+
21
+ from trilogy.constants import (
22
+ CONFIG,
23
+ DEFAULT_NAMESPACE,
24
+ NULL_VALUE,
25
+ MagicConstants,
26
+ Parsing,
27
+ )
28
+ from trilogy.core.enums import (
29
+ AddressType,
30
+ BooleanOperator,
31
+ ComparisonOperator,
32
+ ConceptSource,
33
+ CreateMode,
34
+ DatasourceState,
35
+ DatePart,
36
+ Derivation,
37
+ FunctionType,
38
+ Granularity,
39
+ IOType,
40
+ Modifier,
41
+ Ordering,
42
+ PersistMode,
43
+ PublishAction,
44
+ Purpose,
45
+ ShowCategory,
46
+ ValidationScope,
47
+ WindowOrder,
48
+ WindowType,
49
+ )
50
+ from trilogy.core.exceptions import (
51
+ InvalidSyntaxException,
52
+ MissingParameterException,
53
+ UndefinedConceptException,
54
+ )
55
+ from trilogy.core.functions import (
56
+ CurrentDate,
57
+ FunctionFactory,
58
+ )
59
+ from trilogy.core.internal import ALL_ROWS_CONCEPT, INTERNAL_NAMESPACE
60
+ from trilogy.core.models.author import (
61
+ AggregateWrapper,
62
+ AlignClause,
63
+ AlignItem,
64
+ ArgBinding,
65
+ CaseElse,
66
+ CaseWhen,
67
+ Comment,
68
+ Comparison,
69
+ Concept,
70
+ ConceptRef,
71
+ Conditional,
72
+ CustomFunctionFactory,
73
+ CustomType,
74
+ DeriveClause,
75
+ DeriveItem,
76
+ Expr,
77
+ FilterItem,
78
+ Function,
79
+ FunctionCallWrapper,
80
+ Grain,
81
+ HavingClause,
82
+ Metadata,
83
+ MultiSelectLineage,
84
+ OrderBy,
85
+ OrderItem,
86
+ Parenthetical,
87
+ RowsetItem,
88
+ SubselectComparison,
89
+ UndefinedConceptFull,
90
+ WhereClause,
91
+ Window,
92
+ WindowItem,
93
+ WindowItemOrder,
94
+ WindowItemOver,
95
+ )
96
+ from trilogy.core.models.core import (
97
+ ArrayType,
98
+ DataType,
99
+ DataTyped,
100
+ ListWrapper,
101
+ MapType,
102
+ MapWrapper,
103
+ NumericType,
104
+ StructComponent,
105
+ StructType,
106
+ TraitDataType,
107
+ TupleWrapper,
108
+ arg_to_datatype,
109
+ dict_to_map_wrapper,
110
+ is_compatible_datatype,
111
+ list_to_wrapper,
112
+ tuple_to_wrapper,
113
+ )
114
+ from trilogy.core.models.datasource import (
115
+ Address,
116
+ ColumnAssignment,
117
+ Datasource,
118
+ File,
119
+ Query,
120
+ RawColumnExpr,
121
+ )
122
+ from trilogy.core.models.environment import (
123
+ DictImportResolver,
124
+ Environment,
125
+ FileSystemImportResolver,
126
+ Import,
127
+ )
128
+ from trilogy.core.statements.author import (
129
+ ConceptDeclarationStatement,
130
+ ConceptDerivationStatement,
131
+ ConceptTransform,
132
+ CopyStatement,
133
+ CreateStatement,
134
+ FunctionDeclaration,
135
+ ImportStatement,
136
+ Limit,
137
+ MergeStatementV2,
138
+ MockStatement,
139
+ MultiSelectStatement,
140
+ PersistStatement,
141
+ PublishStatement,
142
+ RawSQLStatement,
143
+ RowsetDerivationStatement,
144
+ SelectItem,
145
+ SelectStatement,
146
+ ShowStatement,
147
+ TypeDeclaration,
148
+ ValidateStatement,
149
+ )
150
+ from trilogy.parsing.common import (
151
+ align_item_to_concept,
152
+ arbitrary_to_concept,
153
+ constant_to_concept,
154
+ derive_item_to_concept,
155
+ process_function_args,
156
+ rowset_to_concepts,
157
+ )
158
+ from trilogy.parsing.exceptions import NameShadowError, ParseError
159
+
160
+ perf_logger = getLogger("trilogy.parse.performance")
161
+
162
+
163
+ class ParsePass(Enum):
164
+ INITIAL = 1
165
+ VALIDATION = 2
166
+
167
+
168
+ CONSTANT_TYPES = (int, float, str, bool, ListWrapper, TupleWrapper, MapWrapper)
169
+
170
+ SELF_LABEL = "root"
171
+
172
+ MAX_PARSE_DEPTH = 10
173
+
174
+ SUPPORTED_INCREMENTAL_TYPES: set[DataType] = set([DataType.DATE, DataType.TIMESTAMP])
175
+
176
+ STDLIB_ROOT = Path(__file__).parent.parent
177
+
178
+
179
+ @dataclass
180
+ class WholeGrainWrapper:
181
+ where: WhereClause
182
+
183
+
184
+ @dataclass
185
+ class FunctionBindingType:
186
+ type: DataType | TraitDataType | None = None
187
+
188
+
189
+ @dataclass
190
+ class DropOn:
191
+ functions: List[FunctionType]
192
+
193
+
194
+ @dataclass
195
+ class AddOn:
196
+ functions: List[FunctionType]
197
+
198
+
199
+ @dataclass
200
+ class DatasourcePartitionClause:
201
+ columns: List[ConceptRef]
202
+
203
+
204
+ class DatasourceUpdateTrigger(Enum):
205
+ INCREMENTAL = "incremental"
206
+ FRESHNESS = "freshness"
207
+
208
+
209
+ @dataclass
210
+ class DatasourceUpdateTriggerClause:
211
+ trigger_type: DatasourceUpdateTrigger
212
+ columns: List[ConceptRef]
213
+
214
+
215
+ with open(join(dirname(__file__), "trilogy.lark"), "r") as f:
216
+ PARSER = Lark(
217
+ f.read(),
218
+ start="start",
219
+ propagate_positions=True,
220
+ g_regex_flags=IGNORECASE,
221
+ parser="lalr",
222
+ cache=True,
223
+ )
224
+
225
+
226
+ def parse_concept_reference(
227
+ name: str, environment: Environment, purpose: Optional[Purpose] = None
228
+ ) -> Tuple[str, str, str, str | None]:
229
+ parent = None
230
+ if "." in name:
231
+ if purpose == Purpose.PROPERTY:
232
+ parent, name = name.rsplit(".", 1)
233
+ namespace = environment.concepts[parent].namespace or DEFAULT_NAMESPACE
234
+ lookup = f"{namespace}.{name}"
235
+ else:
236
+ namespace, name = name.rsplit(".", 1)
237
+ lookup = f"{namespace}.{name}"
238
+ else:
239
+ namespace = environment.namespace or DEFAULT_NAMESPACE
240
+ lookup = name
241
+ return lookup, namespace, name, parent
242
+
243
+
244
+ def expr_to_boolean(
245
+ root,
246
+ function_factory: FunctionFactory,
247
+ ) -> Union[Comparison, SubselectComparison, Conditional]:
248
+ if not isinstance(root, (Comparison, SubselectComparison, Conditional)):
249
+ if arg_to_datatype(root) == DataType.BOOL:
250
+ root = Comparison(left=root, right=True, operator=ComparisonOperator.EQ)
251
+ elif arg_to_datatype(root) == DataType.INTEGER:
252
+ root = Comparison(
253
+ left=function_factory.create_function(
254
+ [root],
255
+ FunctionType.BOOL,
256
+ ),
257
+ right=True,
258
+ operator=ComparisonOperator.EQ,
259
+ )
260
+ else:
261
+ root = Comparison(
262
+ left=root, right=NULL_VALUE, operator=ComparisonOperator.IS_NOT
263
+ )
264
+
265
+ return root
266
+
267
+
268
+ def unwrap_transformation(
269
+ input: Expr,
270
+ environment: Environment,
271
+ ) -> (
272
+ Function
273
+ | FilterItem
274
+ | WindowItem
275
+ | AggregateWrapper
276
+ | FunctionCallWrapper
277
+ | Parenthetical
278
+ ):
279
+ if isinstance(input, Function):
280
+ return input
281
+ elif isinstance(input, AggregateWrapper):
282
+ return input
283
+ elif isinstance(input, ConceptRef):
284
+ concept = environment.concepts[input.address]
285
+ return Function(
286
+ operator=FunctionType.ALIAS,
287
+ output_datatype=concept.datatype,
288
+ output_purpose=concept.purpose,
289
+ arguments=[input],
290
+ )
291
+ elif isinstance(input, FilterItem):
292
+ return input
293
+ elif isinstance(input, WindowItem):
294
+ return input
295
+ elif isinstance(input, FunctionCallWrapper):
296
+ return input
297
+ elif isinstance(input, Parenthetical):
298
+ return input
299
+ else:
300
+ return Function.model_construct(
301
+ operator=FunctionType.CONSTANT,
302
+ output_datatype=arg_to_datatype(input),
303
+ output_purpose=Purpose.CONSTANT,
304
+ arguments=[input],
305
+ )
306
+
307
+
308
+ def rehydrate_lineage(
309
+ lineage: Any, environment: Environment, function_factory: FunctionFactory
310
+ ) -> Any:
311
+ """Fix datatype propagation. This is a hack to fix the fact that we don't know the datatypes of functions until we've parsed all concepts"""
312
+ if isinstance(lineage, Function):
313
+ rehydrated = [
314
+ rehydrate_lineage(x, environment, function_factory)
315
+ for x in lineage.arguments
316
+ ]
317
+ return function_factory.create_function(
318
+ rehydrated,
319
+ operator=lineage.operator,
320
+ )
321
+ elif isinstance(lineage, Parenthetical):
322
+ lineage.content = rehydrate_lineage(
323
+ lineage.content, environment, function_factory
324
+ )
325
+ return lineage
326
+ elif isinstance(lineage, WindowItem):
327
+ # this is temporarily guaranteed until we do some upstream work
328
+ assert isinstance(lineage.content, ConceptRef)
329
+ lineage.content.datatype = environment.concepts[
330
+ lineage.content.address
331
+ ].datatype
332
+ return lineage
333
+ elif isinstance(lineage, AggregateWrapper):
334
+ lineage.function = rehydrate_lineage(
335
+ lineage.function, environment, function_factory
336
+ )
337
+ return lineage
338
+ elif isinstance(lineage, RowsetItem):
339
+ lineage.content.datatype = environment.concepts[
340
+ lineage.content.address
341
+ ].datatype
342
+ return lineage
343
+ else:
344
+ return lineage
345
+
346
+
347
+ def rehydrate_concept_lineage(
348
+ concept: Concept, environment: Environment, function_factory: FunctionFactory
349
+ ) -> Concept:
350
+ concept.lineage = rehydrate_lineage(concept.lineage, environment, function_factory)
351
+ if isinstance(concept.lineage, DataTyped):
352
+ concept.datatype = concept.lineage.output_datatype
353
+ return concept
354
+
355
+
356
+ class ParseToObjects(Transformer):
357
+ def __init__(
358
+ self,
359
+ environment: Environment,
360
+ parse_address: str | None = None,
361
+ token_address: Path | str | None = None,
362
+ parsed: dict[str, "ParseToObjects"] | None = None,
363
+ tokens: dict[Path | str, ParseTree] | None = None,
364
+ text_lookup: dict[Path | str, str] | None = None,
365
+ environment_lookup: dict[str, Environment] | None = None,
366
+ import_keys: list[str] | None = None,
367
+ parse_config: Parsing | None = None,
368
+ ):
369
+ Transformer.__init__(self, True)
370
+ self.environment: Environment = environment
371
+ self.parse_address: str = parse_address or SELF_LABEL
372
+ self.token_address: Path | str = token_address or SELF_LABEL
373
+ self.parsed: dict[str, ParseToObjects] = parsed if parsed is not None else {}
374
+ self.tokens: dict[Path | str, ParseTree] = tokens if tokens is not None else {}
375
+ self.environments: dict[str, Environment] = environment_lookup or {}
376
+ self.text_lookup: dict[Path | str, str] = (
377
+ text_lookup if text_lookup is not None else {}
378
+ )
379
+ # we do a second pass to pick up circular dependencies
380
+ # after initial parsing
381
+ self.parse_pass = ParsePass.INITIAL
382
+ self.function_factory = FunctionFactory(self.environment)
383
+ self.import_keys: list[str] = import_keys or ["root"]
384
+ self.parse_config: Parsing = parse_config or CONFIG.parsing
385
+
386
+ def set_text(self, text: str):
387
+ self.text_lookup[self.token_address] = text
388
+
389
+ def transform(self, tree: Tree):
390
+ results = super().transform(tree)
391
+ self.tokens[self.token_address] = tree
392
+ return results
393
+
394
+ def prepare_parse(self):
395
+ self.parse_pass = ParsePass.INITIAL
396
+ self.environment.concepts.fail_on_missing = False
397
+ for _, v in self.parsed.items():
398
+ v.prepare_parse()
399
+
400
+ def run_second_parse_pass(self, force: bool = False):
401
+ if self.token_address not in self.tokens:
402
+ return []
403
+ self.parse_pass = ParsePass.VALIDATION
404
+ for _, v in list(self.parsed.items()):
405
+ if v.parse_pass == ParsePass.VALIDATION:
406
+ continue
407
+ v.run_second_parse_pass()
408
+ reparsed = self.transform(self.tokens[self.token_address])
409
+ self.environment.concepts.undefined = {}
410
+ passed = False
411
+ passes = 0
412
+ # output datatypes for functions may have been wrong
413
+ # as they were derived from not fully understood upstream types
414
+ # so loop through to recreate function lineage until all datatypes are known
415
+
416
+ while not passed:
417
+ new_passed = True
418
+ for x, y in self.environment.concepts.items():
419
+ if y.datatype == DataType.UNKNOWN and y.lineage:
420
+ self.environment.concepts[x] = rehydrate_concept_lineage(
421
+ y, self.environment, self.function_factory
422
+ )
423
+ new_passed = False
424
+ passes += 1
425
+ if passes > MAX_PARSE_DEPTH:
426
+ break
427
+ passed = new_passed
428
+
429
+ return reparsed
430
+
431
+ def start(self, args):
432
+ return args
433
+
434
+ def LINE_SEPARATOR(self, args):
435
+ return MagicConstants.LINE_SEPARATOR
436
+
437
+ def block(self, args):
438
+ output = args[0]
439
+ if isinstance(output, ConceptDeclarationStatement):
440
+ if len(args) > 1 and args[1] != MagicConstants.LINE_SEPARATOR:
441
+ comments = [x for x in args[1:] if isinstance(x, Comment)]
442
+ merged = "\n".join([x.text.split("#")[1].rstrip() for x in comments])
443
+ output.concept.metadata.description = merged
444
+ # this is a bad plan for now;
445
+ # because a comment after an import statement is very common
446
+ # and it's not intuitive that it modifies the import description
447
+ # if isinstance(output, ImportStatement):
448
+ # if len(args) > 1 and isinstance(args[1], Comment):
449
+ # comment = args[1].text.split("#")[1].strip()
450
+ # namespace = output.alias
451
+ # for _, v in self.environment.concepts.items():
452
+ # if v.namespace == namespace:
453
+ # if v.metadata.description:
454
+ # v.metadata.description = (
455
+ # f"{comment}: {v.metadata.description}"
456
+ # )
457
+ # else:
458
+ # v.metadata.description = comment
459
+
460
+ return args[0]
461
+
462
+ def metadata(self, args):
463
+ pairs = {key: val for key, val in zip(args[::2], args[1::2])}
464
+ return Metadata(**pairs)
465
+
466
+ def IDENTIFIER(self, args) -> str:
467
+ return args.value
468
+
469
+ def ORDER_IDENTIFIER(self, args) -> ConceptRef:
470
+ return self.environment.concepts[args.value.strip()].reference
471
+
472
+ def WILDCARD_IDENTIFIER(self, args) -> str:
473
+ return args.value
474
+
475
+ def QUOTED_IDENTIFIER(self, args) -> str:
476
+ return args.value[1:-1]
477
+
478
+ @v_args(meta=True)
479
+ def concept_lit(self, meta: Meta, args) -> ConceptRef:
480
+ address = args[0]
481
+ if "." not in address and self.environment.namespace == DEFAULT_NAMESPACE:
482
+ address = f"{DEFAULT_NAMESPACE}.{address}"
483
+ mapping = self.environment.concepts[address]
484
+ datatype = mapping.output_datatype
485
+ return ConceptRef(
486
+ # this is load-bearing to handle pseudonyms
487
+ address=mapping.address,
488
+ metadata=Metadata(line_number=meta.line),
489
+ datatype=datatype,
490
+ )
491
+
492
+ def ADDRESS(self, args) -> Address:
493
+ return Address(location=args.value, quoted=False)
494
+
495
+ def QUOTED_ADDRESS(self, args) -> Address:
496
+ return Address(location=args.value[1:-1], quoted=True)
497
+
498
+ def STRING_CHARS(self, args) -> str:
499
+ return args.value
500
+
501
+ def SINGLE_STRING_CHARS(self, args) -> str:
502
+ return args.value
503
+
504
+ def DOUBLE_STRING_CHARS(self, args) -> str:
505
+ return args.value
506
+
507
+ def MINUS(self, args) -> str:
508
+ return "-"
509
+
510
+ @v_args(meta=True)
511
+ def struct_component(self, meta: Meta, args) -> StructComponent:
512
+ modifiers = []
513
+ for arg in args:
514
+ if isinstance(arg, Modifier):
515
+ modifiers.append(arg)
516
+ return StructComponent(name=args[0], type=args[1], modifiers=modifiers)
517
+
518
+ @v_args(meta=True)
519
+ def struct_type(self, meta: Meta, args) -> StructType:
520
+ final: list[
521
+ DataType
522
+ | MapType
523
+ | ArrayType
524
+ | NumericType
525
+ | StructType
526
+ | StructComponent
527
+ | Concept
528
+ ] = []
529
+ for arg in args:
530
+ if isinstance(arg, StructComponent):
531
+ final.append(arg)
532
+ else:
533
+ new = self.environment.concepts.__getitem__( # type: ignore
534
+ key=arg, line_no=meta.line
535
+ )
536
+ final.append(new)
537
+
538
+ return StructType(
539
+ fields=final,
540
+ fields_map={
541
+ x.name: x for x in final if isinstance(x, (Concept, StructComponent))
542
+ },
543
+ )
544
+
545
+ def list_type(self, args) -> ArrayType:
546
+ content = args[0]
547
+ if isinstance(content, str):
548
+ content = self.environment.concepts[content]
549
+ return ArrayType(type=content)
550
+
551
+ def numeric_type(self, args) -> NumericType:
552
+ return NumericType(precision=args[0], scale=args[1])
553
+
554
+ def map_type(self, args) -> MapType:
555
+ key = args[0]
556
+ value = args[1]
557
+ if isinstance(key, str):
558
+ key = self.environment.concepts[key]
559
+ elif isinstance(value, str):
560
+ value = self.environment.concepts[value]
561
+ return MapType(key_type=key, value_type=value)
562
+
563
+ @v_args(meta=True)
564
+ def data_type(
565
+ self, meta: Meta, args
566
+ ) -> DataType | TraitDataType | ArrayType | StructType | MapType | NumericType:
567
+ resolved = args[0]
568
+ traits = args[2:]
569
+ base: DataType | TraitDataType | ArrayType | StructType | MapType | NumericType
570
+ if isinstance(resolved, StructType):
571
+ base = resolved
572
+ elif isinstance(resolved, ArrayType):
573
+ base = resolved
574
+ elif isinstance(resolved, NumericType):
575
+ base = resolved
576
+ elif isinstance(resolved, MapType):
577
+ base = resolved
578
+ else:
579
+ base = DataType(args[0].lower())
580
+ if traits:
581
+ for trait in traits:
582
+ if trait not in self.environment.data_types:
583
+ raise ParseError(
584
+ f"Invalid trait (type) {trait} for {base}, line {meta.line}."
585
+ )
586
+ matched = self.environment.data_types[trait]
587
+ if not is_compatible_datatype(matched.type, base):
588
+ raise ParseError(
589
+ f"Invalid trait (type) {trait} for {base}, line {meta.line}. Trait expects type {matched.type}, has {base}"
590
+ )
591
+ return TraitDataType(type=base, traits=traits)
592
+
593
+ return base
594
+
595
+ def array_comparison(self, args) -> ComparisonOperator:
596
+ return ComparisonOperator([x.value.lower() for x in args])
597
+
598
+ def COMPARISON_OPERATOR(self, args) -> ComparisonOperator:
599
+ return ComparisonOperator(args.strip())
600
+
601
+ def LOGICAL_OPERATOR(self, args) -> BooleanOperator:
602
+ return BooleanOperator(args.lower())
603
+
604
+ def concept_assignment(self, args):
605
+ return args
606
+
607
+ @v_args(meta=True)
608
+ def column_assignment(self, meta: Meta, args):
609
+ modifiers = []
610
+ if len(args) == 2:
611
+ alias = args[0]
612
+ concept_list = args[1]
613
+ else:
614
+ alias = args[0][-1]
615
+ concept_list = args[0]
616
+ # recursively collect modifiers
617
+ if len(concept_list) > 1:
618
+ modifiers += concept_list[:-1]
619
+ concept = concept_list[-1]
620
+ resolved = self.environment.concepts.__getitem__( # type: ignore
621
+ key=concept, line_no=meta.line, file=self.token_address
622
+ )
623
+ return ColumnAssignment(
624
+ alias=alias, modifiers=modifiers, concept=resolved.reference
625
+ )
626
+
627
+ def _TERMINATOR(self, args):
628
+ return None
629
+
630
+ def _static_functions(self, args):
631
+ return args[0]
632
+
633
+ def MODIFIER(self, args) -> Modifier:
634
+ return Modifier(args.value)
635
+
636
+ def SHORTHAND_MODIFIER(self, args) -> Modifier:
637
+ return Modifier(args.value)
638
+
639
+ def PURPOSE(self, args) -> Purpose:
640
+ return Purpose(args.value)
641
+
642
+ def AUTO(self, args) -> Purpose:
643
+ return Purpose.AUTO
644
+
645
+ def CONST(self, args) -> Purpose:
646
+ return Purpose.CONSTANT
647
+
648
+ def CONSTANT(self, args) -> Purpose:
649
+ return Purpose.CONSTANT
650
+
651
+ def PROPERTY(self, args):
652
+ return Purpose.PROPERTY
653
+
654
+ def HASH_TYPE(self, args):
655
+ return args.value
656
+
657
+ @v_args(meta=True)
658
+ def prop_ident(self, meta: Meta, args) -> Tuple[List[Concept], str]:
659
+ return [self.environment.concepts[grain] for grain in args[:-1]], args[-1]
660
+
661
+ @v_args(meta=True)
662
+ def concept_property_declaration(self, meta: Meta, args) -> Concept:
663
+ unique = False
664
+ if not args[0] == Purpose.PROPERTY:
665
+ unique = True
666
+ args = args[1:]
667
+ metadata = Metadata()
668
+ modifiers = []
669
+ for arg in args:
670
+ if isinstance(arg, Metadata):
671
+ metadata = arg
672
+ if isinstance(arg, Modifier):
673
+ modifiers.append(arg)
674
+
675
+ declaration = args[1]
676
+ if isinstance(declaration, (tuple)):
677
+ parents, name = declaration
678
+ if "." in name:
679
+ namespace, name = name.split(".", 1)
680
+ else:
681
+ namespace = self.environment.namespace or DEFAULT_NAMESPACE
682
+ else:
683
+ if "." not in declaration:
684
+ raise ParseError(
685
+ f"Property declaration {args[1]} must be fully qualified with a parent key"
686
+ )
687
+ grain, name = declaration.rsplit(".", 1)
688
+ parent = self.environment.concepts[grain]
689
+ parents = [parent]
690
+ namespace = parent.namespace
691
+ concept = Concept(
692
+ name=name,
693
+ datatype=args[2],
694
+ purpose=Purpose.PROPERTY if not unique else Purpose.UNIQUE_PROPERTY,
695
+ metadata=metadata,
696
+ grain=Grain(components={x.address for x in parents}),
697
+ namespace=namespace,
698
+ keys=set([x.address for x in parents]),
699
+ modifiers=modifiers,
700
+ )
701
+
702
+ self.environment.add_concept(concept, meta)
703
+ return concept
704
+
705
+ @v_args(meta=True)
706
+ def concept_declaration(self, meta: Meta, args) -> ConceptDeclarationStatement:
707
+ metadata = Metadata()
708
+ modifiers = []
709
+ purpose = args[0]
710
+ datatype = args[2]
711
+ for arg in args:
712
+ if isinstance(arg, Metadata):
713
+ metadata = arg
714
+ if isinstance(arg, Modifier):
715
+ modifiers.append(arg)
716
+ name = args[1]
717
+ _, namespace, name, _ = parse_concept_reference(name, self.environment)
718
+ if purpose == Purpose.PARAMETER:
719
+ value = self.environment.parameters.get(name, None)
720
+ if not value:
721
+ raise MissingParameterException(
722
+ f'This script requires parameter "{name}" to be set in environment.'
723
+ )
724
+ if datatype == DataType.INTEGER:
725
+ value = int(value)
726
+ elif datatype == DataType.FLOAT:
727
+ value = float(value)
728
+ elif datatype == DataType.BOOL:
729
+ value = bool(value)
730
+ elif datatype == DataType.STRING:
731
+ value = str(value)
732
+ elif datatype == DataType.DATE:
733
+ if isinstance(value, date):
734
+ value = value
735
+ else:
736
+ value = date.fromisoformat(value)
737
+ elif datatype == DataType.DATETIME:
738
+ if isinstance(value, datetime):
739
+ value = value
740
+ else:
741
+ value = datetime.fromisoformat(value)
742
+ else:
743
+ raise ParseError(
744
+ f"Unsupported datatype {datatype} for parameter {name}."
745
+ )
746
+ rval = self.constant_derivation(
747
+ meta, [Purpose.CONSTANT, name, value, metadata]
748
+ )
749
+ return rval
750
+
751
+ concept = Concept(
752
+ name=name,
753
+ datatype=datatype,
754
+ purpose=purpose,
755
+ metadata=metadata,
756
+ namespace=namespace,
757
+ modifiers=modifiers,
758
+ derivation=Derivation.ROOT,
759
+ granularity=Granularity.MULTI_ROW,
760
+ )
761
+ if concept.metadata:
762
+ concept.metadata.line_number = meta.line
763
+ self.environment.add_concept(concept, meta=meta)
764
+ return ConceptDeclarationStatement(concept=concept)
765
+
766
+ @v_args(meta=True)
767
+ def concept_derivation(self, meta: Meta, args) -> ConceptDerivationStatement:
768
+
769
+ if len(args) > 3:
770
+ metadata = args[3]
771
+ else:
772
+ metadata = None
773
+ purpose = args[0]
774
+ raw_name = args[1]
775
+ # abc.def.property pattern
776
+ if isinstance(raw_name, str):
777
+ lookup, namespace, name, parent_concept = parse_concept_reference(
778
+ raw_name, self.environment, purpose
779
+ )
780
+ # <abc.def,zef.gf>.property pattern
781
+ else:
782
+ keys, name = raw_name
783
+ keys = [x.address for x in keys]
784
+ namespaces = set([x.rsplit(".", 1)[0] for x in keys])
785
+ if not len(namespaces) == 1:
786
+ namespace = self.environment.namespace or DEFAULT_NAMESPACE
787
+ else:
788
+ namespace = namespaces.pop()
789
+ source_value = args[2]
790
+ # we need to strip off every parenthetical to see what is being assigned.
791
+ while isinstance(source_value, Parenthetical):
792
+ source_value = source_value.content
793
+
794
+ if isinstance(
795
+ source_value,
796
+ (
797
+ FilterItem,
798
+ WindowItem,
799
+ AggregateWrapper,
800
+ Function,
801
+ FunctionCallWrapper,
802
+ Comparison,
803
+ ),
804
+ ):
805
+ concept = arbitrary_to_concept(
806
+ source_value,
807
+ name=name,
808
+ namespace=namespace,
809
+ environment=self.environment,
810
+ metadata=metadata,
811
+ )
812
+
813
+ # let constant purposes exist to support round-tripping
814
+ # as a build concept may end up with a constant based on constant inlining happening recursively
815
+ if purpose == Purpose.KEY and concept.purpose != Purpose.KEY:
816
+ concept.purpose = Purpose.KEY
817
+ elif (
818
+ purpose
819
+ and purpose != Purpose.AUTO
820
+ and concept.purpose != purpose
821
+ and purpose != Purpose.CONSTANT
822
+ ):
823
+ raise SyntaxError(
824
+ f'Concept {name} purpose {concept.purpose} does not match declared purpose {purpose}. Suggest defaulting to "auto"'
825
+ )
826
+
827
+ if concept.metadata:
828
+ concept.metadata.line_number = meta.line
829
+ self.environment.add_concept(concept, meta=meta)
830
+ return ConceptDerivationStatement(concept=concept)
831
+
832
+ elif isinstance(source_value, CONSTANT_TYPES):
833
+ concept = constant_to_concept(
834
+ source_value,
835
+ name=name,
836
+ namespace=namespace,
837
+ metadata=metadata,
838
+ )
839
+ if concept.metadata:
840
+ concept.metadata.line_number = meta.line
841
+ self.environment.add_concept(concept, meta=meta)
842
+ return ConceptDerivationStatement(concept=concept)
843
+
844
+ raise SyntaxError(
845
+ f"Received invalid type {type(args[2])} {args[2]} as input to concept derivation: `{self.text_lookup[self.token_address][meta.start_pos:meta.end_pos]}`"
846
+ )
847
+
848
+ @v_args(meta=True)
849
+ def rowset_derivation_statement(
850
+ self, meta: Meta, args
851
+ ) -> RowsetDerivationStatement:
852
+ name = args[0]
853
+ select: SelectStatement | MultiSelectStatement = args[1]
854
+ output = RowsetDerivationStatement(
855
+ name=name,
856
+ select=select,
857
+ namespace=self.environment.namespace or DEFAULT_NAMESPACE,
858
+ )
859
+
860
+ for new_concept in rowset_to_concepts(output, self.environment):
861
+ if new_concept.metadata:
862
+ new_concept.metadata.line_number = meta.line
863
+ self.environment.add_concept(new_concept, force=True)
864
+
865
+ self.environment.add_rowset(
866
+ output.name, output.select.as_lineage(self.environment)
867
+ )
868
+ return output
869
+
870
+ @v_args(meta=True)
871
+ def constant_derivation(
872
+ self, meta: Meta, args: tuple[Purpose, str, Any, Optional[Metadata]]
873
+ ) -> Concept:
874
+
875
+ if len(args) > 3:
876
+ metadata = args[3]
877
+ else:
878
+ metadata = None
879
+ name = args[1]
880
+ constant: Union[str, float, int, bool, MapWrapper, ListWrapper] = args[2]
881
+ lookup, namespace, name, parent = parse_concept_reference(
882
+ name, self.environment
883
+ )
884
+ concept = Concept(
885
+ name=name,
886
+ datatype=arg_to_datatype(constant),
887
+ purpose=Purpose.CONSTANT,
888
+ metadata=Metadata(line_number=meta.line) if not metadata else metadata,
889
+ lineage=Function(
890
+ operator=FunctionType.CONSTANT,
891
+ output_datatype=arg_to_datatype(constant),
892
+ output_purpose=Purpose.CONSTANT,
893
+ arguments=[constant],
894
+ ),
895
+ grain=Grain(components=set()),
896
+ namespace=namespace,
897
+ granularity=Granularity.SINGLE_ROW,
898
+ )
899
+ if concept.metadata:
900
+ concept.metadata.line_number = meta.line
901
+ self.environment.add_concept(concept, meta)
902
+ return concept
903
+
904
+ @v_args(meta=True)
905
+ def concept(self, meta: Meta, args) -> ConceptDeclarationStatement:
906
+ if isinstance(args[0], Concept):
907
+ concept: Concept = args[0]
908
+ else:
909
+ concept = args[0].concept
910
+ if concept.metadata:
911
+ concept.metadata.line_number = meta.line
912
+ return ConceptDeclarationStatement(concept=concept)
913
+
914
+ def column_assignment_list(self, args):
915
+ return args
916
+
917
+ def column_list(self, args) -> List:
918
+ return args
919
+
920
+ def grain_clause(self, args) -> Grain:
921
+ return Grain(
922
+ components=set([self.environment.concepts[a].address for a in args[0]])
923
+ )
924
+
925
+ @v_args(meta=True)
926
+ def aggregate_by(self, meta: Meta, args):
927
+ base = args[0]
928
+ b_concept = base.value.split(" ")[-1]
929
+ args = [self.environment.concepts[a] for a in [b_concept] + args[1:]]
930
+ return self.function_factory.create_function(args, FunctionType.GROUP, meta)
931
+
932
+ def whole_grain_clause(self, args) -> WholeGrainWrapper:
933
+ return WholeGrainWrapper(where=args[0])
934
+
935
+ def MULTILINE_STRING(self, args) -> str:
936
+ return args[3:-3]
937
+
938
+ def raw_column_assignment(self, args):
939
+ return RawColumnExpr(text=args[1])
940
+
941
+ def DATASOURCE_STATUS(self, args) -> DatasourceState:
942
+ return DatasourceState(args.value.lower())
943
+
944
+ @v_args(meta=True)
945
+ def datasource_status_clause(self, meta: Meta, args):
946
+ return args[1]
947
+
948
+ @v_args(meta=True)
949
+ def datasource_partition_clause(self, meta: Meta, args):
950
+ return DatasourcePartitionClause([ConceptRef(address=arg) for arg in args[0]])
951
+
952
+ @v_args(meta=True)
953
+ def datasource_update_trigger_clause(self, meta: Meta, args):
954
+ trigger_type = DatasourceUpdateTrigger(args[0].lower())
955
+ columns = [ConceptRef(address=arg) for arg in args[1]]
956
+ return DatasourceUpdateTriggerClause(trigger_type=trigger_type, columns=columns)
957
+
958
+ @v_args(meta=True)
959
+ def datasource(self, meta: Meta, args):
960
+ is_root = False
961
+ if isinstance(args[0], Token) and args[0].lower() == "root":
962
+ is_root = True
963
+ args = args[1:]
964
+ name = args[0]
965
+ columns: List[ColumnAssignment] = args[1]
966
+ grain: Optional[Grain] = None
967
+ address: Optional[Address] = None
968
+ where: Optional[WhereClause] = None
969
+ non_partial_for: Optional[WhereClause] = None
970
+ incremental_by: List[ConceptRef] = []
971
+ partition_by: List[ConceptRef] = []
972
+ freshness_by: List[ConceptRef] = []
973
+ datasource_status: DatasourceState = DatasourceState.PUBLISHED
974
+ for val in args[1:]:
975
+ if isinstance(val, Address):
976
+ address = val
977
+ elif isinstance(val, Grain):
978
+ grain = val
979
+ elif isinstance(val, WholeGrainWrapper):
980
+ non_partial_for = val.where
981
+ elif isinstance(val, Query):
982
+ address = Address(location=val.text, type=AddressType.QUERY)
983
+ elif isinstance(val, File):
984
+ address = Address(location=val.path, type=val.type)
985
+ elif isinstance(val, WhereClause):
986
+ where = val
987
+ elif isinstance(val, DatasourceState):
988
+ datasource_status = val
989
+ elif isinstance(val, DatasourceUpdateTriggerClause):
990
+ if val.trigger_type == DatasourceUpdateTrigger.INCREMENTAL:
991
+ incremental_by = val.columns
992
+ elif val.trigger_type == DatasourceUpdateTrigger.FRESHNESS:
993
+ freshness_by = val.columns
994
+ elif isinstance(val, DatasourcePartitionClause):
995
+ partition_by = val.columns
996
+ if not address:
997
+ raise ValueError(
998
+ "Malformed datasource, missing address or query declaration"
999
+ )
1000
+
1001
+ datasource = Datasource(
1002
+ name=name,
1003
+ columns=columns,
1004
+ # grain will be set by default from args
1005
+ # TODO: move to factory
1006
+ grain=grain, # type: ignore
1007
+ address=address,
1008
+ namespace=self.environment.namespace,
1009
+ where=where,
1010
+ non_partial_for=non_partial_for,
1011
+ status=datasource_status,
1012
+ incremental_by=incremental_by,
1013
+ partition_by=partition_by,
1014
+ freshness_by=freshness_by,
1015
+ is_root=is_root,
1016
+ )
1017
+ if datasource.where:
1018
+ for x in datasource.where.concept_arguments:
1019
+ if x.address not in datasource.output_concepts:
1020
+ raise ValueError(
1021
+ f"Datasource {name} where condition depends on concept {x.address} that does not exist on the datasource, line {meta.line}."
1022
+ )
1023
+ if self.parse_pass == ParsePass.VALIDATION:
1024
+ self.environment.add_datasource(datasource, meta=meta)
1025
+ # if we have any foreign keys on the datasource, we can
1026
+ # at this point optimize them to properties if they do not have other usage.
1027
+ for column in columns:
1028
+ # skip partial for now
1029
+ if not grain:
1030
+ continue
1031
+ if column.concept.address in grain.components:
1032
+ continue
1033
+ target_c = self.environment.concepts[column.concept.address]
1034
+ if target_c.purpose != Purpose.KEY:
1035
+ continue
1036
+
1037
+ key_inputs = grain.components
1038
+ eligible = True
1039
+ for key in key_inputs:
1040
+ # never overwrite a key with a dependency on a property
1041
+ # for example - binding a datasource with a grain of <x>.fun should
1042
+ # never override the grain of x to <fun>
1043
+ if column.concept.address in (
1044
+ self.environment.concepts[key].keys or set()
1045
+ ):
1046
+ eligible = False
1047
+ if not eligible:
1048
+ continue
1049
+ keys = [self.environment.concepts[grain] for grain in key_inputs]
1050
+ # target_c.purpose = Purpose.PROPERTY
1051
+ target_c.keys = set([x.address for x in keys])
1052
+ # target_c.grain = Grain(components={x.address for x in keys})
1053
+
1054
+ return datasource
1055
+
1056
+ @v_args(meta=True)
1057
+ def comment(self, meta: Meta, args):
1058
+ assert len(args) == 1
1059
+ return Comment(text=args[0].value)
1060
+
1061
+ def PARSE_COMMENT(self, args):
1062
+ return Comment(text=args.value.rstrip())
1063
+
1064
+ @v_args(meta=True)
1065
+ def select_transform(self, meta: Meta, args) -> ConceptTransform:
1066
+ output: str = args[1]
1067
+ transformation = unwrap_transformation(args[0], self.environment)
1068
+ lookup, namespace, output, parent = parse_concept_reference(
1069
+ output, self.environment
1070
+ )
1071
+
1072
+ metadata = Metadata(line_number=meta.line, concept_source=ConceptSource.SELECT)
1073
+ concept = arbitrary_to_concept(
1074
+ transformation,
1075
+ environment=self.environment,
1076
+ namespace=namespace,
1077
+ name=output,
1078
+ metadata=metadata,
1079
+ )
1080
+ return ConceptTransform(function=transformation, output=concept)
1081
+
1082
+ @v_args(meta=True)
1083
+ def concept_nullable_modifier(self, meta: Meta, args) -> Modifier:
1084
+ return Modifier.NULLABLE
1085
+
1086
+ @v_args(meta=True)
1087
+ def select_hide_modifier(self, meta: Meta, args) -> Modifier:
1088
+ return Modifier.HIDDEN
1089
+
1090
+ @v_args(meta=True)
1091
+ def select_partial_modifier(self, meta: Meta, args) -> Modifier:
1092
+ return Modifier.PARTIAL
1093
+
1094
+ @v_args(meta=True)
1095
+ def select_item(self, meta: Meta, args) -> Optional[SelectItem]:
1096
+ modifiers = [arg for arg in args if isinstance(arg, Modifier)]
1097
+ args = [arg for arg in args if not isinstance(arg, (Modifier, Comment))]
1098
+
1099
+ if not args:
1100
+ return None
1101
+ if len(args) != 1:
1102
+ raise ParseError(
1103
+ "Malformed select statement"
1104
+ f" {args} {self.text_lookup[self.parse_address][meta.start_pos:meta.end_pos]}"
1105
+ )
1106
+ content = args[0]
1107
+ if isinstance(content, ConceptTransform):
1108
+ return SelectItem(content=content, modifiers=modifiers)
1109
+ return SelectItem(
1110
+ content=content,
1111
+ modifiers=modifiers,
1112
+ )
1113
+
1114
+ def select_list(self, args):
1115
+ return [arg for arg in args if arg]
1116
+
1117
+ def limit(self, args):
1118
+ return Limit(count=int(args[0].value))
1119
+
1120
+ def ordering(self, args: list[str]):
1121
+ base = args[0].lower()
1122
+ if len(args) > 1:
1123
+ null_sort = args[-1]
1124
+ return Ordering(" ".join([base, "nulls", null_sort.lower()]))
1125
+ return Ordering(base)
1126
+
1127
+ def order_list(self, args) -> List[OrderItem]:
1128
+ return [
1129
+ OrderItem(
1130
+ expr=x,
1131
+ order=y,
1132
+ )
1133
+ for x, y in zip(args[::2], args[1::2])
1134
+ ]
1135
+
1136
+ def order_by(self, args):
1137
+ return OrderBy(items=args[0])
1138
+
1139
+ def over_component(self, args):
1140
+ return ConceptRef(address=args[0].value.lstrip(",").strip())
1141
+
1142
+ def over_list(self, args):
1143
+ return [x for x in args]
1144
+
1145
+ def PUBLISH_ACTION(self, args) -> PublishAction:
1146
+ action = args.value.lower()
1147
+ if action == "publish":
1148
+ return PublishAction.PUBLISH
1149
+ elif action == "unpublish":
1150
+ return PublishAction.UNPUBLISH
1151
+ else:
1152
+ raise SyntaxError(f"Unknown publish action: {action}")
1153
+
1154
+ @v_args(meta=True)
1155
+ def publish_statement(self, meta: Meta, args) -> PublishStatement:
1156
+ targets = []
1157
+ scope = ValidationScope.DATASOURCES
1158
+ publish_action = PublishAction.PUBLISH
1159
+ for arg in args:
1160
+ if isinstance(arg, str):
1161
+ targets.append(arg)
1162
+ elif isinstance(arg, PublishAction):
1163
+ publish_action = arg
1164
+ elif isinstance(arg, ValidationScope):
1165
+ scope = arg
1166
+ if arg != ValidationScope.DATASOURCES:
1167
+ raise SyntaxError(
1168
+ f"Publishing is only supported for Datasources, got {arg} on line {meta.line}"
1169
+ )
1170
+ return PublishStatement(
1171
+ scope=scope,
1172
+ targets=targets,
1173
+ action=publish_action,
1174
+ )
1175
+
1176
+ def create_modifier_clause(self, args):
1177
+ token = args[0]
1178
+ if token.type == "CREATE_IF_NOT_EXISTS":
1179
+ return CreateMode.CREATE_IF_NOT_EXISTS
1180
+ elif token.type == "CREATE_OR_REPLACE":
1181
+ return CreateMode.CREATE_OR_REPLACE
1182
+
1183
+ @v_args(meta=True)
1184
+ def create_statement(self, meta: Meta, args) -> CreateStatement:
1185
+ targets = []
1186
+ scope = ValidationScope.DATASOURCES
1187
+ create_mode = CreateMode.CREATE
1188
+ for arg in args:
1189
+ if isinstance(arg, str):
1190
+ targets.append(arg)
1191
+ elif isinstance(arg, ValidationScope):
1192
+ scope = arg
1193
+ if arg != ValidationScope.DATASOURCES:
1194
+ raise SyntaxError(
1195
+ f"Creating is only supported for Datasources, got {arg} on line {meta.line}"
1196
+ )
1197
+ elif isinstance(arg, CreateMode):
1198
+ create_mode = arg
1199
+
1200
+ return CreateStatement(scope=scope, targets=targets, create_mode=create_mode)
1201
+
1202
+ def VALIDATE_SCOPE(self, args) -> ValidationScope:
1203
+ base: str = args.lower()
1204
+ if not base.endswith("s"):
1205
+ base += "s"
1206
+ return ValidationScope(base)
1207
+
1208
+ @v_args(meta=True)
1209
+ def validate_statement(self, meta: Meta, args) -> ValidateStatement:
1210
+ if len(args) > 1:
1211
+ scope = args[0]
1212
+ targets = args[1:]
1213
+ elif len(args) == 0:
1214
+ scope = ValidationScope.ALL
1215
+ targets = None
1216
+ else:
1217
+ scope = args[0]
1218
+ targets = None
1219
+ return ValidateStatement(
1220
+ scope=scope,
1221
+ targets=targets,
1222
+ )
1223
+
1224
+ @v_args(meta=True)
1225
+ def mock_statement(self, meta: Meta, args) -> MockStatement:
1226
+ return MockStatement(scope=args[0], targets=args[1:])
1227
+
1228
+ @v_args(meta=True)
1229
+ def merge_statement(self, meta: Meta, args) -> MergeStatementV2 | None:
1230
+ modifiers = []
1231
+ cargs: list[str] = []
1232
+ source_wildcard = None
1233
+ target_wildcard = None
1234
+ for arg in args:
1235
+ if isinstance(arg, Modifier):
1236
+ modifiers.append(arg)
1237
+ else:
1238
+ cargs.append(arg)
1239
+ source, target = cargs
1240
+ if source.endswith(".*"):
1241
+ if not target.endswith(".*"):
1242
+ raise ValueError("Invalid merge, source is wildcard, target is not")
1243
+ source_wildcard = source[:-2]
1244
+ target_wildcard = target[:-2]
1245
+ sources: list[Concept] = [
1246
+ v
1247
+ for k, v in self.environment.concepts.items()
1248
+ if v.namespace == source_wildcard
1249
+ ]
1250
+ targets: dict[str, Concept] = {}
1251
+ for x in sources:
1252
+ target = target_wildcard + "." + x.name
1253
+ if target in self.environment.concepts:
1254
+ targets[x.address] = self.environment.concepts[target]
1255
+ sources = [x for x in sources if x.address in targets]
1256
+ else:
1257
+ sources = [self.environment.concepts[source]]
1258
+ targets = {sources[0].address: self.environment.concepts[target]}
1259
+
1260
+ if self.parse_pass == ParsePass.VALIDATION:
1261
+ for source_c in sources:
1262
+ if isinstance(source_c, UndefinedConceptFull):
1263
+ raise SyntaxError(
1264
+ f"Cannot merge non-existent source concept {source_c.address} on line: {meta.line}"
1265
+ )
1266
+ new = MergeStatementV2(
1267
+ sources=sources,
1268
+ targets=targets,
1269
+ modifiers=modifiers,
1270
+ source_wildcard=source_wildcard,
1271
+ target_wildcard=target_wildcard,
1272
+ )
1273
+ for source_c in new.sources:
1274
+ self.environment.merge_concept(
1275
+ source_c, targets[source_c.address], modifiers
1276
+ )
1277
+
1278
+ return new
1279
+ return None
1280
+
1281
+ @v_args(meta=True)
1282
+ def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
1283
+ statement = RawSQLStatement(meta=Metadata(line_number=meta.line), text=args[0])
1284
+ return statement
1285
+
1286
+ def COPY_TYPE(self, args) -> IOType:
1287
+ return IOType(args.value)
1288
+
1289
+ @v_args(meta=True)
1290
+ def copy_statement(self, meta: Meta, args) -> CopyStatement:
1291
+ return CopyStatement(
1292
+ target=args[1],
1293
+ target_type=args[0],
1294
+ meta=Metadata(line_number=meta.line),
1295
+ select=args[-1],
1296
+ )
1297
+
1298
+ def resolve_import_address(self, address: str, is_stdlib: bool = False) -> str:
1299
+ if (
1300
+ isinstance(
1301
+ self.environment.config.import_resolver, FileSystemImportResolver
1302
+ )
1303
+ or is_stdlib
1304
+ ):
1305
+ with open(address, "r", encoding="utf-8") as f:
1306
+ text = f.read()
1307
+ elif isinstance(self.environment.config.import_resolver, DictImportResolver):
1308
+ lookup = address
1309
+ if lookup not in self.environment.config.import_resolver.content:
1310
+ raise ImportError(
1311
+ f"Unable to import file {lookup}, not resolvable from provided source files."
1312
+ )
1313
+ text = self.environment.config.import_resolver.content[lookup]
1314
+ else:
1315
+ raise ImportError(
1316
+ f"Unable to import file {address}, resolver type {type(self.environment.config.import_resolver)} not supported"
1317
+ )
1318
+ return text
1319
+
1320
+ def IMPORT_DOT(self, args) -> str:
1321
+ return "."
1322
+
1323
+ def import_statement(self, args: list[str]) -> ImportStatement:
1324
+ start = datetime.now()
1325
+ is_file_resolver = isinstance(
1326
+ self.environment.config.import_resolver, FileSystemImportResolver
1327
+ )
1328
+ parent_dirs = -1
1329
+ parsed_args = []
1330
+ for x in args:
1331
+ if x == ".":
1332
+ parent_dirs += 1
1333
+ else:
1334
+ parsed_args.append(x)
1335
+ parent_dirs = max(parent_dirs, 0)
1336
+ args = parsed_args
1337
+ if len(args) == 2:
1338
+ alias = args[-1]
1339
+ cache_key = args[-1]
1340
+ else:
1341
+ alias = self.environment.namespace
1342
+ cache_key = args[0]
1343
+ input_path = args[0]
1344
+ # lstrip off '.' from parent if they exist;
1345
+ # each one is an extra directory up after the first
1346
+
1347
+ path = input_path.split(".")
1348
+ is_stdlib = False
1349
+ if path[0] == "std":
1350
+ is_stdlib = True
1351
+ target = join(STDLIB_ROOT, *path) + ".preql"
1352
+ token_lookup: Path | str = Path(target)
1353
+ elif is_file_resolver:
1354
+ troot = Path(self.environment.working_path)
1355
+ if parent_dirs > 0:
1356
+ for _ in range(parent_dirs):
1357
+ troot = troot.parent
1358
+ target = join(troot, *path) + ".preql"
1359
+ # tokens + text are cached by path
1360
+ token_lookup = Path(target)
1361
+ elif isinstance(self.environment.config.import_resolver, DictImportResolver):
1362
+ target = ".".join(path)
1363
+ token_lookup = target
1364
+ else:
1365
+ raise NotImplementedError
1366
+
1367
+ # parser + env has to be cached by prior import path + current key
1368
+ key_path = self.import_keys + [cache_key]
1369
+ cache_lookup = "-".join(key_path)
1370
+
1371
+ # we don't iterate past the max parse depth
1372
+ if len(key_path) > MAX_PARSE_DEPTH:
1373
+ return ImportStatement(
1374
+ alias=alias, input_path=input_path, path=Path(target)
1375
+ )
1376
+
1377
+ if token_lookup in self.tokens:
1378
+ perf_logger.debug(f"\tTokens cached for {token_lookup}")
1379
+ raw_tokens = self.tokens[token_lookup]
1380
+ text = self.text_lookup[token_lookup]
1381
+ else:
1382
+ perf_logger.debug(f"\tTokens not cached for {token_lookup}, resolving")
1383
+ text = self.resolve_import_address(target, is_stdlib)
1384
+ self.text_lookup[token_lookup] = text
1385
+
1386
+ try:
1387
+ raw_tokens = PARSER.parse(text)
1388
+ except Exception as e:
1389
+ raise ImportError(
1390
+ f"Unable to import '{target}', parsing error: {e}"
1391
+ ) from e
1392
+ self.tokens[token_lookup] = raw_tokens
1393
+
1394
+ if cache_lookup in self.parsed:
1395
+ perf_logger.debug(f"\tEnvironment cached for {token_lookup}")
1396
+ nparser = self.parsed[cache_lookup]
1397
+ new_env = nparser.environment
1398
+ if nparser.parse_pass != ParsePass.VALIDATION:
1399
+ # nparser.transform(raw_tokens)
1400
+ second_pass_start = datetime.now()
1401
+ nparser.run_second_parse_pass()
1402
+ second_pass_end = datetime.now()
1403
+ perf_logger.debug(
1404
+ f"{second_pass_end - second_pass_start} seconds | Import {alias} key ({cache_key}) second pass took {second_pass_end - second_pass_start} to parse, {len(new_env.concepts)} concepts"
1405
+ )
1406
+ else:
1407
+ perf_logger.debug(f"\tParsing new for {token_lookup}")
1408
+ root = None
1409
+ if "." in str(token_lookup):
1410
+ root = str(token_lookup).rsplit(".", 1)[0]
1411
+ try:
1412
+ new_env = Environment(
1413
+ working_path=dirname(target),
1414
+ env_file_path=token_lookup,
1415
+ config=self.environment.config.copy_for_root(root=root),
1416
+ parameters=self.environment.parameters,
1417
+ )
1418
+ new_env.concepts.fail_on_missing = False
1419
+ self.parsed[self.parse_address] = self
1420
+ nparser = ParseToObjects(
1421
+ environment=new_env,
1422
+ parse_address=cache_lookup,
1423
+ token_address=token_lookup,
1424
+ parsed=self.parsed,
1425
+ tokens=self.tokens,
1426
+ text_lookup=self.text_lookup,
1427
+ import_keys=self.import_keys + [cache_key],
1428
+ parse_config=self.parse_config,
1429
+ )
1430
+ nparser.transform(raw_tokens)
1431
+ self.parsed[cache_lookup] = nparser
1432
+ except Exception as e:
1433
+ raise ImportError(
1434
+ f"Unable to import file {target}, parsing error: {e}"
1435
+ ) from e
1436
+
1437
+ parsed_path = Path(args[0])
1438
+ imps = ImportStatement(alias=alias, input_path=input_path, path=parsed_path)
1439
+
1440
+ self.environment.add_import(
1441
+ alias,
1442
+ new_env,
1443
+ Import(
1444
+ alias=alias,
1445
+ path=parsed_path,
1446
+ input_path=Path(target) if is_file_resolver else None,
1447
+ ),
1448
+ )
1449
+ end = datetime.now()
1450
+ perf_logger.debug(
1451
+ f"{end - start} seconds | Import {alias} key ({cache_key}) took {end - start} to parse, {len(new_env.concepts)} concepts"
1452
+ )
1453
+ return imps
1454
+
1455
+ @v_args(meta=True)
1456
+ def show_category(self, meta: Meta, args) -> ShowCategory:
1457
+ return ShowCategory(args[0])
1458
+
1459
+ @v_args(meta=True)
1460
+ def show_statement(self, meta: Meta, args) -> ShowStatement:
1461
+ return ShowStatement(content=args[0])
1462
+
1463
+ @v_args(meta=True)
1464
+ def persist_partition_clause(self, meta: Meta, args) -> DatasourcePartitionClause:
1465
+ return DatasourcePartitionClause([ConceptRef(address=a) for a in args[0]])
1466
+
1467
+ @v_args(meta=True)
1468
+ def PERSIST_MODE(self, args) -> PersistMode:
1469
+ base = args.value.lower()
1470
+ if base == "persist":
1471
+ return PersistMode.OVERWRITE
1472
+ return PersistMode(base)
1473
+
1474
+ @v_args(meta=True)
1475
+ def auto_persist(self, meta: Meta, args) -> PersistStatement | None:
1476
+ if self.parse_pass != ParsePass.VALIDATION:
1477
+ return None
1478
+ persist_mode = args[0]
1479
+ target_name = args[1]
1480
+ where = args[2] if len(args) > 2 else None
1481
+
1482
+ if target_name not in self.environment.datasources:
1483
+ raise SyntaxError(
1484
+ f"Auto persist target datasource {target_name} does not exist in environment on line {meta.line}. Have {list(self.environment.datasources.keys())}"
1485
+ )
1486
+ target = self.environment.datasources[target_name]
1487
+ select: SelectStatement = target.create_update_statement(
1488
+ self.environment, where, line_no=meta.line
1489
+ )
1490
+ return PersistStatement(
1491
+ select=select,
1492
+ datasource=target,
1493
+ persist_mode=persist_mode,
1494
+ partition_by=target.incremental_by,
1495
+ meta=Metadata(line_number=meta.line),
1496
+ )
1497
+
1498
+ @v_args(meta=True)
1499
+ def full_persist(self, meta: Meta, args) -> PersistStatement | None:
1500
+ if self.parse_pass != ParsePass.VALIDATION:
1501
+ return None
1502
+ partition_clause = DatasourcePartitionClause([])
1503
+ labels = [x for x in args if isinstance(x, str)]
1504
+ for x in args:
1505
+ if isinstance(x, DatasourcePartitionClause):
1506
+ partition_clause = x
1507
+ if len(labels) == 2:
1508
+ identifier = labels[0]
1509
+ address = labels[1]
1510
+ else:
1511
+ identifier = labels[0]
1512
+ address = None
1513
+ target: Datasource | None = self.environment.datasources.get(identifier)
1514
+
1515
+ if not address and not target:
1516
+ raise SyntaxError(
1517
+ f'Append statement without concrete table address on line {meta.line} attempts to insert into datasource "{identifier}" that cannot be found in the environment. Add a physical address to create a new datasource, or check the name.'
1518
+ )
1519
+ elif target:
1520
+ address = target.safe_address
1521
+
1522
+ assert address is not None
1523
+
1524
+ modes = [x for x in args if isinstance(x, PersistMode)]
1525
+ mode = modes[0] if modes else PersistMode.OVERWRITE
1526
+ select: SelectStatement = [x for x in args if isinstance(x, SelectStatement)][0]
1527
+
1528
+ if mode == PersistMode.APPEND:
1529
+ if target is None:
1530
+ raise SyntaxError(
1531
+ f"Cannot append to non-existent datasource {identifier} on line {meta.line}."
1532
+ )
1533
+ new_datasource: Datasource = target
1534
+ if not new_datasource.partition_by == partition_clause.columns:
1535
+ raise SyntaxError(
1536
+ f"Cannot append to datasource {identifier} with a different partitioning scheme than the insert on line {meta.line}. Datasource partitioning: {new_datasource.partition_by}, insert partitioning: {partition_clause.columns if partition_clause else '[]'}"
1537
+ )
1538
+ if len(partition_clause.columns) > 1:
1539
+ raise NotImplementedError(
1540
+ "Incremental partition overwrites by more than 1 column are not yet supported."
1541
+ )
1542
+ for x in partition_clause.columns:
1543
+ concept = self.environment.concepts[x.address]
1544
+ if concept.output_datatype not in SUPPORTED_INCREMENTAL_TYPES:
1545
+ raise SyntaxError(
1546
+ f"Cannot incremental persist on concept {concept.address} of type {concept.output_datatype} on line {meta.line}."
1547
+ )
1548
+ elif target:
1549
+ new_datasource = target
1550
+ else:
1551
+ new_datasource = select.to_datasource(
1552
+ namespace=(
1553
+ self.environment.namespace
1554
+ if self.environment.namespace
1555
+ else DEFAULT_NAMESPACE
1556
+ ),
1557
+ name=identifier,
1558
+ address=Address(location=address),
1559
+ grain=select.grain,
1560
+ environment=self.environment,
1561
+ )
1562
+ return PersistStatement(
1563
+ select=select,
1564
+ datasource=new_datasource,
1565
+ persist_mode=mode,
1566
+ partition_by=partition_clause.columns if partition_clause else [],
1567
+ meta=Metadata(line_number=meta.line),
1568
+ )
1569
+
1570
+ @v_args(meta=True)
1571
+ def persist_statement(self, meta: Meta, args) -> PersistStatement:
1572
+ return args[0]
1573
+
1574
+ @v_args(meta=True)
1575
+ def align_item(self, meta: Meta, args) -> AlignItem:
1576
+ return AlignItem(
1577
+ alias=args[0],
1578
+ namespace=self.environment.namespace,
1579
+ concepts=[self.environment.concepts[arg].reference for arg in args[1:]],
1580
+ )
1581
+
1582
+ @v_args(meta=True)
1583
+ def align_clause(self, meta: Meta, args) -> AlignClause:
1584
+ return AlignClause(items=args)
1585
+
1586
+ @v_args(meta=True)
1587
+ def derive_item(self, meta: Meta, args) -> DeriveItem:
1588
+ return DeriveItem(
1589
+ expr=args[0], name=args[1], namespace=self.environment.namespace
1590
+ )
1591
+
1592
+ @v_args(meta=True)
1593
+ def derive_clause(self, meta: Meta, args) -> DeriveClause:
1594
+
1595
+ return DeriveClause(items=args)
1596
+
1597
+ @v_args(meta=True)
1598
+ def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
1599
+
1600
+ selects: list[SelectStatement] = []
1601
+ align: AlignClause | None = None
1602
+ limit: int | None = None
1603
+ order_by: OrderBy | None = None
1604
+ where: WhereClause | None = None
1605
+ having: HavingClause | None = None
1606
+ derive: DeriveClause | None = None
1607
+ for arg in args:
1608
+ atype = type(arg)
1609
+ if atype is SelectStatement:
1610
+ selects.append(arg)
1611
+ elif atype is Limit:
1612
+ limit = arg.count
1613
+ elif atype is OrderBy:
1614
+ order_by = arg
1615
+ elif atype is WhereClause:
1616
+ where = arg
1617
+ elif atype is HavingClause:
1618
+ having = arg
1619
+ elif atype is AlignClause:
1620
+ align = arg
1621
+ elif atype is DeriveClause:
1622
+ derive = arg
1623
+
1624
+ assert align is not None
1626
+
1627
+ derived_concepts = []
1628
+ new_selects = [x.as_lineage(self.environment) for x in selects]
1629
+ lineage = MultiSelectLineage(
1630
+ selects=new_selects,
1631
+ align=align,
1632
+ derive=derive,
1633
+ namespace=self.environment.namespace,
1634
+ where_clause=where,
1635
+ having_clause=having,
1636
+ limit=limit,
1637
+ hidden_components=set(y for x in new_selects for y in x.hidden_components),
1638
+ )
1639
+ for x in align.items:
1640
+ concept = align_item_to_concept(
1641
+ x,
1642
+ align,
1643
+ selects,
1644
+ where=where,
1645
+ having=having,
1646
+ limit=limit,
1647
+ environment=self.environment,
1648
+ )
1649
+ derived_concepts.append(concept)
1650
+ self.environment.add_concept(concept, meta=meta)
1651
+ if derive:
1652
+ for derived in derive.items:
1653
+ derivation = derived.expr
1654
+ name = derived.name
1655
+ if not isinstance(derivation, (Function, Comparison, WindowItem)):
1656
+ raise SyntaxError(
1657
+ f"Invalid derive expression {derivation} on line {meta.line}; must be a function, comparison, or window expression"
1658
+ )
1659
+ concept = derive_item_to_concept(
1660
+ derivation, name, lineage, self.environment.namespace
1661
+ )
1662
+ derived_concepts.append(concept)
1663
+ self.environment.add_concept(concept, meta=meta)
1664
+ multi = MultiSelectStatement(
1665
+ selects=selects,
1666
+ align=align,
1667
+ namespace=self.environment.namespace,
1668
+ where_clause=where,
1669
+ order_by=order_by,
1670
+ limit=limit,
1671
+ meta=Metadata(line_number=meta.line),
1672
+ derived_concepts=derived_concepts,
1673
+ derive=derive,
1674
+ )
1675
+ return multi
1676
+
1677
+ @v_args(meta=True)
1678
+ def select_statement(self, meta: Meta, args) -> SelectStatement:
1679
+ select_items: List[SelectItem] | None = None
1680
+ limit: int | None = None
1681
+ order_by: OrderBy | None = None
1682
+ where = None
1683
+ having = None
1684
+ for arg in args:
1685
+ atype = type(arg)
1686
+ if atype is list:
1687
+ select_items = arg
1688
+ elif atype is Limit:
1689
+ limit = arg.count
1690
+ elif atype is OrderBy:
1691
+ order_by = arg
1692
+ elif atype is WhereClause:
1693
+ if where is not None:
1694
+ raise ParseError(
1695
+ "Multiple where clauses defined are not supported!"
1696
+ )
1697
+ where = arg
1698
+ elif atype is HavingClause:
1699
+ having = arg
1700
+ if not select_items:
1701
+ raise ParseError("Malformed select, missing select items")
1702
+ pre_keys = set(self.environment.concepts.keys())
1703
+ base = SelectStatement.from_inputs(
1704
+ environment=self.environment,
1705
+ selection=select_items,
1706
+ order_by=order_by,
1707
+ where_clause=where,
1708
+ having_clause=having,
1709
+ limit=limit,
1710
+ meta=Metadata(line_number=meta.line),
1711
+ )
1712
+ if (
1713
+ self.parse_pass == ParsePass.INITIAL
1714
+ and self.parse_config.strict_name_shadow_enforcement
1715
+ ):
1716
+ intersection = base.locally_derived.intersection(pre_keys)
1717
+ if intersection:
1718
+ for x in intersection:
1719
+ if str(base.local_concepts[x].lineage) == str(
1720
+ self.environment.concepts[x].lineage
1721
+ ):
1722
+ local = base.local_concepts[x]
1723
+ friendly_name = (
1724
+ local.name
1725
+ if local.namespace == DEFAULT_NAMESPACE
1726
+ else local.namespace
1727
+ )
1728
+ raise NameShadowError(
1729
+ f"Select statement {base} creates a new concept '{friendly_name}' with a definition identical to the existing concept '{friendly_name}'. Replace {base.local_concepts[x].lineage} with a direct reference to {friendly_name}."
1730
+ )
1731
+ else:
1732
+ raise NameShadowError(
1733
+ f"Select statement {base} creates new named concepts from calculations {list(intersection)} with identical name(s) to existing concept(s). Use new unique names for these."
1734
+ )
1735
+ return base
1736
+
1737
+ @v_args(meta=True)
1738
+ def address(self, meta: Meta, args):
1739
+ return args[0]
1740
+
1741
+ @v_args(meta=True)
1742
+ def query(self, meta: Meta, args):
1743
+ return Query(text=args[0])
1744
+
1745
+ @v_args(meta=True)
1746
+ def file(self, meta: Meta, args):
1747
+ raw_path = args[0][1:-1]
1748
+
1749
+ # Cloud storage URLs should be used as-is without path resolution
1750
+ cloud_prefixes = ("gcs://", "gs://", "s3://", "https://", "http://")
1751
+ is_cloud = raw_path.startswith(cloud_prefixes)
1752
+
1753
+ if is_cloud:
1754
+ base = raw_path
1755
+ suffix = ("." + raw_path.rsplit(".", 1)[-1]) if "." in raw_path else ""
1756
+ else:
1757
+ path = Path(raw_path)
1758
+ # if it's a relative path, look it up relative to current parsing directory
1759
+ if path.is_relative_to("."):
1760
+ path = Path(self.environment.working_path) / path
1761
+ base = str(path.resolve().absolute())
1762
+ suffix = path.suffix
1763
+
1764
+ def check_exists():
1765
+ if not is_cloud and not Path(base).exists():
1766
+ raise FileNotFoundError(
1767
+ f"File path {base} does not exist on line {meta.line}"
1768
+ )
1769
+
1770
+ if suffix == ".sql":
1771
+ check_exists()
1772
+ return File(path=base, type=AddressType.SQL)
1773
+ elif suffix == ".py":
1774
+ check_exists()
1775
+ return File(path=base, type=AddressType.PYTHON_SCRIPT)
1776
+ elif suffix == ".csv":
1777
+ return File(path=base, type=AddressType.CSV)
1778
+ elif suffix == ".tsv":
1779
+ return File(path=base, type=AddressType.TSV)
1780
+ elif suffix == ".parquet":
1781
+ return File(path=base, type=AddressType.PARQUET)
1782
+ else:
1783
+ raise ParseError(
1784
+ f"Unsupported file type {suffix} for path {raw_path} on line {meta.line}"
1785
+ )
1786
+
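For orientation, the suffix handling above reduces to a small mapping; cloud URLs bypass both path resolution and the existence check, and only local .sql and .py files are verified to exist at parse time. Summarized as a sketch (the dict below is illustrative, not an object in the package):

```python
# Illustrative summary of the suffix -> AddressType dispatch above.
SUFFIX_TO_ADDRESS_TYPE = {
    ".sql": "SQL",            # must exist locally
    ".py": "PYTHON_SCRIPT",   # must exist locally
    ".csv": "CSV",
    ".tsv": "TSV",
    ".parquet": "PARQUET",
}
# Paths starting with gcs://, gs://, s3://, https:// or http:// are used
# verbatim; any other suffix raises a ParseError.
```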
1787
+ def where(self, args):
1788
+ root = args[0]
1789
+ root = expr_to_boolean(root, self.function_factory)
1790
+ return WhereClause(conditional=root)
1791
+
1792
+ def having(self, args):
1793
+ root = args[0]
1794
+ if not isinstance(root, (Comparison, Conditional, Parenthetical)):
1795
+ if arg_to_datatype(root) == DataType.BOOL:
1796
+ root = Comparison(left=root, right=True, operator=ComparisonOperator.EQ)
1797
+ else:
1798
+ root = Comparison(
1799
+ left=root,
1800
+ right=MagicConstants.NULL,
1801
+ operator=ComparisonOperator.IS_NOT,
1802
+ )
1803
+ return HavingClause(conditional=root)
1804
+
1805
+ @v_args(meta=True)
1806
+ def function_binding_list(self, meta: Meta, args) -> list[ArgBinding]:
1807
+ return args
1808
+
1809
+ @v_args(meta=True)
1810
+ def function_binding_type(self, meta: Meta, args) -> FunctionBindingType:
1811
+ return FunctionBindingType(type=args[0])
1812
+
1813
+ @v_args(meta=True)
1814
+ def function_binding_default(self, meta: Meta, args):
1815
+ return args[1]
1816
+
1817
+ @v_args(meta=True)
1818
+ def function_binding_item(self, meta: Meta, args) -> ArgBinding:
1819
+ default = None
1820
+ type = None
1821
+ for arg in args[1:]:
1822
+ if isinstance(arg, FunctionBindingType):
1823
+ type = arg.type
1824
+ else:
1825
+ default = arg
1826
+ return ArgBinding.model_construct(name=args[0], datatype=type, default=default)
1827
+
1828
+ @v_args(meta=True)
1829
+ def raw_function(self, meta: Meta, args) -> FunctionDeclaration:
1830
+ identity = args[0]
1831
+ function_arguments: list[ArgBinding] = args[1]
1832
+ output = args[2]
1833
+
1834
+ self.environment.functions[identity] = CustomFunctionFactory(
1835
+ function=output,
1836
+ namespace=self.environment.namespace,
1837
+ function_arguments=function_arguments,
1838
+ name=identity,
1839
+ )
1840
+ return FunctionDeclaration(name=identity, args=function_arguments, expr=output)
1841
+
1842
+ def custom_function(self, args) -> FunctionCallWrapper:
1843
+ name = args[0]
1844
+ args = args[1:]
1845
+ remapped = FunctionCallWrapper(
1846
+ content=self.environment.functions[name](*args), name=name, args=args
1847
+ )
1848
+
1849
+ return remapped
1850
+
1851
+ @v_args(meta=True)
1852
+ def function(self, meta: Meta, args) -> Function:
1853
+ return args[0]
1854
+
1855
+ @v_args(meta=True)
1856
+ def type_drop_clause(self, meta: Meta, args) -> DropOn:
1857
+ return DropOn([FunctionType(x) for x in args])
1858
+
1859
+ @v_args(meta=True)
1860
+ def type_add_clause(self, meta: Meta, args) -> AddOn:
1861
+ return AddOn([FunctionType(x) for x in args])
1862
+
1863
+ @v_args(meta=True)
1864
+ def type_declaration(self, meta: Meta, args) -> TypeDeclaration:
1865
+ key = args[0]
1866
+ datatype: list[DataType] = [x for x in args[1:] if isinstance(x, DataType)]
1867
+ if len(datatype) == 1:
1868
+ final_datatype: list[DataType] | DataType = datatype[0]
1869
+ else:
1870
+ final_datatype = datatype
1871
+ add_on = None
1872
+ drop_on = None
1873
+ for x in args[1:]:
1874
+ if isinstance(x, AddOn):
1875
+ add_on = x
1876
+ elif isinstance(x, DropOn):
1877
+ drop_on = x
1878
+ new = CustomType(
1879
+ name=key,
1880
+ type=final_datatype,
1881
+ drop_on=drop_on.functions if drop_on else [],
1882
+ add_on=add_on.functions if add_on else [],
1883
+ )
1884
+ self.environment.data_types[key] = new
1885
+ return TypeDeclaration(type=new)
1886
+
1887
+ def int_lit(self, args):
1888
+ return int("".join(args))
1889
+
1890
+ def bool_lit(self, args):
1891
+ return args[0].capitalize() == "True"
1892
+
1893
+ def null_lit(self, args):
1894
+ return NULL_VALUE
1895
+
1896
+ def float_lit(self, args):
1897
+ return float(args[0])
1898
+
1899
+ def array_lit(self, args):
1900
+ return list_to_wrapper(args)
1901
+
1902
+ def tuple_lit(self, args):
1903
+ return tuple_to_wrapper(args)
1904
+
1905
+ def string_lit(self, args) -> str:
1906
+ if not args:
1907
+ return ""
1908
+
1909
+ return args[0]
1910
+
1911
+ @v_args(meta=True)
1912
+ def struct_lit(self, meta, args):
1913
+ return self.function_factory.create_function(
1914
+ args, operator=FunctionType.STRUCT, meta=meta
1915
+ )
1916
+
1917
+ def map_lit(self, args):
1918
+ parsed = dict(zip(args[::2], args[1::2]))
1919
+ wrapped = dict_to_map_wrapper(parsed)
1920
+ return wrapped
1921
+
1922
+ def literal(self, args):
1923
+ return args[0]
1924
+
1925
+ def product_operator(self, args) -> Function | Any:
1926
+ if len(args) == 1:
1927
+ return args[0]
1928
+ result = args[0]
1929
+ for i in range(1, len(args), 2):
1930
+ new_result = None
1931
+ op = args[i]
1932
+ right = args[i + 1]
1933
+ if op == "*":
1934
+ new_result = self.function_factory.create_function(
1935
+ [result, right], operator=FunctionType.MULTIPLY
1936
+ )
1937
+ elif op == "**":
1938
+ new_result = self.function_factory.create_function(
1939
+ [result, right], operator=FunctionType.POWER
1940
+ )
1941
+ elif op == "/":
1942
+ new_result = self.function_factory.create_function(
1943
+ [result, right], operator=FunctionType.DIVIDE
1944
+ )
1945
+ elif op == "%":
1946
+ new_result = self.function_factory.create_function(
1947
+ [result, right], operator=FunctionType.MOD
1948
+ )
1949
+ else:
1950
+ raise ValueError(f"Unknown operator: {op}")
1951
+ result = new_result
1952
+ return result
1953
+
1954
+ def PLUS_OR_MINUS(self, args) -> str:
1955
+ return args.value
1956
+
1957
+ def MULTIPLY_DIVIDE_PERCENT(self, args) -> str:
1958
+ return args.value
1959
+
1960
+ @v_args(meta=True)
1961
+ def sum_operator(self, meta: Meta, args) -> Function | Any:
1962
+ if len(args) == 1:
1963
+ return args[0]
1964
+ result = args[0]
1965
+ for i in range(1, len(args), 2):
1966
+ new_result = None
1967
+ op = args[i].lower()
1968
+ right = args[i + 1]
1969
+ if op == "+":
1970
+ new_result = self.function_factory.create_function(
1971
+ [result, right], operator=FunctionType.ADD, meta=meta
1972
+ )
1973
+ elif op == "-":
1974
+ new_result = self.function_factory.create_function(
1975
+ [result, right], operator=FunctionType.SUBTRACT, meta=meta
1976
+ )
1977
+ elif op == "||":
1978
+ new_result = self.function_factory.create_function(
1979
+ [result, right], operator=FunctionType.CONCAT, meta=meta
1980
+ )
1981
+ elif op == "like":
1982
+ new_result = self.function_factory.create_function(
1983
+ [result, right], operator=FunctionType.LIKE, meta=meta
1984
+ )
1985
+ else:
1986
+ raise ValueError(f"Unknown operator: {op}")
1987
+ result = new_result
1988
+ return result
1989
+
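Both sum_operator and product_operator receive an alternating [operand, operator, operand, ...] list from the grammar and fold it left to right, so chained operators are left-associative. A standalone sketch of the same fold over plain integers (names hypothetical):

```python
def fold_left(args):
    # Illustrative left fold over [operand, op, operand, op, ...].
    ops = {"+": lambda a, b: a + b, "-": lambda a, b: a - b}
    result = args[0]
    for i in range(1, len(args), 2):
        result = ops[args[i]](result, args[i + 1])
    return result

assert fold_left([10, "-", 3, "-", 2]) == 5  # ((10 - 3) - 2)
```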
1990
+ def comparison(self, args) -> Comparison:
1991
+ if len(args) == 1:
1992
+ return args[0]
1993
+ left = args[0]
1994
+ right = args[2]
1995
+ if args[1] in (ComparisonOperator.IN, ComparisonOperator.NOT_IN):
1996
+ return SubselectComparison(
1997
+ left=left,
1998
+ right=right,
1999
+ operator=args[1],
2000
+ )
2001
+ return Comparison(left=left, right=right, operator=args[1])
2002
+
2003
+ def between_comparison(self, args) -> Conditional:
2004
+ left_bound = args[1]
2005
+ right_bound = args[2]
2006
+ return Conditional(
2007
+ left=Comparison(
2008
+ left=args[0], right=left_bound, operator=ComparisonOperator.GTE
2009
+ ),
2010
+ right=Comparison(
2011
+ left=args[0], right=right_bound, operator=ComparisonOperator.LTE
2012
+ ),
2013
+ operator=BooleanOperator.AND,
2014
+ )
2015
+
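between_comparison above desugars a between comparison (value, lower bound, upper bound) into a conjunction of `>=` and `<=` comparisons rather than a dedicated node, with both bounds inclusive. The equivalent predicate, as a tiny illustrative sketch:

```python
# Illustrative only: the rewrite above is equivalent to this inclusive check.
def between(x, low, high):
    return (x >= low) and (x <= high)

assert between(5, 1, 10) and not between(0, 1, 10)
```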
2016
+ @v_args(meta=True)
2017
+ def subselect_comparison(self, meta: Meta, args) -> SubselectComparison:
2018
+ right = args[2]
2019
+
2020
+ while isinstance(right, Parenthetical) and isinstance(
2021
+ right.content,
2022
+ (
2023
+ Concept,
2024
+ Function,
2025
+ FilterItem,
2026
+ WindowItem,
2027
+ AggregateWrapper,
2028
+ ListWrapper,
2029
+ TupleWrapper,
2030
+ ),
2031
+ ):
2032
+ right = right.content
2033
+ if isinstance(right, (Function, FilterItem, WindowItem, AggregateWrapper)):
2034
+ right_concept = arbitrary_to_concept(right, environment=self.environment)
2035
+ self.environment.add_concept(right_concept, meta=meta)
2036
+ right = right_concept.reference
2037
+ return SubselectComparison(
2038
+ left=args[0],
2039
+ right=right,
2040
+ operator=args[1],
2041
+ )
2042
+
2043
+ def expr_tuple(self, args):
2044
+ datatypes = set([arg_to_datatype(x) for x in args])
2045
+ if len(datatypes) != 1:
2046
+ raise ParseError("Tuple must have same type for all elements")
2047
+ return TupleWrapper(val=tuple(args), type=datatypes.pop())
2048
+
2049
+ def parenthetical(self, args):
2050
+ return Parenthetical(content=args[0])
2051
+
2052
+ @v_args(meta=True)
2053
+ def condition_parenthetical(self, meta, args):
2054
+ if len(args) == 2:
2055
+ return Comparison(
2056
+ left=Parenthetical(content=args[1]),
2057
+ right=False,
2058
+ operator=ComparisonOperator.EQ,
2059
+ )
2060
+ return Parenthetical(content=args[0])
2061
+
2062
+ def conditional(self, args):
2063
+ def munch_args(args):
2064
+ while args:
2065
+ if len(args) == 1:
2066
+ return args[0]
2067
+ else:
2068
+ return Conditional(
2069
+ left=args[0], operator=args[1], right=munch_args(args[2:])
2070
+ )
2071
+
2072
+ return munch_args(args)
2073
+
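The conditional rule folds its alternating [expr, operator, expr, operator, ...] children right-recursively, so the rightmost pair ends up nested deepest. A standalone sketch using tuples in place of Conditional nodes (hypothetical stand-ins):

```python
def munch(args):
    # Illustrative right-recursive fold mirroring `conditional` above.
    if len(args) == 1:
        return args[0]
    return (args[0], args[1], munch(args[2:]))

assert munch(["a", "and", "b", "or", "c"]) == ("a", "and", ("b", "or", "c"))
```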
2074
+ def window_order(self, args):
2075
+ return WindowOrder(args[0])
2076
+
2077
+ def window_order_by(self, args):
2078
+ # flatten tree
2079
+ return args[0]
2080
+
2081
+ def window(self, args):
2082
+
2083
+ return Window(count=args[1].value, window_order=args[0])
2084
+
2085
+ def WINDOW_TYPE(self, args):
2086
+ return WindowType(args.strip())
2087
+
2088
+ def window_item_over(self, args):
2089
+
2090
+ return WindowItemOver(contents=args[0])
2091
+
2092
+ def window_item_order(self, args):
2093
+ return WindowItemOrder(contents=args[0])
2094
+
2095
+ def logical_operator(self, args):
2096
+ return BooleanOperator(args[0].value.lower())
2097
+
2098
+ def DATE_PART(self, args):
2099
+ return DatePart(args.value)
2100
+
2101
+ @v_args(meta=True)
2102
+ def window_item(self, meta: Meta, args) -> WindowItem:
2103
+ type: WindowType = args[0]
2104
+ order_by = []
2105
+ over = []
2106
+ index = None
2107
+ concept: Concept | None = None
2108
+ for item in args:
2109
+ if isinstance(item, int):
2110
+ index = item
2111
+ elif isinstance(item, WindowItemOrder):
2112
+ order_by = item.contents
2113
+ elif isinstance(item, WindowItemOver):
2114
+ over = item.contents
2115
+ elif isinstance(item, str):
2116
+ concept = self.environment.concepts[item]
2117
+ elif isinstance(item, ConceptRef):
2118
+ concept = self.environment.concepts[item.address]
2119
+ elif isinstance(item, WindowType):
2120
+ type = item
2121
+ else:
2122
+ concept = arbitrary_to_concept(item, environment=self.environment)
2123
+ self.environment.add_concept(concept, meta=meta)
2124
+ if not concept:
2125
+ raise ParseError(
2126
+ f"Window statements must be on fields, not constants - error in: `{self.text_lookup[self.parse_address][meta.start_pos:meta.end_pos]}`"
2127
+ )
2128
+ return WindowItem(
2129
+ type=type,
2130
+ content=concept.reference,
2131
+ over=over,
2132
+ order_by=order_by,
2133
+ index=index,
2134
+ )
2135
+
2136
+ def filter_item(self, args) -> FilterItem:
2137
+ where: WhereClause
2138
+ expr, raw = args
2139
+ if isinstance(raw, WhereClause):
2140
+ where = raw
2141
+ else:
2142
+ where = WhereClause.model_construct(
2143
+ conditional=expr_to_boolean(raw, self.function_factory)
2144
+ )
2145
+ if isinstance(expr, str):
2146
+ expr = self.environment.concepts[expr].reference
2147
+ return FilterItem(content=expr, where=where)
2148
+
2149
+ # BEGIN FUNCTIONS
2150
+ @v_args(meta=True)
2151
+ def expr_reference(self, meta, args) -> Concept:
2152
+ return self.environment.concepts.__getitem__(args[0], meta.line)
2153
+
2154
+ def expr(self, args):
2155
+ if len(args) > 1:
2156
+ raise ParseError("Expression should have one child only.")
2157
+ return args[0]
2158
+
2159
+ def aggregate_over(self, args):
2160
+ return args[0]
2161
+
2162
+ def aggregate_all(self, args):
2163
+ return [
2164
+ ConceptRef(
2165
+ address=f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}",
2166
+ datatype=DataType.INTEGER,
2167
+ )
2168
+ ]
2169
+
2170
+ def aggregate_functions(self, args):
2171
+ if len(args) == 2:
2172
+ return AggregateWrapper(function=args[0], by=args[1])
2173
+ return AggregateWrapper(function=args[0])
2174
+
2175
+ @v_args(meta=True)
2176
+ def index_access(self, meta, args):
2177
+ args = process_function_args(args, meta=meta, environment=self.environment)
2178
+ base = args[0]
2179
+ if base.datatype == DataType.MAP or isinstance(base.datatype, MapType):
2180
+ return self.function_factory.create_function(
2181
+ args, FunctionType.MAP_ACCESS, meta
2182
+ )
2183
+ return self.function_factory.create_function(
2184
+ args, FunctionType.INDEX_ACCESS, meta
2185
+ )
2186
+
2187
+ @v_args(meta=True)
2188
+ def map_key_access(self, meta, args):
2189
+ return self.function_factory.create_function(
2190
+ args, FunctionType.MAP_ACCESS, meta
2191
+ )
2192
+
2193
+ @v_args(meta=True)
2194
+ def attr_access(self, meta, args):
2195
+ return self.function_factory.create_function(
2196
+ args, FunctionType.ATTR_ACCESS, meta
2197
+ )
2198
+
2199
+ @v_args(meta=True)
2200
+ def fcoalesce(self, meta, args):
2201
+ return self.function_factory.create_function(args, FunctionType.COALESCE, meta)
2202
+
2203
+ @v_args(meta=True)
2204
+ def fnullif(self, meta, args):
2205
+ return self.function_factory.create_function(args, FunctionType.NULLIF, meta)
2206
+
2207
+ @v_args(meta=True)
2208
+ def frecurse_edge(self, meta, args):
2209
+ return self.function_factory.create_function(
2210
+ args, FunctionType.RECURSE_EDGE, meta
2211
+ )
2212
+
2213
+ @v_args(meta=True)
2214
+ def unnest(self, meta, args):
2215
+
2216
+ return self.function_factory.create_function(args, FunctionType.UNNEST, meta)
2217
+
2218
+ @v_args(meta=True)
2219
+ def count(self, meta, args):
2220
+ return self.function_factory.create_function(args, FunctionType.COUNT, meta)
2221
+
2222
+ @v_args(meta=True)
2223
+ def fgroup(self, meta, args):
2224
+ if len(args) == 2:
2225
+ fargs = [args[0]] + list(args[1])
2226
+ else:
2227
+ fargs = [args[0]]
2228
+ return self.function_factory.create_function(fargs, FunctionType.GROUP, meta)
2229
+
2230
+ @v_args(meta=True)
2231
+ def fabs(self, meta, args):
2232
+ return self.function_factory.create_function(args, FunctionType.ABS, meta)
2233
+
2234
+ @v_args(meta=True)
2235
+ def count_distinct(self, meta, args):
2236
+ return self.function_factory.create_function(
2237
+ args, FunctionType.COUNT_DISTINCT, meta
2238
+ )
2239
+
2240
+ @v_args(meta=True)
2241
+ def sum(self, meta, args):
2242
+ return self.function_factory.create_function(args, FunctionType.SUM, meta)
2243
+
2244
+ @v_args(meta=True)
2245
+ def array_agg(self, meta, args):
2246
+ return self.function_factory.create_function(args, FunctionType.ARRAY_AGG, meta)
2247
+
2248
+ @v_args(meta=True)
2249
+ def any(self, meta, args):
2250
+ return self.function_factory.create_function(args, FunctionType.ANY, meta)
2251
+
2252
+ @v_args(meta=True)
2253
+ def bool_and(self, meta, args):
2254
+ return self.function_factory.create_function(args, FunctionType.BOOL_AND, meta)
2255
+
2256
+ @v_args(meta=True)
2257
+ def bool_or(self, meta, args):
2258
+ return self.function_factory.create_function(args, FunctionType.BOOL_OR, meta)
2259
+
2260
+ @v_args(meta=True)
2261
+ def avg(self, meta, args):
2262
+ return self.function_factory.create_function(args, FunctionType.AVG, meta)
2263
+
2264
+ @v_args(meta=True)
2265
+ def max(self, meta, args):
2266
+ return self.function_factory.create_function(args, FunctionType.MAX, meta)
2267
+
2268
+ @v_args(meta=True)
2269
+ def min(self, meta, args):
2270
+ return self.function_factory.create_function(args, FunctionType.MIN, meta)
2271
+
2272
+ @v_args(meta=True)
2273
+ def len(self, meta, args):
2274
+ return self.function_factory.create_function(args, FunctionType.LENGTH, meta)
2275
+
2276
+ @v_args(meta=True)
2277
+ def fsplit(self, meta, args):
2278
+ return self.function_factory.create_function(args, FunctionType.SPLIT, meta)
2279
+
2280
+ @v_args(meta=True)
2281
+ def concat(self, meta, args):
2282
+ return self.function_factory.create_function(args, FunctionType.CONCAT, meta)
2283
+
2284
+ @v_args(meta=True)
2285
+ def union(self, meta, args):
2286
+ return self.function_factory.create_function(args, FunctionType.UNION, meta)
2287
+
2288
+ @v_args(meta=True)
2289
+ def like(self, meta, args):
2290
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
2291
+
2292
+ @v_args(meta=True)
2293
+ def alt_like(self, meta, args):
2294
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
2295
+
2296
+ @v_args(meta=True)
2297
+ def ilike(self, meta, args):
2298
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
2299
+
2300
+ @v_args(meta=True)
2301
+ def upper(self, meta, args):
2302
+ return self.function_factory.create_function(args, FunctionType.UPPER, meta)
2303
+
2304
+ @v_args(meta=True)
2305
+ def fstrpos(self, meta, args):
2306
+ return self.function_factory.create_function(args, FunctionType.STRPOS, meta)
2307
+
2308
+ @v_args(meta=True)
2309
+ def freplace(self, meta, args):
2310
+ return self.function_factory.create_function(args, FunctionType.REPLACE, meta)
2311
+
2312
+ @v_args(meta=True)
2313
+ def fcontains(self, meta, args):
2314
+ return self.function_factory.create_function(args, FunctionType.CONTAINS, meta)
2315
+
2316
+ @v_args(meta=True)
2317
+ def ftrim(self, meta, args):
2318
+ return self.function_factory.create_function(args, FunctionType.TRIM, meta)
2319
+
2320
+ @v_args(meta=True)
2321
+ def fhash(self, meta, args):
2322
+ return self.function_factory.create_function(args, FunctionType.HASH, meta)
2323
+
2324
+ @v_args(meta=True)
2325
+ def fsubstring(self, meta, args):
2326
+ return self.function_factory.create_function(args, FunctionType.SUBSTRING, meta)
2327
+
2328
+ @v_args(meta=True)
2329
+ def flower(self, meta, args):
2330
+ return self.function_factory.create_function(args, FunctionType.LOWER, meta)
2331
+
2332
+ @v_args(meta=True)
2333
+ def fregexp_contains(self, meta, args):
2334
+ return self.function_factory.create_function(
2335
+ args, FunctionType.REGEXP_CONTAINS, meta
2336
+ )
2337
+
2338
+ @v_args(meta=True)
2339
+ def fregexp_extract(self, meta, args):
2340
+ if len(args) == 2:
2341
+ # this is a magic value to represent the default behavior
2342
+ args.append(-1)
2343
+ return self.function_factory.create_function(
2344
+ args, FunctionType.REGEXP_EXTRACT, meta
2345
+ )
2346
+
2347
+ @v_args(meta=True)
2348
+ def fregexp_replace(self, meta, args):
2349
+ return self.function_factory.create_function(
2350
+ args, FunctionType.REGEXP_REPLACE, meta
2351
+ )
2352
+
2353
+ # date functions
2354
+ @v_args(meta=True)
2355
+ def fdate(self, meta, args):
2356
+ return self.function_factory.create_function(args, FunctionType.DATE, meta)
2357
+
2358
+ @v_args(meta=True)
2359
+ def fdate_trunc(self, meta, args):
2360
+ return self.function_factory.create_function(
2361
+ args, FunctionType.DATE_TRUNCATE, meta
2362
+ )
2363
+
2364
+ @v_args(meta=True)
2365
+ def fdate_part(self, meta, args):
2366
+ return self.function_factory.create_function(args, FunctionType.DATE_PART, meta)
2367
+
2368
+ @v_args(meta=True)
2369
+ def fdate_add(self, meta, args):
2370
+ return self.function_factory.create_function(args, FunctionType.DATE_ADD, meta)
2371
+
2372
+ @v_args(meta=True)
2373
+ def fdate_sub(self, meta, args):
2374
+ return self.function_factory.create_function(args, FunctionType.DATE_SUB, meta)
2375
+
2376
+ @v_args(meta=True)
2377
+ def fdate_diff(self, meta, args):
2378
+ return self.function_factory.create_function(args, FunctionType.DATE_DIFF, meta)
2379
+
2380
+ @v_args(meta=True)
2381
+ def fdatetime(self, meta, args):
2382
+ return self.function_factory.create_function(args, FunctionType.DATETIME, meta)
2383
+
2384
+ @v_args(meta=True)
2385
+ def ftimestamp(self, meta, args):
2386
+ return self.function_factory.create_function(args, FunctionType.TIMESTAMP, meta)
2387
+
2388
+ @v_args(meta=True)
2389
+ def fsecond(self, meta, args):
2390
+ return self.function_factory.create_function(args, FunctionType.SECOND, meta)
2391
+
2392
+ @v_args(meta=True)
2393
+ def fminute(self, meta, args):
2394
+ return self.function_factory.create_function(args, FunctionType.MINUTE, meta)
2395
+
2396
+ @v_args(meta=True)
2397
+ def fhour(self, meta, args):
2398
+ return self.function_factory.create_function(args, FunctionType.HOUR, meta)
2399
+
2400
+ @v_args(meta=True)
2401
+ def fday(self, meta, args):
2402
+ return self.function_factory.create_function(args, FunctionType.DAY, meta)
2403
+
2404
+ @v_args(meta=True)
2405
+ def fday_name(self, meta, args):
2406
+ return self.function_factory.create_function(args, FunctionType.DAY_NAME, meta)
2407
+
2408
+ @v_args(meta=True)
2409
+ def fday_of_week(self, meta, args):
2410
+ return self.function_factory.create_function(
2411
+ args, FunctionType.DAY_OF_WEEK, meta
2412
+ )
2413
+
2414
+ @v_args(meta=True)
2415
+ def fweek(self, meta, args):
2416
+ return self.function_factory.create_function(args, FunctionType.WEEK, meta)
2417
+
2418
+ @v_args(meta=True)
2419
+ def fmonth(self, meta, args):
2420
+ return self.function_factory.create_function(args, FunctionType.MONTH, meta)
2421
+
2422
+ @v_args(meta=True)
2423
+ def fmonth_name(self, meta, args):
2424
+ return self.function_factory.create_function(
2425
+ args, FunctionType.MONTH_NAME, meta
2426
+ )
2427
+
2428
+ @v_args(meta=True)
2429
+ def fquarter(self, meta, args):
2430
+ return self.function_factory.create_function(args, FunctionType.QUARTER, meta)
2431
+
2432
+ @v_args(meta=True)
2433
+ def fyear(self, meta, args):
2434
+ return self.function_factory.create_function(args, FunctionType.YEAR, meta)
2435
+
2436
+ def internal_fcast(self, meta, args) -> Function:
2437
+ args = process_function_args(args, meta=meta, environment=self.environment)
2438
+
2439
+ # Destructure for readability
2440
+ value, dtype = args[0], args[1]
2441
+ processed: Any
2442
+ if isinstance(value, str):
2443
+ match dtype:
2444
+ case DataType.DATE:
2445
+ processed = date.fromisoformat(value)
2446
+ case DataType.DATETIME | DataType.TIMESTAMP:
2447
+ processed = datetime.fromisoformat(value)
2448
+ case DataType.INTEGER:
2449
+ processed = int(value)
2450
+ case DataType.FLOAT:
2451
+ processed = float(value)
2452
+ case DataType.BOOL:
2453
+ processed = value.capitalize() == "True"
2454
+ case DataType.STRING:
2455
+ processed = value
2456
+ case _:
2457
+ raise SyntaxError(f"Invalid cast type {dtype}")
2458
+
2459
+ # Determine function type and arguments
2460
+ if isinstance(dtype, TraitDataType):
2461
+ return self.function_factory.create_function(
2462
+ [processed, dtype], FunctionType.TYPED_CONSTANT, meta
2463
+ )
2464
+
2465
+ return self.function_factory.create_function(
2466
+ [processed], FunctionType.CONSTANT, meta
2467
+ )
2468
+
2469
+ return self.function_factory.create_function(args, FunctionType.CAST, meta)
2470
+
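internal_fcast folds casts of string literals at parse time: the literal is converted to the target Python value and wrapped as a (typed) constant instead of emitting a runtime CAST. The conversions mirror the match statement above; the dict below is only an illustrative summary, not an object in the package:

```python
from datetime import date, datetime

# Illustrative: parse-time conversions applied to string literals per target type.
LITERAL_CONVERSIONS = {
    "DATE": date.fromisoformat,
    "DATETIME/TIMESTAMP": datetime.fromisoformat,
    "INTEGER": int,
    "FLOAT": float,
    "BOOL": lambda v: v.capitalize() == "True",
    "STRING": lambda v: v,
}

assert LITERAL_CONVERSIONS["DATE"]("2024-01-31") == date(2024, 1, 31)
```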
2471
+ @v_args(meta=True)
2472
+ def fdate_spine(self, meta, args) -> Function:
2473
+ return self.function_factory.create_function(
2474
+ args, FunctionType.DATE_SPINE, meta
2475
+ )
2476
+
2477
+ # utility functions
2478
+ @v_args(meta=True)
2479
+ def fcast(self, meta, args) -> Function:
2480
+ return self.internal_fcast(meta, args)
2481
+
2482
+ # math functions
2483
+ @v_args(meta=True)
2484
+ def fadd(self, meta, args) -> Function:
2485
+
2486
+ return self.function_factory.create_function(args, FunctionType.ADD, meta)
2487
+
2488
+ @v_args(meta=True)
2489
+ def fsub(self, meta, args) -> Function:
2490
+ return self.function_factory.create_function(args, FunctionType.SUBTRACT, meta)
2491
+
2492
+ @v_args(meta=True)
2493
+ def fmul(self, meta, args) -> Function:
2494
+ return self.function_factory.create_function(args, FunctionType.MULTIPLY, meta)
2495
+
2496
+ @v_args(meta=True)
2497
+ def fdiv(self, meta: Meta, args) -> Function:
2498
+ return self.function_factory.create_function(args, FunctionType.DIVIDE, meta)
2499
+
2500
+ @v_args(meta=True)
2501
+ def fmod(self, meta: Meta, args) -> Function:
2502
+ return self.function_factory.create_function(args, FunctionType.MOD, meta)
2503
+
2504
+ @v_args(meta=True)
2505
+ def fsqrt(self, meta: Meta, args) -> Function:
2506
+ return self.function_factory.create_function(args, FunctionType.SQRT, meta)
2507
+
2508
+ @v_args(meta=True)
2509
+ def frandom(self, meta: Meta, args) -> Function:
2510
+ return self.function_factory.create_function(args, FunctionType.RANDOM, meta)
2511
+
2512
+ @v_args(meta=True)
2513
+ def fround(self, meta, args) -> Function:
2514
+ if len(args) == 1:
2515
+ args.append(0)
2516
+ return self.function_factory.create_function(args, FunctionType.ROUND, meta)
2517
+
2518
+ @v_args(meta=True)
2519
+ def flog(self, meta, args) -> Function:
2520
+ if len(args) == 1:
2521
+ args.append(10)
2522
+ return self.function_factory.create_function(args, FunctionType.LOG, meta)
2523
+
2524
+ @v_args(meta=True)
2525
+ def ffloor(self, meta, args) -> Function:
2526
+ return self.function_factory.create_function(args, FunctionType.FLOOR, meta)
2527
+
2528
+ @v_args(meta=True)
2529
+ def fceil(self, meta, args) -> Function:
2530
+ return self.function_factory.create_function(args, FunctionType.CEIL, meta)
2531
+
2532
+ @v_args(meta=True)
2533
+ def fcase(self, meta, args: List[Union[CaseWhen, CaseElse]]) -> Function:
2534
+ return self.function_factory.create_function(args, FunctionType.CASE, meta)
2535
+
2536
+ @v_args(meta=True)
2537
+ def fcase_when(self, meta, args) -> CaseWhen:
2538
+ args = process_function_args(args, meta=meta, environment=self.environment)
2539
+ root = expr_to_boolean(args[0], self.function_factory)
2540
+ return CaseWhen(comparison=root, expr=args[1])
2541
+
2542
+ @v_args(meta=True)
2543
+ def fcase_else(self, meta, args) -> CaseElse:
2544
+ args = process_function_args(args, meta=meta, environment=self.environment)
2545
+ return CaseElse(expr=args[0])
2546
+
2547
+ @v_args(meta=True)
2548
+ def fcurrent_date(self, meta, args):
2549
+ return CurrentDate([])
2550
+
2551
+ @v_args(meta=True)
2552
+ def fcurrent_datetime(self, meta, args):
2553
+ return self.function_factory.create_function(
2554
+ args=[], operator=FunctionType.CURRENT_DATETIME, meta=meta
2555
+ )
2556
+
2557
+ @v_args(meta=True)
2558
+ def fcurrent_timestamp(self, meta, args):
2559
+ return self.function_factory.create_function(
2560
+ args=[], operator=FunctionType.CURRENT_TIMESTAMP, meta=meta
2561
+ )
2562
+
2563
+ @v_args(meta=True)
2564
+ def fnot(self, meta, args):
2565
+ if arg_to_datatype(args[0]) == DataType.BOOL:
2566
+ return Comparison(
2567
+ left=self.function_factory.create_function(
2568
+ [args[0], False], FunctionType.COALESCE, meta
2569
+ ),
2570
+ operator=ComparisonOperator.EQ,
2571
+ right=False,
2572
+ meta=meta,
2573
+ )
2574
+ return self.function_factory.create_function(args, FunctionType.IS_NULL, meta)
2575
+
2576
+ @v_args(meta=True)
2577
+ def fbool(self, meta, args):
2578
+ return self.function_factory.create_function(args, FunctionType.BOOL, meta)
2579
+
2580
+ @v_args(meta=True)
2581
+ def fmap_keys(self, meta, args):
2582
+ return self.function_factory.create_function(args, FunctionType.MAP_KEYS, meta)
2583
+
2584
+ @v_args(meta=True)
2585
+ def fmap_values(self, meta, args):
2586
+ return self.function_factory.create_function(
2587
+ args, FunctionType.MAP_VALUES, meta
2588
+ )
2589
+
2590
+ @v_args(meta=True)
2591
+ def farray_sum(self, meta, args):
2592
+ return self.function_factory.create_function(args, FunctionType.ARRAY_SUM, meta)
2593
+
2594
+ @v_args(meta=True)
2595
+ def fgenerate_array(self, meta, args):
2596
+ return self.function_factory.create_function(
2597
+ args, FunctionType.GENERATE_ARRAY, meta
2598
+ )
2599
+
2600
+ @v_args(meta=True)
2601
+ def farray_distinct(self, meta, args):
2602
+ return self.function_factory.create_function(
2603
+ args, FunctionType.ARRAY_DISTINCT, meta
2604
+ )
2605
+
2606
+ @v_args(meta=True)
2607
+ def farray_to_string(self, meta, args):
2608
+ return self.function_factory.create_function(
2609
+ args, FunctionType.ARRAY_TO_STRING, meta
2610
+ )
2611
+
2612
+ @v_args(meta=True)
2613
+ def farray_sort(self, meta, args):
2614
+ if len(args) == 1:
2615
+ # this is a magic value to represent the default behavior
2616
+ args = [args[0], Ordering.ASCENDING]
2617
+ return self.function_factory.create_function(
2618
+ args, FunctionType.ARRAY_SORT, meta
2619
+ )
2620
+
2621
+ @v_args(meta=True)
2622
+ def transform_lambda(self, meta, args):
2623
+ return self.environment.functions[args[0]]
2624
+
2625
+ @v_args(meta=True)
2626
+ def farray_transform(self, meta, args) -> Function:
2627
+ factory: CustomFunctionFactory = args[1]
2628
+ if not len(factory.function_arguments) == 1:
2629
+ raise InvalidSyntaxException(
2630
+ "Array transform function must have exactly one argument"
2631
+ )
2632
+ array_type = arg_to_datatype(args[0])
2633
+ if not isinstance(array_type, ArrayType):
2634
+ raise InvalidSyntaxException(
2635
+ f"Array transform function must be applied to an array, not {array_type}"
2636
+ )
2637
+ return self.function_factory.create_function(
2638
+ [
2639
+ args[0],
2640
+ factory.function_arguments[0],
2641
+ factory(
2642
+ ArgBinding(
2643
+ name=factory.function_arguments[0].name,
2644
+ datatype=array_type.value_data_type,
2645
+ )
2646
+ ),
2647
+ ],
2648
+ FunctionType.ARRAY_TRANSFORM,
2649
+ meta,
2650
+ )
2651
+
2652
+ @v_args(meta=True)
2653
+ def farray_filter(self, meta, args) -> Function:
2654
+ factory: CustomFunctionFactory = args[1]
2655
+ if not len(factory.function_arguments) == 1:
2656
+ raise InvalidSyntaxException(
2657
+ "Array filter function must have exactly one argument"
2658
+ )
2659
+ array_type = arg_to_datatype(args[0])
2660
+ if not isinstance(array_type, ArrayType):
2661
+ raise InvalidSyntaxException(
2662
+ f"Array filter function must be applied to an array, not {array_type}"
2663
+ )
2664
+ return self.function_factory.create_function(
2665
+ [
2666
+ args[0],
2667
+ factory.function_arguments[0],
2668
+ factory(
2669
+ ArgBinding(
2670
+ name=factory.function_arguments[0].name,
2671
+ datatype=array_type.value_data_type,
2672
+ )
2673
+ ),
2674
+ ],
2675
+ FunctionType.ARRAY_FILTER,
2676
+ meta,
2677
+ )
2678
+
2679
+
2680
+ def unpack_visit_error(e: VisitError, text: str | None = None):
2681
+ """This is required to get exceptions from imports, which would
2682
+ raise nested VisitErrors"""
2683
+ if isinstance(e.orig_exc, VisitError):
2684
+ unpack_visit_error(e.orig_exc, text)
2685
+ elif isinstance(e.orig_exc, (UndefinedConceptException, ImportError)):
2686
+ raise e.orig_exc
2687
+ elif isinstance(e.orig_exc, InvalidSyntaxException):
2688
+ raise e.orig_exc
2689
+ elif isinstance(e.orig_exc, (SyntaxError, TypeError)):
2690
+ if isinstance(e.obj, Tree):
2691
+ if text:
2692
+ extract = text[e.obj.meta.start_pos - 5 : e.obj.meta.end_pos + 5]
2693
+ raise InvalidSyntaxException(
2694
+ str(e.orig_exc)
2695
+ + " Raised when parsing rule: "
2696
+ + str(e.rule)
2697
+ + f' Line: {e.obj.meta.line} "...{extract}..."'
2698
+ )
2699
+ raise InvalidSyntaxException(
2700
+ str(e.orig_exc) + " in " + str(e.rule) + f" Line: {e.obj.meta.line}"
2701
+ )
2702
+ raise InvalidSyntaxException(str(e.orig_exc)).with_traceback(
2703
+ e.orig_exc.__traceback__
2704
+ )
2705
+ raise e.orig_exc
2706
+
2707
+
2708
+ def parse_text_raw(text: str, environment: Optional[Environment] = None):
2709
+ return PARSER.parse(text)
2710
+
2711
+
2712
+ ERROR_CODES: dict[int, str] = {
2713
+ # 100 codes are SQL compatibility errors
2714
+ 101: "Using FROM keyword? Trilogy does not have a FROM clause (Datasource resolution is automatic).",
2715
+ # 200 codes relate to required explicit syntax (we could loosen these?)
2716
+ 201: 'Missing alias? Alias must be specified with "AS" - e.g. `SELECT x+1 AS y`',
2717
+ 202: "Missing closing semicolon? Statements must be terminated with a semicolon `;`.",
2718
+ 210: "Missing order direction? Order by must be explicit about direction - specify `asc` or `desc`.",
2719
+ }
2720
+
2721
+ DEFAULT_ERROR_SPAN: int = 30
2722
+
2723
+
2724
+ def inject_context_maker(pos: int, text: str, span: int = 40) -> str:
2725
+ """Returns a pretty string pinpointing the error in the text,
2726
+ with span amount of context characters around it.
2727
+
2728
+ Note:
2729
+ The parser doesn't hold a copy of the text it has to parse,
2730
+ so you have to provide it again
2731
+ """
2732
+
2733
+ start = max(pos - span, 0)
2734
+ end = pos + span
2735
+ if not isinstance(text, bytes):
2736
+
2737
+ before = text[start:pos].rsplit("\n", 1)[-1]
2738
+ after = text[pos:end].split("\n", 1)[0]
2739
+ rcap = ""
2740
+ # if it goes beyond the end of text, no ...
2741
+ # if it terminates on a space, no need for ...
2742
+ if after and not after[-1].isspace() and not (end > len(text)):
2743
+ rcap = "..."
2744
+ lcap = ""
2745
+ if start > 0 and not before[0].isspace():
2746
+ lcap = "..."
2747
+ lpad = " "
2748
+ rpad = " "
2749
+ if before.endswith(" "):
2750
+ lpad = ""
2751
+ if after.startswith(" "):
2752
+ rpad = ""
2753
+ return f"{lcap}{before}{lpad}???{rpad}{after}{rcap}"
2754
+ else:
2755
+ before = text[start:pos].rsplit(b"\n", 1)[-1]
2756
+ after = text[pos:end].split(b"\n", 1)[0]
2757
+ return (before + b" ??? " + after).decode("ascii", "backslashreplace")
2758
+
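inject_context_maker, defined above, surrounds the failing position with up to span characters from the same line and marks it with `???`, adding `...` caps only when the snippet is cut off mid-token. A small usage illustration (assumes the function above is in scope; the output follows from its logic):

```python
text = "select foo from bar;"
print(inject_context_maker(10, text))
# -> select foo ??? from bar;
```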
2759
+
2760
+ def parse_text(
2761
+ text: str,
2762
+ environment: Optional[Environment] = None,
2763
+ root: Path | None = None,
2764
+ parse_config: Parsing | None = None,
2765
+ ) -> Tuple[
2766
+ Environment,
2767
+ List[
2768
+ Datasource
2769
+ | ImportStatement
2770
+ | SelectStatement
2771
+ | PersistStatement
2772
+ | ShowStatement
2773
+ | RawSQLStatement
2774
+ | ValidateStatement
2775
+ | None
2776
+ ],
2777
+ ]:
2778
+ def _create_syntax_error(code: int, pos: int, text: str) -> InvalidSyntaxException:
2779
+ """Helper to create standardized syntax error with context."""
2780
+ return InvalidSyntaxException(
2781
+ f"Syntax [{code}]: "
2782
+ + ERROR_CODES[code]
2783
+ + "\nLocation:\n"
2784
+ + inject_context_maker(pos, text.replace("\n", " "), DEFAULT_ERROR_SPAN)
2785
+ )
2786
+
2787
+ def _create_generic_syntax_error(
2788
+ message: str, pos: int, text: str
2789
+ ) -> InvalidSyntaxException:
2790
+ """Helper to create generic syntax error with context."""
2791
+ return InvalidSyntaxException(
2792
+ message
2793
+ + "\nLocation:\n"
2794
+ + inject_context_maker(pos, text.replace("\n", " "), DEFAULT_ERROR_SPAN)
2795
+ )
2796
+
2797
+ def _handle_unexpected_token(e: UnexpectedToken, text: str) -> None:
2798
+ """Handle UnexpectedToken errors to make friendlier error messages."""
2799
+ # Handle ordering direction error
2800
+ pos = e.pos_in_stream or 0
2801
+ if e.interactive_parser.lexer_thread.state:
2802
+ last_token = e.interactive_parser.lexer_thread.state.last_token
2803
+ else:
2804
+ last_token = None
2805
+ if e.expected == {"ORDERING_DIRECTION"}:
2806
+ raise _create_syntax_error(210, pos, text)
2807
+
2808
+ # Handle FROM token error
2809
+ parsed_tokens = (
2810
+ [x.value for x in e.token_history if x] if e.token_history else []
2811
+ )
2812
+
2813
+ if parsed_tokens == ["FROM"]:
2814
+ raise _create_syntax_error(101, pos, text)
2815
+ # check if they are missing a semicolon
2816
+ if last_token and e.token.type == "$END":
2817
+ try:
2818
+
2819
+ e.interactive_parser.feed_token(Token("_TERMINATOR", ";"))
2820
+ state = e.interactive_parser.lexer_thread.state
2821
+ if state and state.last_token:
2822
+ new_pos = state.last_token.end_pos or pos
2823
+ else:
2824
+ new_pos = pos
2825
+ raise _create_syntax_error(202, new_pos, text)
2826
+ except UnexpectedToken:
2827
+ pass
2828
+ # check if they forgot an as
2829
+ try:
2830
+ e.interactive_parser.feed_token(Token("AS", "AS"))
2831
+ state = e.interactive_parser.lexer_thread.state
2832
+ if state and state.last_token:
2833
+ new_pos = state.last_token.end_pos or pos
2834
+ else:
2835
+ new_pos = pos
2836
+ e.interactive_parser.feed_token(Token("IDENTIFIER", e.token.value))
2837
+ raise _create_syntax_error(201, new_pos, text)
2838
+ except UnexpectedToken:
2839
+ pass
2840
+
2841
+ # Default UnexpectedToken handling
2842
+ raise _create_generic_syntax_error(str(e), pos, text)
2843
+
2844
+ environment = environment or (
2845
+ Environment(working_path=root) if root else Environment()
2846
+ )
2847
+ parser = ParseToObjects(
2848
+ environment=environment, import_keys=["root"], parse_config=parse_config
2849
+ )
2850
+ start = datetime.now()
2851
+
2852
+ try:
2853
+ parser.set_text(text)
2854
+ # disable fail on missing to allow for circular dependencies
2855
+ parser.prepare_parse()
2856
+ parser.transform(PARSER.parse(text))
2857
+ # this will reset fail on missing
2858
+ pass_two = parser.run_second_parse_pass()
2859
+ output = [v for v in pass_two if v]
2860
+ environment.concepts.fail_on_missing = True
2861
+ end = datetime.now()
2862
+ perf_logger.debug(
2863
+ f"Parse time: {end - start} for {len(text)} characters, {len(output)} objects"
2864
+ )
2865
+ except VisitError as e:
2866
+ unpack_visit_error(e, text)
2867
+ # this will never be reached
2868
+ raise e
2869
+ except UnexpectedToken as e:
2870
+ _handle_unexpected_token(e, text)
2871
+ except (UnexpectedCharacters, UnexpectedEOF, UnexpectedInput) as e:
2872
+ raise _create_generic_syntax_error(str(e), e.pos_in_stream or 0, text)
2873
+ except (ValidationError, TypeError) as e:
2874
+ raise InvalidSyntaxException(str(e))
2875
+
2876
+ return environment, output
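End to end, parse_text builds (or reuses) an Environment, runs both parse passes, and converts lark errors into InvalidSyntaxException carrying the numbered hints from ERROR_CODES plus a `???` location marker. A hedged usage sketch; the import paths and the model text below are hypothetical, not taken from this package:

```python
# Hypothetical usage sketch: module paths and Trilogy source are illustrative.
from trilogy.parsing.parse_engine import parse_text  # hypothetical import path
from trilogy.core.exceptions import InvalidSyntaxException  # hypothetical import path

MODEL = "key order_id int; select order_id limit 10;"  # hypothetical Trilogy source

try:
    environment, statements = parse_text(MODEL)
    print(f"parsed {len(statements)} statements")
except InvalidSyntaxException as exc:
    # messages include a numbered hint (e.g. [202] for a missing semicolon)
    # followed by a Location line containing the ??? marker
    print(exc)
```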