pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.148.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.148.dist-info/RECORD +206 -0
  6. pytrilogy-0.3.148.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2662 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +434 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +786 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1431 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +376 -0
  117. trilogy/dialect/enums.py +149 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/file_state_store.py +0 -0
  130. trilogy/execution/state/sqllite_state_store.py +0 -0
  131. trilogy/execution/state/state_store.py +301 -0
  132. trilogy/executor.py +656 -0
  133. trilogy/hooks/__init__.py +4 -0
  134. trilogy/hooks/base_hook.py +40 -0
  135. trilogy/hooks/graph_hook.py +135 -0
  136. trilogy/hooks/query_debugger.py +166 -0
  137. trilogy/metadata/__init__.py +0 -0
  138. trilogy/parser.py +10 -0
  139. trilogy/parsing/README.md +21 -0
  140. trilogy/parsing/__init__.py +0 -0
  141. trilogy/parsing/common.py +1069 -0
  142. trilogy/parsing/config.py +5 -0
  143. trilogy/parsing/exceptions.py +8 -0
  144. trilogy/parsing/helpers.py +1 -0
  145. trilogy/parsing/parse_engine.py +2863 -0
  146. trilogy/parsing/render.py +773 -0
  147. trilogy/parsing/trilogy.lark +544 -0
  148. trilogy/py.typed +0 -0
  149. trilogy/render.py +45 -0
  150. trilogy/scripts/README.md +9 -0
  151. trilogy/scripts/__init__.py +0 -0
  152. trilogy/scripts/agent.py +41 -0
  153. trilogy/scripts/agent_info.py +306 -0
  154. trilogy/scripts/common.py +430 -0
  155. trilogy/scripts/dependency/Cargo.lock +617 -0
  156. trilogy/scripts/dependency/Cargo.toml +39 -0
  157. trilogy/scripts/dependency/README.md +131 -0
  158. trilogy/scripts/dependency/build.sh +25 -0
  159. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  160. trilogy/scripts/dependency/src/lib.rs +16 -0
  161. trilogy/scripts/dependency/src/main.rs +770 -0
  162. trilogy/scripts/dependency/src/parser.rs +435 -0
  163. trilogy/scripts/dependency/src/preql.pest +208 -0
  164. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  165. trilogy/scripts/dependency/src/resolver.rs +716 -0
  166. trilogy/scripts/dependency/tests/base.preql +3 -0
  167. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  168. trilogy/scripts/dependency/tests/customer.preql +6 -0
  169. trilogy/scripts/dependency/tests/main.preql +9 -0
  170. trilogy/scripts/dependency/tests/orders.preql +7 -0
  171. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  172. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  173. trilogy/scripts/dependency.py +323 -0
  174. trilogy/scripts/display.py +555 -0
  175. trilogy/scripts/environment.py +59 -0
  176. trilogy/scripts/fmt.py +32 -0
  177. trilogy/scripts/ingest.py +472 -0
  178. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  179. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  180. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  181. trilogy/scripts/ingest_helpers/typing.py +161 -0
  182. trilogy/scripts/init.py +105 -0
  183. trilogy/scripts/parallel_execution.py +748 -0
  184. trilogy/scripts/plan.py +189 -0
  185. trilogy/scripts/refresh.py +106 -0
  186. trilogy/scripts/run.py +79 -0
  187. trilogy/scripts/serve.py +202 -0
  188. trilogy/scripts/serve_helpers/__init__.py +41 -0
  189. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  190. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  191. trilogy/scripts/serve_helpers/models.py +38 -0
  192. trilogy/scripts/single_execution.py +131 -0
  193. trilogy/scripts/testing.py +129 -0
  194. trilogy/scripts/trilogy.py +75 -0
  195. trilogy/std/__init__.py +0 -0
  196. trilogy/std/color.preql +3 -0
  197. trilogy/std/date.preql +13 -0
  198. trilogy/std/display.preql +18 -0
  199. trilogy/std/geography.preql +22 -0
  200. trilogy/std/metric.preql +15 -0
  201. trilogy/std/money.preql +67 -0
  202. trilogy/std/net.preql +14 -0
  203. trilogy/std/ranking.preql +7 -0
  204. trilogy/std/report.preql +5 -0
  205. trilogy/std/semantic.preql +6 -0
  206. trilogy/utility.py +34 -0
trilogy/parsing/parse_engine.py
@@ -0,0 +1,2863 @@
1
+ from dataclasses import dataclass
2
+ from datetime import date, datetime
3
+ from enum import Enum
4
+ from logging import getLogger
5
+ from os.path import dirname, join
6
+ from pathlib import Path
7
+ from re import IGNORECASE
8
+ from typing import Any, List, Optional, Tuple, Union
9
+
10
+ from lark import Lark, ParseTree, Token, Transformer, Tree, v_args
11
+ from lark.exceptions import (
12
+ UnexpectedCharacters,
13
+ UnexpectedEOF,
14
+ UnexpectedInput,
15
+ UnexpectedToken,
16
+ VisitError,
17
+ )
18
+ from lark.tree import Meta
19
+ from pydantic import ValidationError
20
+
21
+ from trilogy.constants import (
22
+ CONFIG,
23
+ DEFAULT_NAMESPACE,
24
+ NULL_VALUE,
25
+ MagicConstants,
26
+ Parsing,
27
+ )
28
+ from trilogy.core.enums import (
29
+ AddressType,
30
+ BooleanOperator,
31
+ ComparisonOperator,
32
+ ConceptSource,
33
+ CreateMode,
34
+ DatasourceState,
35
+ DatePart,
36
+ Derivation,
37
+ FunctionType,
38
+ Granularity,
39
+ IOType,
40
+ Modifier,
41
+ Ordering,
42
+ PersistMode,
43
+ PublishAction,
44
+ Purpose,
45
+ ShowCategory,
46
+ ValidationScope,
47
+ WindowOrder,
48
+ WindowType,
49
+ )
50
+ from trilogy.core.exceptions import (
51
+ InvalidSyntaxException,
52
+ MissingParameterException,
53
+ UndefinedConceptException,
54
+ )
55
+ from trilogy.core.functions import (
56
+ CurrentDate,
57
+ FunctionFactory,
58
+ )
59
+ from trilogy.core.internal import ALL_ROWS_CONCEPT, INTERNAL_NAMESPACE
60
+ from trilogy.core.models.author import (
61
+ AggregateWrapper,
62
+ AlignClause,
63
+ AlignItem,
64
+ ArgBinding,
65
+ CaseElse,
66
+ CaseWhen,
67
+ Comment,
68
+ Comparison,
69
+ Concept,
70
+ ConceptRef,
71
+ Conditional,
72
+ CustomFunctionFactory,
73
+ CustomType,
74
+ DeriveClause,
75
+ DeriveItem,
76
+ Expr,
77
+ FilterItem,
78
+ Function,
79
+ FunctionCallWrapper,
80
+ Grain,
81
+ HavingClause,
82
+ Metadata,
83
+ MultiSelectLineage,
84
+ OrderBy,
85
+ OrderItem,
86
+ Parenthetical,
87
+ RowsetItem,
88
+ SubselectComparison,
89
+ UndefinedConceptFull,
90
+ WhereClause,
91
+ Window,
92
+ WindowItem,
93
+ WindowItemOrder,
94
+ WindowItemOver,
95
+ )
96
+ from trilogy.core.models.core import (
97
+ ArrayType,
98
+ DataType,
99
+ DataTyped,
100
+ ListWrapper,
101
+ MapType,
102
+ MapWrapper,
103
+ NumericType,
104
+ StructComponent,
105
+ StructType,
106
+ TraitDataType,
107
+ TupleWrapper,
108
+ arg_to_datatype,
109
+ dict_to_map_wrapper,
110
+ is_compatible_datatype,
111
+ list_to_wrapper,
112
+ tuple_to_wrapper,
113
+ )
114
+ from trilogy.core.models.datasource import (
115
+ Address,
116
+ ColumnAssignment,
117
+ Datasource,
118
+ File,
119
+ Query,
120
+ RawColumnExpr,
121
+ )
122
+ from trilogy.core.models.environment import (
123
+ DictImportResolver,
124
+ Environment,
125
+ FileSystemImportResolver,
126
+ Import,
127
+ )
128
+ from trilogy.core.statements.author import (
129
+ ConceptDeclarationStatement,
130
+ ConceptDerivationStatement,
131
+ ConceptTransform,
132
+ CopyStatement,
133
+ CreateStatement,
134
+ FunctionDeclaration,
135
+ ImportStatement,
136
+ Limit,
137
+ MergeStatementV2,
138
+ MockStatement,
139
+ MultiSelectStatement,
140
+ PersistStatement,
141
+ PublishStatement,
142
+ RawSQLStatement,
143
+ RowsetDerivationStatement,
144
+ SelectItem,
145
+ SelectStatement,
146
+ ShowStatement,
147
+ TypeDeclaration,
148
+ ValidateStatement,
149
+ )
150
+ from trilogy.parsing.common import (
151
+ align_item_to_concept,
152
+ arbitrary_to_concept,
153
+ constant_to_concept,
154
+ derive_item_to_concept,
155
+ process_function_args,
156
+ rowset_to_concepts,
157
+ )
158
+ from trilogy.parsing.exceptions import NameShadowError, ParseError
159
+
160
+ perf_logger = getLogger("trilogy.parse.performance")
161
+
162
+
163
+ class ParsePass(Enum):
164
+ INITIAL = 1
165
+ VALIDATION = 2
166
+
167
+
168
+ CONSTANT_TYPES = (int, float, str, bool, ListWrapper, TupleWrapper, MapWrapper)
169
+
170
+ SELF_LABEL = "root"
171
+
172
+ MAX_PARSE_DEPTH = 10
173
+
174
+ SUPPORTED_INCREMENTAL_TYPES: set[DataType] = set([DataType.DATE, DataType.TIMESTAMP])
175
+
176
+ STDLIB_ROOT = Path(__file__).parent.parent
177
+
178
+
179
+ @dataclass
180
+ class WholeGrainWrapper:
181
+ where: WhereClause
182
+
183
+
184
+ @dataclass
185
+ class FunctionBindingType:
186
+ type: DataType | TraitDataType | None = None
187
+
188
+
189
+ @dataclass
190
+ class DropOn:
191
+ functions: List[FunctionType]
192
+
193
+
194
+ @dataclass
195
+ class AddOn:
196
+ functions: List[FunctionType]
197
+
198
+
199
+ @dataclass
200
+ class DatasourcePartitionClause:
201
+ columns: List[ConceptRef]
202
+
203
+
204
+ @dataclass
205
+ class DatasourceIncrementalClause:
206
+ columns: List[ConceptRef]
207
+
208
+
209
+ with open(join(dirname(__file__), "trilogy.lark"), "r") as f:
210
+ PARSER = Lark(
211
+ f.read(),
212
+ start="start",
213
+ propagate_positions=True,
214
+ g_regex_flags=IGNORECASE,
215
+ parser="lalr",
216
+ cache=True,
217
+ )
218
+
219
+
220
+ def parse_concept_reference(
221
+ name: str, environment: Environment, purpose: Optional[Purpose] = None
222
+ ) -> Tuple[str, str, str, str | None]:
223
+ parent = None
224
+ if "." in name:
225
+ if purpose == Purpose.PROPERTY:
226
+ parent, name = name.rsplit(".", 1)
227
+ namespace = environment.concepts[parent].namespace or DEFAULT_NAMESPACE
228
+ lookup = f"{namespace}.{name}"
229
+ else:
230
+ namespace, name = name.rsplit(".", 1)
231
+ lookup = f"{namespace}.{name}"
232
+ else:
233
+ namespace = environment.namespace or DEFAULT_NAMESPACE
234
+ lookup = name
235
+ return lookup, namespace, name, parent
236
+
237
+
238
+ def expr_to_boolean(
239
+ root,
240
+ function_factory: FunctionFactory,
241
+ ) -> Union[Comparison, SubselectComparison, Conditional]:
242
+ if not isinstance(root, (Comparison, SubselectComparison, Conditional)):
243
+ if arg_to_datatype(root) == DataType.BOOL:
244
+ root = Comparison(left=root, right=True, operator=ComparisonOperator.EQ)
245
+ elif arg_to_datatype(root) == DataType.INTEGER:
246
+ root = Comparison(
247
+ left=function_factory.create_function(
248
+ [root],
249
+ FunctionType.BOOL,
250
+ ),
251
+ right=True,
252
+ operator=ComparisonOperator.EQ,
253
+ )
254
+ else:
255
+ root = Comparison(
256
+ left=root, right=NULL_VALUE, operator=ComparisonOperator.IS_NOT
257
+ )
258
+
259
+ return root
260
+
261
+
262
+ def unwrap_transformation(
263
+ input: Expr,
264
+ environment: Environment,
265
+ ) -> (
266
+ Function
267
+ | FilterItem
268
+ | WindowItem
269
+ | AggregateWrapper
270
+ | FunctionCallWrapper
271
+ | Parenthetical
272
+ ):
273
+ if isinstance(input, Function):
274
+ return input
275
+ elif isinstance(input, AggregateWrapper):
276
+ return input
277
+ elif isinstance(input, ConceptRef):
278
+ concept = environment.concepts[input.address]
279
+ return Function(
280
+ operator=FunctionType.ALIAS,
281
+ output_datatype=concept.datatype,
282
+ output_purpose=concept.purpose,
283
+ arguments=[input],
284
+ )
285
+ elif isinstance(input, FilterItem):
286
+ return input
287
+ elif isinstance(input, WindowItem):
288
+ return input
289
+ elif isinstance(input, FunctionCallWrapper):
290
+ return input
291
+ elif isinstance(input, Parenthetical):
292
+ return input
293
+ else:
294
+ return Function.model_construct(
295
+ operator=FunctionType.CONSTANT,
296
+ output_datatype=arg_to_datatype(input),
297
+ output_purpose=Purpose.CONSTANT,
298
+ arguments=[input],
299
+ )
300
+
301
+
302
+ def rehydrate_lineage(
303
+ lineage: Any, environment: Environment, function_factory: FunctionFactory
304
+ ) -> Any:
305
+ """Fix datatype propagation. This is a hack to fix the fact that we don't know the datatypes of functions until we've parsed all concepts"""
306
+ if isinstance(lineage, Function):
307
+ rehydrated = [
308
+ rehydrate_lineage(x, environment, function_factory)
309
+ for x in lineage.arguments
310
+ ]
311
+ return function_factory.create_function(
312
+ rehydrated,
313
+ operator=lineage.operator,
314
+ )
315
+ elif isinstance(lineage, Parenthetical):
316
+ lineage.content = rehydrate_lineage(
317
+ lineage.content, environment, function_factory
318
+ )
319
+ return lineage
320
+ elif isinstance(lineage, WindowItem):
321
+ # this is temporarily guaranteed until we do some upstream work
322
+ assert isinstance(lineage.content, ConceptRef)
323
+ lineage.content.datatype = environment.concepts[
324
+ lineage.content.address
325
+ ].datatype
326
+ return lineage
327
+ elif isinstance(lineage, AggregateWrapper):
328
+ lineage.function = rehydrate_lineage(
329
+ lineage.function, environment, function_factory
330
+ )
331
+ return lineage
332
+ elif isinstance(lineage, RowsetItem):
333
+ lineage.content.datatype = environment.concepts[
334
+ lineage.content.address
335
+ ].datatype
336
+ return lineage
337
+ else:
338
+ return lineage
339
+
340
+
341
+ def rehydrate_concept_lineage(
342
+ concept: Concept, environment: Environment, function_factory: FunctionFactory
343
+ ) -> Concept:
344
+ concept.lineage = rehydrate_lineage(concept.lineage, environment, function_factory)
345
+ if isinstance(concept.lineage, DataTyped):
346
+ concept.datatype = concept.lineage.output_datatype
347
+ return concept
348
+
349
+
350
+ class ParseToObjects(Transformer):
351
+ def __init__(
352
+ self,
353
+ environment: Environment,
354
+ parse_address: str | None = None,
355
+ token_address: Path | str | None = None,
356
+ parsed: dict[str, "ParseToObjects"] | None = None,
357
+ tokens: dict[Path | str, ParseTree] | None = None,
358
+ text_lookup: dict[Path | str, str] | None = None,
359
+ environment_lookup: dict[str, Environment] | None = None,
360
+ import_keys: list[str] | None = None,
361
+ parse_config: Parsing | None = None,
362
+ ):
363
+ Transformer.__init__(self, True)
364
+ self.environment: Environment = environment
365
+ self.parse_address: str = parse_address or SELF_LABEL
366
+ self.token_address: Path | str = token_address or SELF_LABEL
367
+ self.parsed: dict[str, ParseToObjects] = parsed if parsed is not None else {}
368
+ self.tokens: dict[Path | str, ParseTree] = tokens if tokens is not None else {}
369
+ self.environments: dict[str, Environment] = environment_lookup or {}
370
+ self.text_lookup: dict[Path | str, str] = (
371
+ text_lookup if text_lookup is not None else {}
372
+ )
373
+ # we do a second pass to pick up circular dependencies
374
+ # after initial parsing
375
+ self.parse_pass = ParsePass.INITIAL
376
+ self.function_factory = FunctionFactory(self.environment)
377
+ self.import_keys: list[str] = import_keys or ["root"]
378
+ self.parse_config: Parsing = parse_config or CONFIG.parsing
379
+
380
+ def set_text(self, text: str):
381
+ self.text_lookup[self.token_address] = text
382
+
383
+ def transform(self, tree: Tree):
384
+ results = super().transform(tree)
385
+ self.tokens[self.token_address] = tree
386
+ return results
387
+
388
+ def prepare_parse(self):
389
+ self.parse_pass = ParsePass.INITIAL
390
+ self.environment.concepts.fail_on_missing = False
391
+ for _, v in self.parsed.items():
392
+ v.prepare_parse()
393
+
394
+ def run_second_parse_pass(self, force: bool = False):
395
+ if self.token_address not in self.tokens:
396
+ return []
397
+ self.parse_pass = ParsePass.VALIDATION
398
+ for _, v in list(self.parsed.items()):
399
+ if v.parse_pass == ParsePass.VALIDATION:
400
+ continue
401
+ v.run_second_parse_pass()
402
+ reparsed = self.transform(self.tokens[self.token_address])
403
+ self.environment.concepts.undefined = {}
404
+ passed = False
405
+ passes = 0
406
+ # output datatypes for functions may have been wrong
407
+ # as they were derived from not fully understood upstream types
408
+ # so loop through to recreate function lineage until all datatypes are known
409
+
410
+ while not passed:
411
+ new_passed = True
412
+ for x, y in self.environment.concepts.items():
413
+ if y.datatype == DataType.UNKNOWN and y.lineage:
414
+ self.environment.concepts[x] = rehydrate_concept_lineage(
415
+ y, self.environment, self.function_factory
416
+ )
417
+ new_passed = False
418
+ passes += 1
419
+ if passes > MAX_PARSE_DEPTH:
420
+ break
421
+ passed = new_passed
422
+
423
+ return reparsed
424
+
425
+ def start(self, args):
426
+ return args
427
+
428
+ def LINE_SEPARATOR(self, args):
429
+ return MagicConstants.LINE_SEPARATOR
430
+
431
+ def block(self, args):
432
+ output = args[0]
433
+ if isinstance(output, ConceptDeclarationStatement):
434
+ if len(args) > 1 and args[1] != MagicConstants.LINE_SEPARATOR:
435
+ comments = [x for x in args[1:] if isinstance(x, Comment)]
436
+ merged = "\n".join([x.text.split("#")[1].rstrip() for x in comments])
437
+ output.concept.metadata.description = merged
438
+ # this is a bad plan for now;
439
+ # because a comment after an import statement is very common
440
+ # and it's not intuitive that it modifies the import description
441
+ # if isinstance(output, ImportStatement):
442
+ # if len(args) > 1 and isinstance(args[1], Comment):
443
+ # comment = args[1].text.split("#")[1].strip()
444
+ # namespace = output.alias
445
+ # for _, v in self.environment.concepts.items():
446
+ # if v.namespace == namespace:
447
+ # if v.metadata.description:
448
+ # v.metadata.description = (
449
+ # f"{comment}: {v.metadata.description}"
450
+ # )
451
+ # else:
452
+ # v.metadata.description = comment
453
+
454
+ return args[0]
455
+
456
+ def metadata(self, args):
457
+ pairs = {key: val for key, val in zip(args[::2], args[1::2])}
458
+ return Metadata(**pairs)
459
+
460
+ def IDENTIFIER(self, args) -> str:
461
+ return args.value
462
+
463
+ def ORDER_IDENTIFIER(self, args) -> ConceptRef:
464
+ return self.environment.concepts[args.value.strip()].reference
465
+
466
+ def WILDCARD_IDENTIFIER(self, args) -> str:
467
+ return args.value
468
+
469
+ def QUOTED_IDENTIFIER(self, args) -> str:
470
+ return args.value[1:-1]
471
+
472
+ @v_args(meta=True)
473
+ def concept_lit(self, meta: Meta, args) -> ConceptRef:
474
+ address = args[0]
475
+ if "." not in address and self.environment.namespace == DEFAULT_NAMESPACE:
476
+ address = f"{DEFAULT_NAMESPACE}.{address}"
477
+ mapping = self.environment.concepts[address]
478
+ datatype = mapping.output_datatype
479
+ return ConceptRef(
480
+ # this is load-bearing to handle pseudonyms
481
+ address=mapping.address,
482
+ metadata=Metadata(line_number=meta.line),
483
+ datatype=datatype,
484
+ )
485
+
486
+ def ADDRESS(self, args) -> Address:
487
+ return Address(location=args.value, quoted=False)
488
+
489
+ def QUOTED_ADDRESS(self, args) -> Address:
490
+ return Address(location=args.value[1:-1], quoted=True)
491
+
492
+ def STRING_CHARS(self, args) -> str:
493
+ return args.value
494
+
495
+ def SINGLE_STRING_CHARS(self, args) -> str:
496
+ return args.value
497
+
498
+ def DOUBLE_STRING_CHARS(self, args) -> str:
499
+ return args.value
500
+
501
+ def MINUS(self, args) -> str:
502
+ return "-"
503
+
504
+ @v_args(meta=True)
505
+ def struct_component(self, meta: Meta, args) -> StructComponent:
506
+ modifiers = []
507
+ for arg in args:
508
+ if isinstance(arg, Modifier):
509
+ modifiers.append(arg)
510
+ return StructComponent(name=args[0], type=args[1], modifiers=modifiers)
511
+
512
+ @v_args(meta=True)
513
+ def struct_type(self, meta: Meta, args) -> StructType:
514
+ final: list[
515
+ DataType
516
+ | MapType
517
+ | ArrayType
518
+ | NumericType
519
+ | StructType
520
+ | StructComponent
521
+ | Concept
522
+ ] = []
523
+ for arg in args:
524
+ if isinstance(arg, StructComponent):
525
+ final.append(arg)
526
+ else:
527
+ new = self.environment.concepts.__getitem__( # type: ignore
528
+ key=arg, line_no=meta.line
529
+ )
530
+ final.append(new)
531
+
532
+ return StructType(
533
+ fields=final,
534
+ fields_map={
535
+ x.name: x for x in final if isinstance(x, (Concept, StructComponent))
536
+ },
537
+ )
538
+
539
+ def list_type(self, args) -> ArrayType:
540
+ content = args[0]
541
+ if isinstance(content, str):
542
+ content = self.environment.concepts[content]
543
+ return ArrayType(type=content)
544
+
545
+ def numeric_type(self, args) -> NumericType:
546
+ return NumericType(precision=args[0], scale=args[1])
547
+
548
+ def map_type(self, args) -> MapType:
549
+ key = args[0]
550
+ value = args[1]
551
+ if isinstance(key, str):
552
+ key = self.environment.concepts[key]
553
+ elif isinstance(value, str):
554
+ value = self.environment.concepts[value]
555
+ return MapType(key_type=key, value_type=value)
556
+
557
+ @v_args(meta=True)
558
+ def data_type(
559
+ self, meta: Meta, args
560
+ ) -> DataType | TraitDataType | ArrayType | StructType | MapType | NumericType:
561
+ resolved = args[0]
562
+ traits = args[2:]
563
+ base: DataType | TraitDataType | ArrayType | StructType | MapType | NumericType
564
+ if isinstance(resolved, StructType):
565
+ base = resolved
566
+ elif isinstance(resolved, ArrayType):
567
+ base = resolved
568
+ elif isinstance(resolved, NumericType):
569
+ base = resolved
570
+ elif isinstance(resolved, MapType):
571
+ base = resolved
572
+ else:
573
+ base = DataType(args[0].lower())
574
+ if traits:
575
+ for trait in traits:
576
+ if trait not in self.environment.data_types:
577
+ raise ParseError(
578
+ f"Invalid trait (type) {trait} for {base}, line {meta.line}."
579
+ )
580
+ matched = self.environment.data_types[trait]
581
+ if not is_compatible_datatype(matched.type, base):
582
+ raise ParseError(
583
+ f"Invalid trait (type) {trait} for {base}, line {meta.line}. Trait expects type {matched.type}, has {base}"
584
+ )
585
+ return TraitDataType(type=base, traits=traits)
586
+
587
+ return base
588
+
589
+ def array_comparison(self, args) -> ComparisonOperator:
590
+ return ComparisonOperator([x.value.lower() for x in args])
591
+
592
+ def COMPARISON_OPERATOR(self, args) -> ComparisonOperator:
593
+ return ComparisonOperator(args.strip())
594
+
595
+ def LOGICAL_OPERATOR(self, args) -> BooleanOperator:
596
+ return BooleanOperator(args.lower())
597
+
598
+ def concept_assignment(self, args):
599
+ return args
600
+
601
+ @v_args(meta=True)
602
+ def column_assignment(self, meta: Meta, args):
603
+ modifiers = []
604
+ if len(args) == 2:
605
+ alias = args[0]
606
+ concept_list = args[1]
607
+ else:
608
+ alias = args[0][-1]
609
+ concept_list = args[0]
610
+ # recursively collect modifiers
611
+ if len(concept_list) > 1:
612
+ modifiers += concept_list[:-1]
613
+ concept = concept_list[-1]
614
+ resolved = self.environment.concepts.__getitem__( # type: ignore
615
+ key=concept, line_no=meta.line, file=self.token_address
616
+ )
617
+ return ColumnAssignment(
618
+ alias=alias, modifiers=modifiers, concept=resolved.reference
619
+ )
620
+
621
+ def _TERMINATOR(self, args):
622
+ return None
623
+
624
+ def _static_functions(self, args):
625
+ return args[0]
626
+
627
+ def MODIFIER(self, args) -> Modifier:
628
+ return Modifier(args.value)
629
+
630
+ def SHORTHAND_MODIFIER(self, args) -> Modifier:
631
+ return Modifier(args.value)
632
+
633
+ def PURPOSE(self, args) -> Purpose:
634
+ return Purpose(args.value)
635
+
636
+ def AUTO(self, args) -> Purpose:
637
+ return Purpose.AUTO
638
+
639
+ def CONST(self, args) -> Purpose:
640
+ return Purpose.CONSTANT
641
+
642
+ def CONSTANT(self, args) -> Purpose:
643
+ return Purpose.CONSTANT
644
+
645
+ def PROPERTY(self, args):
646
+ return Purpose.PROPERTY
647
+
648
+ def HASH_TYPE(self, args):
649
+ return args.value
650
+
651
+ @v_args(meta=True)
652
+ def prop_ident(self, meta: Meta, args) -> Tuple[List[Concept], str]:
653
+ return [self.environment.concepts[grain] for grain in args[:-1]], args[-1]
654
+
655
+ @v_args(meta=True)
656
+ def concept_property_declaration(self, meta: Meta, args) -> Concept:
657
+ unique = False
658
+ if not args[0] == Purpose.PROPERTY:
659
+ unique = True
660
+ args = args[1:]
661
+ metadata = Metadata()
662
+ modifiers = []
663
+ for arg in args:
664
+ if isinstance(arg, Metadata):
665
+ metadata = arg
666
+ if isinstance(arg, Modifier):
667
+ modifiers.append(arg)
668
+
669
+ declaration = args[1]
670
+ if isinstance(declaration, (tuple)):
671
+ parents, name = declaration
672
+ if "." in name:
673
+ namespace, name = name.split(".", 1)
674
+ else:
675
+ namespace = self.environment.namespace or DEFAULT_NAMESPACE
676
+ else:
677
+ if "." not in declaration:
678
+ raise ParseError(
679
+ f"Property declaration {args[1]} must be fully qualified with a parent key"
680
+ )
681
+ grain, name = declaration.rsplit(".", 1)
682
+ parent = self.environment.concepts[grain]
683
+ parents = [parent]
684
+ namespace = parent.namespace
685
+ concept = Concept(
686
+ name=name,
687
+ datatype=args[2],
688
+ purpose=Purpose.PROPERTY if not unique else Purpose.UNIQUE_PROPERTY,
689
+ metadata=metadata,
690
+ grain=Grain(components={x.address for x in parents}),
691
+ namespace=namespace,
692
+ keys=set([x.address for x in parents]),
693
+ modifiers=modifiers,
694
+ )
695
+
696
+ self.environment.add_concept(concept, meta)
697
+ return concept
698
+
699
+ @v_args(meta=True)
700
+ def concept_declaration(self, meta: Meta, args) -> ConceptDeclarationStatement:
701
+ metadata = Metadata()
702
+ modifiers = []
703
+ purpose = args[0]
704
+ datatype = args[2]
705
+ for arg in args:
706
+ if isinstance(arg, Metadata):
707
+ metadata = arg
708
+ if isinstance(arg, Modifier):
709
+ modifiers.append(arg)
710
+ name = args[1]
711
+ _, namespace, name, _ = parse_concept_reference(name, self.environment)
712
+ if purpose == Purpose.PARAMETER:
713
+ value = self.environment.parameters.get(name, None)
714
+ if not value:
715
+ raise MissingParameterException(
716
+ f'This script requires parameter "{name}" to be set in environment.'
717
+ )
718
+ if datatype == DataType.INTEGER:
719
+ value = int(value)
720
+ elif datatype == DataType.FLOAT:
721
+ value = float(value)
722
+ elif datatype == DataType.BOOL:
723
+ value = bool(value)
724
+ elif datatype == DataType.STRING:
725
+ value = str(value)
726
+ elif datatype == DataType.DATE:
727
+ if isinstance(value, date):
728
+ value = value
729
+ else:
730
+ value = date.fromisoformat(value)
731
+ elif datatype == DataType.DATETIME:
732
+ if isinstance(value, datetime):
733
+ value = value
734
+ else:
735
+ value = datetime.fromisoformat(value)
736
+ else:
737
+ raise ParseError(
738
+ f"Unsupported datatype {datatype} for parameter {name}."
739
+ )
740
+ rval = self.constant_derivation(
741
+ meta, [Purpose.CONSTANT, name, value, metadata]
742
+ )
743
+ return rval
744
+
745
+ concept = Concept(
746
+ name=name,
747
+ datatype=datatype,
748
+ purpose=purpose,
749
+ metadata=metadata,
750
+ namespace=namespace,
751
+ modifiers=modifiers,
752
+ derivation=Derivation.ROOT,
753
+ granularity=Granularity.MULTI_ROW,
754
+ )
755
+ if concept.metadata:
756
+ concept.metadata.line_number = meta.line
757
+ self.environment.add_concept(concept, meta=meta)
758
+ return ConceptDeclarationStatement(concept=concept)
759
+
760
+ @v_args(meta=True)
761
+ def concept_derivation(self, meta: Meta, args) -> ConceptDerivationStatement:
762
+
763
+ if len(args) > 3:
764
+ metadata = args[3]
765
+ else:
766
+ metadata = None
767
+ purpose = args[0]
768
+ raw_name = args[1]
769
+ # abc.def.property pattern
770
+ if isinstance(raw_name, str):
771
+ lookup, namespace, name, parent_concept = parse_concept_reference(
772
+ raw_name, self.environment, purpose
773
+ )
774
+ # <abc.def,zef.gf>.property pattern
775
+ else:
776
+ keys, name = raw_name
777
+ keys = [x.address for x in keys]
778
+ namespaces = set([x.rsplit(".", 1)[0] for x in keys])
779
+ if not len(namespaces) == 1:
780
+ namespace = self.environment.namespace or DEFAULT_NAMESPACE
781
+ else:
782
+ namespace = namespaces.pop()
783
+ source_value = args[2]
784
+ # we need to strip off every parenthetical to see what is being assigned.
785
+ while isinstance(source_value, Parenthetical):
786
+ source_value = source_value.content
787
+
788
+ if isinstance(
789
+ source_value,
790
+ (
791
+ FilterItem,
792
+ WindowItem,
793
+ AggregateWrapper,
794
+ Function,
795
+ FunctionCallWrapper,
796
+ Comparison,
797
+ ),
798
+ ):
799
+ concept = arbitrary_to_concept(
800
+ source_value,
801
+ name=name,
802
+ namespace=namespace,
803
+ environment=self.environment,
804
+ metadata=metadata,
805
+ )
806
+
807
+ # let constant purposes exist to support round-tripping
808
+ # as a build concept may end up with a constant based on constant inlining happening recursively
809
+ if purpose == Purpose.KEY and concept.purpose != Purpose.KEY:
810
+ concept.purpose = Purpose.KEY
811
+ elif (
812
+ purpose
813
+ and purpose != Purpose.AUTO
814
+ and concept.purpose != purpose
815
+ and purpose != Purpose.CONSTANT
816
+ ):
817
+ raise SyntaxError(
818
+ f'Concept {name} purpose {concept.purpose} does not match declared purpose {purpose}. Suggest defaulting to "auto"'
819
+ )
820
+
821
+ if concept.metadata:
822
+ concept.metadata.line_number = meta.line
823
+ self.environment.add_concept(concept, meta=meta)
824
+ return ConceptDerivationStatement(concept=concept)
825
+
826
+ elif isinstance(source_value, CONSTANT_TYPES):
827
+ concept = constant_to_concept(
828
+ source_value,
829
+ name=name,
830
+ namespace=namespace,
831
+ metadata=metadata,
832
+ )
833
+ if concept.metadata:
834
+ concept.metadata.line_number = meta.line
835
+ self.environment.add_concept(concept, meta=meta)
836
+ return ConceptDerivationStatement(concept=concept)
837
+
838
+ raise SyntaxError(
839
+ f"Received invalid type {type(args[2])} {args[2]} as input to concept derivation: `{self.text_lookup[self.token_address][meta.start_pos:meta.end_pos]}`"
840
+ )
841
+
842
+ @v_args(meta=True)
843
+ def rowset_derivation_statement(
844
+ self, meta: Meta, args
845
+ ) -> RowsetDerivationStatement:
846
+ name = args[0]
847
+ select: SelectStatement | MultiSelectStatement = args[1]
848
+ output = RowsetDerivationStatement(
849
+ name=name,
850
+ select=select,
851
+ namespace=self.environment.namespace or DEFAULT_NAMESPACE,
852
+ )
853
+
854
+ for new_concept in rowset_to_concepts(output, self.environment):
855
+ if new_concept.metadata:
856
+ new_concept.metadata.line_number = meta.line
857
+ self.environment.add_concept(new_concept, force=True)
858
+
859
+ self.environment.add_rowset(
860
+ output.name, output.select.as_lineage(self.environment)
861
+ )
862
+ return output
863
+
864
+ @v_args(meta=True)
865
+ def constant_derivation(
866
+ self, meta: Meta, args: tuple[Purpose, str, Any, Optional[Metadata]]
867
+ ) -> Concept:
868
+
869
+ if len(args) > 3:
870
+ metadata = args[3]
871
+ else:
872
+ metadata = None
873
+ name = args[1]
874
+ constant: Union[str, float, int, bool, MapWrapper, ListWrapper] = args[2]
875
+ lookup, namespace, name, parent = parse_concept_reference(
876
+ name, self.environment
877
+ )
878
+ concept = Concept(
879
+ name=name,
880
+ datatype=arg_to_datatype(constant),
881
+ purpose=Purpose.CONSTANT,
882
+ metadata=Metadata(line_number=meta.line) if not metadata else metadata,
883
+ lineage=Function(
884
+ operator=FunctionType.CONSTANT,
885
+ output_datatype=arg_to_datatype(constant),
886
+ output_purpose=Purpose.CONSTANT,
887
+ arguments=[constant],
888
+ ),
889
+ grain=Grain(components=set()),
890
+ namespace=namespace,
891
+ granularity=Granularity.SINGLE_ROW,
892
+ )
893
+ if concept.metadata:
894
+ concept.metadata.line_number = meta.line
895
+ self.environment.add_concept(concept, meta)
896
+ return concept
897
+
898
+ @v_args(meta=True)
899
+ def concept(self, meta: Meta, args) -> ConceptDeclarationStatement:
900
+ if isinstance(args[0], Concept):
901
+ concept: Concept = args[0]
902
+ else:
903
+ concept = args[0].concept
904
+ if concept.metadata:
905
+ concept.metadata.line_number = meta.line
906
+ return ConceptDeclarationStatement(concept=concept)
907
+
908
+ def column_assignment_list(self, args):
909
+ return args
910
+
911
+ def column_list(self, args) -> List:
912
+ return args
913
+
914
+ def grain_clause(self, args) -> Grain:
915
+ return Grain(
916
+ components=set([self.environment.concepts[a].address for a in args[0]])
917
+ )
918
+
919
+ @v_args(meta=True)
920
+ def aggregate_by(self, meta: Meta, args):
921
+ base = args[0]
922
+ b_concept = base.value.split(" ")[-1]
923
+ args = [self.environment.concepts[a] for a in [b_concept] + args[1:]]
924
+ return self.function_factory.create_function(args, FunctionType.GROUP, meta)
925
+
926
+ def whole_grain_clause(self, args) -> WholeGrainWrapper:
927
+ return WholeGrainWrapper(where=args[0])
928
+
929
+ def MULTILINE_STRING(self, args) -> str:
930
+ return args[3:-3]
931
+
932
+ def raw_column_assignment(self, args):
933
+ return RawColumnExpr(text=args[1])
934
+
935
+ def DATASOURCE_STATUS(self, args) -> DatasourceState:
936
+ return DatasourceState(args.value.lower())
937
+
938
+ @v_args(meta=True)
939
+ def datasource_status_clause(self, meta: Meta, args):
940
+ return args[1]
941
+
942
+ @v_args(meta=True)
943
+ def datasource_partition_clause(self, meta: Meta, args):
944
+ return DatasourcePartitionClause([ConceptRef(address=arg) for arg in args[0]])
945
+
946
+ @v_args(meta=True)
947
+ def datasource_increment_clause(self, meta: Meta, args):
948
+ return DatasourceIncrementalClause([ConceptRef(address=arg) for arg in args[0]])
949
+
950
+ @v_args(meta=True)
951
+ def datasource(self, meta: Meta, args):
952
+ is_root = False
953
+ if isinstance(args[0], Token) and args[0].lower() == "root":
954
+ is_root = True
955
+ args = args[1:]
956
+ name = args[0]
957
+ columns: List[ColumnAssignment] = args[1]
958
+ grain: Optional[Grain] = None
959
+ address: Optional[Address] = None
960
+ where: Optional[WhereClause] = None
961
+ non_partial_for: Optional[WhereClause] = None
962
+ incremental_by: List[ConceptRef] = []
963
+ partition_by: List[ConceptRef] = []
964
+ datasource_status: DatasourceState = DatasourceState.PUBLISHED
965
+ for val in args[1:]:
966
+ if isinstance(val, Address):
967
+ address = val
968
+ elif isinstance(val, Grain):
969
+ grain = val
970
+ elif isinstance(val, WholeGrainWrapper):
971
+ non_partial_for = val.where
972
+ elif isinstance(val, Query):
973
+ address = Address(location=val.text, type=AddressType.QUERY)
974
+ elif isinstance(val, File):
975
+ address = Address(location=val.path, type=val.type)
976
+ elif isinstance(val, WhereClause):
977
+ where = val
978
+ elif isinstance(val, DatasourceState):
979
+ datasource_status = val
980
+ elif isinstance(val, DatasourceIncrementalClause):
981
+ incremental_by = val.columns
982
+ elif isinstance(val, DatasourcePartitionClause):
983
+ partition_by = val.columns
984
+ if not address:
985
+ raise ValueError(
986
+ "Malformed datasource, missing address or query declaration"
987
+ )
988
+
989
+ datasource = Datasource(
990
+ name=name,
991
+ columns=columns,
992
+ # grain will be set by default from args
993
+ # TODO: move to factory
994
+ grain=grain, # type: ignore
995
+ address=address,
996
+ namespace=self.environment.namespace,
997
+ where=where,
998
+ non_partial_for=non_partial_for,
999
+ status=datasource_status,
1000
+ incremental_by=incremental_by,
1001
+ partition_by=partition_by,
1002
+ is_root=is_root,
1003
+ )
1004
+ if datasource.where:
1005
+ for x in datasource.where.concept_arguments:
1006
+ if x.address not in datasource.output_concepts:
1007
+ raise ValueError(
1008
+ f"Datasource {name} where condition depends on concept {x.address} that does not exist on the datasource, line {meta.line}."
1009
+ )
1010
+ if self.parse_pass == ParsePass.VALIDATION:
1011
+ self.environment.add_datasource(datasource, meta=meta)
1012
+ # if we have any foreign keys on the datasource, we can
1013
+ # at this point optimize them to properties if they do not have other usage.
1014
+ for column in columns:
1015
+ # skip partial for now
1016
+ if not grain:
1017
+ continue
1018
+ if column.concept.address in grain.components:
1019
+ continue
1020
+ target_c = self.environment.concepts[column.concept.address]
1021
+ if target_c.purpose != Purpose.KEY:
1022
+ continue
1023
+
1024
+ key_inputs = grain.components
1025
+ eligible = True
1026
+ for key in key_inputs:
1027
+ # never overwrite a key with a dependency on a property
1028
+ # for example - binding a datasource with a grain of <x>.fun should
1029
+ # never override the grain of x to <fun>
1030
+ if column.concept.address in (
1031
+ self.environment.concepts[key].keys or set()
1032
+ ):
1033
+ eligible = False
1034
+ if not eligible:
1035
+ continue
1036
+ keys = [self.environment.concepts[grain] for grain in key_inputs]
1037
+ # target_c.purpose = Purpose.PROPERTY
1038
+ target_c.keys = set([x.address for x in keys])
1039
+ # target_c.grain = Grain(components={x.address for x in keys})
1040
+
1041
+ return datasource
1042
+
1043
+ @v_args(meta=True)
1044
+ def comment(self, meta: Meta, args):
1045
+ assert len(args) == 1
1046
+ return Comment(text=args[0].value)
1047
+
1048
+ def PARSE_COMMENT(self, args):
1049
+ return Comment(text=args.value.rstrip())
1050
+
1051
+ @v_args(meta=True)
1052
+ def select_transform(self, meta: Meta, args) -> ConceptTransform:
1053
+ output: str = args[1]
1054
+ transformation = unwrap_transformation(args[0], self.environment)
1055
+ lookup, namespace, output, parent = parse_concept_reference(
1056
+ output, self.environment
1057
+ )
1058
+
1059
+ metadata = Metadata(line_number=meta.line, concept_source=ConceptSource.SELECT)
1060
+ concept = arbitrary_to_concept(
1061
+ transformation,
1062
+ environment=self.environment,
1063
+ namespace=namespace,
1064
+ name=output,
1065
+ metadata=metadata,
1066
+ )
1067
+ return ConceptTransform(function=transformation, output=concept)
1068
+
1069
+ @v_args(meta=True)
1070
+ def concept_nullable_modifier(self, meta: Meta, args) -> Modifier:
1071
+ return Modifier.NULLABLE
1072
+
1073
+ @v_args(meta=True)
1074
+ def select_hide_modifier(self, meta: Meta, args) -> Modifier:
1075
+ return Modifier.HIDDEN
1076
+
1077
+ @v_args(meta=True)
1078
+ def select_partial_modifier(self, meta: Meta, args) -> Modifier:
1079
+ return Modifier.PARTIAL
1080
+
1081
+ @v_args(meta=True)
1082
+ def select_item(self, meta: Meta, args) -> Optional[SelectItem]:
1083
+ modifiers = [arg for arg in args if isinstance(arg, Modifier)]
1084
+ args = [arg for arg in args if not isinstance(arg, (Modifier, Comment))]
1085
+
1086
+ if not args:
1087
+ return None
1088
+ if len(args) != 1:
1089
+ raise ParseError(
1090
+ "Malformed select statement"
1091
+ f" {args} {self.text_lookup[self.parse_address][meta.start_pos:meta.end_pos]}"
1092
+ )
1093
+ content = args[0]
1094
+ if isinstance(content, ConceptTransform):
1095
+ return SelectItem(content=content, modifiers=modifiers)
1096
+ return SelectItem(
1097
+ content=content,
1098
+ modifiers=modifiers,
1099
+ )
1100
+
1101
+ def select_list(self, args):
1102
+ return [arg for arg in args if arg]
1103
+
1104
+ def limit(self, args):
1105
+ return Limit(count=int(args[0].value))
1106
+
1107
+ def ordering(self, args: list[str]):
1108
+ base = args[0].lower()
1109
+ if len(args) > 1:
1110
+ null_sort = args[-1]
1111
+ return Ordering(" ".join([base, "nulls", null_sort.lower()]))
1112
+ return Ordering(base)
1113
+
1114
+ def order_list(self, args) -> List[OrderItem]:
1115
+ return [
1116
+ OrderItem(
1117
+ expr=x,
1118
+ order=y,
1119
+ )
1120
+ for x, y in zip(args[::2], args[1::2])
1121
+ ]
1122
+
1123
+ def order_by(self, args):
1124
+ return OrderBy(items=args[0])
1125
+
1126
+ def over_component(self, args):
1127
+ return ConceptRef(address=args[0].value.lstrip(",").strip())
1128
+
1129
+ def over_list(self, args):
1130
+ return [x for x in args]
1131
+
1132
+ def PUBLISH_ACTION(self, args) -> PublishAction:
1133
+ action = args.value.lower()
1134
+ if action == "publish":
1135
+ return PublishAction.PUBLISH
1136
+ elif action == "unpublish":
1137
+ return PublishAction.UNPUBLISH
1138
+ else:
1139
+ raise SyntaxError(f"Unknown publish action: {action}")
1140
+
1141
+ @v_args(meta=True)
1142
+ def publish_statement(self, meta: Meta, args) -> PublishStatement:
1143
+ targets = []
1144
+ scope = ValidationScope.DATASOURCES
1145
+ publish_action = PublishAction.PUBLISH
1146
+ for arg in args:
1147
+ if isinstance(arg, str):
1148
+ targets.append(arg)
1149
+ elif isinstance(arg, PublishAction):
1150
+ publish_action = arg
1151
+ elif isinstance(arg, ValidationScope):
1152
+ scope = arg
1153
+ if arg != ValidationScope.DATASOURCES:
1154
+ raise SyntaxError(
1155
+ f"Publishing is only supported for Datasources, got {arg} on line {meta.line}"
1156
+ )
1157
+ return PublishStatement(
1158
+ scope=scope,
1159
+ targets=targets,
1160
+ action=publish_action,
1161
+ )
1162
+
1163
+ def create_modifier_clause(self, args):
1164
+ token = args[0]
1165
+ if token.type == "CREATE_IF_NOT_EXISTS":
1166
+ return CreateMode.CREATE_IF_NOT_EXISTS
1167
+ elif token.type == "CREATE_OR_REPLACE":
1168
+ return CreateMode.CREATE_OR_REPLACE
1169
+
1170
+ @v_args(meta=True)
1171
+ def create_statement(self, meta: Meta, args) -> CreateStatement:
1172
+ targets = []
1173
+ scope = ValidationScope.DATASOURCES
1174
+ create_mode = CreateMode.CREATE
1175
+ for arg in args:
1176
+ if isinstance(arg, str):
1177
+ targets.append(arg)
1178
+ elif isinstance(arg, ValidationScope):
1179
+ scope = arg
1180
+ if arg != ValidationScope.DATASOURCES:
1181
+ raise SyntaxError(
1182
+ f"Creating is only supported for Datasources, got {arg} on line {meta.line}"
1183
+ )
1184
+ elif isinstance(arg, CreateMode):
1185
+ create_mode = arg
1186
+
1187
+ return CreateStatement(scope=scope, targets=targets, create_mode=create_mode)
1188
+
1189
+ def VALIDATE_SCOPE(self, args) -> ValidationScope:
1190
+ base: str = args.lower()
1191
+ if not base.endswith("s"):
1192
+ base += "s"
1193
+ return ValidationScope(base)
1194
+
1195
+ @v_args(meta=True)
1196
+ def validate_statement(self, meta: Meta, args) -> ValidateStatement:
1197
+ if len(args) > 1:
1198
+ scope = args[0]
1199
+ targets = args[1:]
1200
+ elif len(args) == 0:
1201
+ scope = ValidationScope.ALL
1202
+ targets = None
1203
+ else:
1204
+ scope = args[0]
1205
+ targets = None
1206
+ return ValidateStatement(
1207
+ scope=scope,
1208
+ targets=targets,
1209
+ )
1210
+
1211
+ @v_args(meta=True)
1212
+ def mock_statement(self, meta: Meta, args) -> MockStatement:
1213
+ return MockStatement(scope=args[0], targets=args[1:])
1214
+
1215
+ @v_args(meta=True)
1216
+ def merge_statement(self, meta: Meta, args) -> MergeStatementV2 | None:
1217
+ modifiers = []
1218
+ cargs: list[str] = []
1219
+ source_wildcard = None
1220
+ target_wildcard = None
1221
+ for arg in args:
1222
+ if isinstance(arg, Modifier):
1223
+ modifiers.append(arg)
1224
+ else:
1225
+ cargs.append(arg)
1226
+ source, target = cargs
1227
+ if source.endswith(".*"):
1228
+ if not target.endswith(".*"):
1229
+ raise ValueError("Invalid merge, source is wildcard, target is not")
1230
+ source_wildcard = source[:-2]
1231
+ target_wildcard = target[:-2]
1232
+ sources: list[Concept] = [
1233
+ v
1234
+ for k, v in self.environment.concepts.items()
1235
+ if v.namespace == source_wildcard
1236
+ ]
1237
+ targets: dict[str, Concept] = {}
1238
+ for x in sources:
1239
+ target = target_wildcard + "." + x.name
1240
+ if target in self.environment.concepts:
1241
+ targets[x.address] = self.environment.concepts[target]
1242
+ sources = [x for x in sources if x.address in targets]
1243
+ else:
1244
+ sources = [self.environment.concepts[source]]
1245
+ targets = {sources[0].address: self.environment.concepts[target]}
1246
+
1247
+ if self.parse_pass == ParsePass.VALIDATION:
1248
+ for source_c in sources:
1249
+ if isinstance(source_c, UndefinedConceptFull):
1250
+ raise SyntaxError(
1251
+ f"Cannot merge non-existent source concept {source_c.address} on line: {meta.line}"
1252
+ )
1253
+ new = MergeStatementV2(
1254
+ sources=sources,
1255
+ targets=targets,
1256
+ modifiers=modifiers,
1257
+ source_wildcard=source_wildcard,
1258
+ target_wildcard=target_wildcard,
1259
+ )
1260
+ for source_c in new.sources:
1261
+ self.environment.merge_concept(
1262
+ source_c, targets[source_c.address], modifiers
1263
+ )
1264
+
1265
+ return new
1266
+ return None
1267
+
1268
+ @v_args(meta=True)
1269
+ def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
1270
+ statement = RawSQLStatement(meta=Metadata(line_number=meta.line), text=args[0])
1271
+ return statement
1272
+
1273
+ def COPY_TYPE(self, args) -> IOType:
1274
+ return IOType(args.value)
1275
+
1276
+ @v_args(meta=True)
1277
+ def copy_statement(self, meta: Meta, args) -> CopyStatement:
1278
+ return CopyStatement(
1279
+ target=args[1],
1280
+ target_type=args[0],
1281
+ meta=Metadata(line_number=meta.line),
1282
+ select=args[-1],
1283
+ )
1284
+
1285
+ def resolve_import_address(self, address: str, is_stdlib: bool = False) -> str:
1286
+ if (
1287
+ isinstance(
1288
+ self.environment.config.import_resolver, FileSystemImportResolver
1289
+ )
1290
+ or is_stdlib
1291
+ ):
1292
+ with open(address, "r", encoding="utf-8") as f:
1293
+ text = f.read()
1294
+ elif isinstance(self.environment.config.import_resolver, DictImportResolver):
1295
+ lookup = address
1296
+ if lookup not in self.environment.config.import_resolver.content:
1297
+ raise ImportError(
1298
+ f"Unable to import file {lookup}, not resolvable from provided source files."
1299
+ )
1300
+ text = self.environment.config.import_resolver.content[lookup]
1301
+ else:
1302
+ raise ImportError(
1303
+ f"Unable to import file {address}, resolver type {type(self.environment.config.import_resolver)} not supported"
1304
+ )
1305
+ return text
1306
+
1307
+ def IMPORT_DOT(self, args) -> str:
1308
+ return "."
1309
+
1310
+ def import_statement(self, args: list[str]) -> ImportStatement:
1311
+ start = datetime.now()
1312
+ is_file_resolver = isinstance(
1313
+ self.environment.config.import_resolver, FileSystemImportResolver
1314
+ )
1315
+ parent_dirs = -1
1316
+ parsed_args = []
1317
+ for x in args:
1318
+ if x == ".":
1319
+ parent_dirs += 1
1320
+ else:
1321
+ parsed_args.append(x)
1322
+ parent_dirs = max(parent_dirs, 0)
1323
+ args = parsed_args
1324
+ if len(args) == 2:
1325
+ alias = args[-1]
1326
+ cache_key = args[-1]
1327
+ else:
1328
+ alias = self.environment.namespace
1329
+ cache_key = args[0]
1330
+ input_path = args[0]
1331
+ # lstrip off '.' from parent if they exist;
1332
+ # each one is an extra directory up after the first
1333
+
1334
+ path = input_path.split(".")
1335
+ is_stdlib = False
1336
+ if path[0] == "std":
1337
+ is_stdlib = True
1338
+ target = join(STDLIB_ROOT, *path) + ".preql"
1339
+ token_lookup: Path | str = Path(target)
1340
+ elif is_file_resolver:
1341
+ troot = Path(self.environment.working_path)
1342
+ if parent_dirs > 0:
1343
+ for _ in range(parent_dirs):
1344
+ troot = troot.parent
1345
+ target = join(troot, *path) + ".preql"
1346
+ # tokens + text are cached by path
1347
+ token_lookup = Path(target)
1348
+ elif isinstance(self.environment.config.import_resolver, DictImportResolver):
1349
+ target = ".".join(path)
1350
+ token_lookup = target
1351
+ else:
1352
+ raise NotImplementedError
1353
+
1354
+ # parser + env has to be cached by prior import path + current key
1355
+ key_path = self.import_keys + [cache_key]
1356
+ cache_lookup = "-".join(key_path)
1357
+
1358
+ # we don't iterate past the max parse depth
1359
+ if len(key_path) > MAX_PARSE_DEPTH:
1360
+ return ImportStatement(
1361
+ alias=alias, input_path=input_path, path=Path(target)
1362
+ )
1363
+
1364
+ if token_lookup in self.tokens:
1365
+ perf_logger.debug(f"\tTokens cached for {token_lookup}")
1366
+ raw_tokens = self.tokens[token_lookup]
1367
+ text = self.text_lookup[token_lookup]
1368
+ else:
1369
+ perf_logger.debug(f"\tTokens not cached for {token_lookup}, resolving")
1370
+ text = self.resolve_import_address(target, is_stdlib)
1371
+ self.text_lookup[token_lookup] = text
1372
+
1373
+ try:
1374
+ raw_tokens = PARSER.parse(text)
1375
+ except Exception as e:
1376
+ raise ImportError(
1377
+ f"Unable to import '{target}', parsing error: {e}"
1378
+ ) from e
1379
+ self.tokens[token_lookup] = raw_tokens
1380
+
1381
+ if cache_lookup in self.parsed:
1382
+ perf_logger.debug(f"\tEnvironment cached for {token_lookup}")
1383
+ nparser = self.parsed[cache_lookup]
1384
+ new_env = nparser.environment
1385
+ if nparser.parse_pass != ParsePass.VALIDATION:
1386
+ # nparser.transform(raw_tokens)
1387
+ second_pass_start = datetime.now()
1388
+ nparser.run_second_parse_pass()
1389
+ second_pass_end = datetime.now()
1390
+ perf_logger.debug(
1391
+ f"{second_pass_end - second_pass_start} seconds | Import {alias} key ({cache_key}) second pass took {second_pass_end - second_pass_start} to parse, {len(new_env.concepts)} concepts"
1392
+ )
1393
+ else:
1394
+ perf_logger.debug(f"\tParsing new for {token_lookup}")
1395
+ root = None
1396
+ if "." in str(token_lookup):
1397
+ root = str(token_lookup).rsplit(".", 1)[0]
1398
+ try:
1399
+ new_env = Environment(
1400
+ working_path=dirname(target),
1401
+ env_file_path=token_lookup,
1402
+ config=self.environment.config.copy_for_root(root=root),
1403
+ parameters=self.environment.parameters,
1404
+ )
1405
+ new_env.concepts.fail_on_missing = False
1406
+ self.parsed[self.parse_address] = self
1407
+ nparser = ParseToObjects(
1408
+ environment=new_env,
1409
+ parse_address=cache_lookup,
1410
+ token_address=token_lookup,
1411
+ parsed=self.parsed,
1412
+ tokens=self.tokens,
1413
+ text_lookup=self.text_lookup,
1414
+ import_keys=self.import_keys + [cache_key],
1415
+ parse_config=self.parse_config,
1416
+ )
1417
+ nparser.transform(raw_tokens)
1418
+ self.parsed[cache_lookup] = nparser
1419
+ except Exception as e:
1420
+ raise ImportError(
1421
+ f"Unable to import file {target}, parsing error: {e}"
1422
+ ) from e
1423
+
1424
+ parsed_path = Path(args[0])
1425
+ imps = ImportStatement(alias=alias, input_path=input_path, path=parsed_path)
1426
+
1427
+ self.environment.add_import(
1428
+ alias,
1429
+ new_env,
1430
+ Import(
1431
+ alias=alias,
1432
+ path=parsed_path,
1433
+ input_path=Path(target) if is_file_resolver else None,
1434
+ ),
1435
+ )
1436
+ end = datetime.now()
1437
+ perf_logger.debug(
1438
+ f"{end - start} seconds | Import {alias} key ({cache_key}) took to parse, {len(new_env.concepts)} concepts"
1439
+ )
1440
+ return imps
1441
+
1442
+ @v_args(meta=True)
1443
+ def show_category(self, meta: Meta, args) -> ShowCategory:
1444
+ return ShowCategory(args[0])
1445
+
1446
+ @v_args(meta=True)
1447
+ def show_statement(self, meta: Meta, args) -> ShowStatement:
1448
+ return ShowStatement(content=args[0])
1449
+
1450
+ @v_args(meta=True)
1451
+ def persist_partition_clause(self, meta: Meta, args) -> DatasourcePartitionClause:
1452
+ return DatasourcePartitionClause([ConceptRef(address=a) for a in args[0]])
1453
+
1454
+ @v_args(meta=True)
1455
+ def PERSIST_MODE(self, args) -> PersistMode:
1456
+ base = args.value.lower()
1457
+ if base == "persist":
1458
+ return PersistMode.OVERWRITE
1459
+ return PersistMode(base)
1460
+
1461
+ @v_args(meta=True)
1462
+ def auto_persist(self, meta: Meta, args) -> PersistStatement | None:
1463
+ if self.parse_pass != ParsePass.VALIDATION:
1464
+ return None
1465
+ persist_mode = args[0]
1466
+ target_name = args[1]
1467
+ where = args[2] if len(args) > 2 else None
1468
+
1469
+ if target_name not in self.environment.datasources:
1470
+ raise SyntaxError(
1471
+ f"Auto persist target datasource {target_name} does not exist in environment on line {meta.line}. Have {list(self.environment.datasources.keys())}"
1472
+ )
1473
+ target = self.environment.datasources[target_name]
1474
+ select: SelectStatement = target.create_update_statement(
1475
+ self.environment, where, line_no=meta.line
1476
+ )
1477
+ return PersistStatement(
1478
+ select=select,
1479
+ datasource=target,
1480
+ persist_mode=persist_mode,
1481
+ partition_by=target.incremental_by,
1482
+ meta=Metadata(line_number=meta.line),
1483
+ )
1484
+
1485
+ @v_args(meta=True)
1486
+ def full_persist(self, meta: Meta, args) -> PersistStatement | None:
1487
+ if self.parse_pass != ParsePass.VALIDATION:
1488
+ return None
1489
+ partition_clause = DatasourcePartitionClause([])
1490
+ labels = [x for x in args if isinstance(x, str)]
1491
+ for x in args:
1492
+ if isinstance(x, DatasourcePartitionClause):
1493
+ partition_clause = x
1494
+ if len(labels) == 2:
1495
+ identifier = labels[0]
1496
+ address = labels[1]
1497
+ else:
1498
+ identifier = labels[0]
1499
+ address = None
1500
+ target: Datasource | None = self.environment.datasources.get(identifier)
1501
+
1502
+ if not address and not target:
1503
+ raise SyntaxError(
1504
+ f'Append statement without concrete table address on line {meta.line} attempts to insert into datasource "{identifier}" that cannot be found in the environment. Add a physical address to create a new datasource, or check the name.'
1505
+ )
1506
+ elif target:
1507
+ address = target.safe_address
1508
+
1509
+ assert address is not None
1510
+
1511
+ modes = [x for x in args if isinstance(x, PersistMode)]
1512
+ mode = modes[0] if modes else PersistMode.OVERWRITE
1513
+ select: SelectStatement = [x for x in args if isinstance(x, SelectStatement)][0]
1514
+
1515
+ if mode == PersistMode.APPEND:
1516
+ if target is None:
1517
+ raise SyntaxError(
1518
+ f"Cannot append to non-existent datasource {identifier} on line {meta.line}."
1519
+ )
1520
+ new_datasource: Datasource = target
1521
+ if not new_datasource.partition_by == partition_clause.columns:
1522
+ raise SyntaxError(
1523
+ f"Cannot append to datasource {identifier} with different partitioning scheme then insert on line {meta.line}. Datasource partitioning: {new_datasource.partition_by}, insert partitioning: {partition_clause.columns if partition_clause else '[]'}"
1524
+ )
1525
+ if len(partition_clause.columns) > 1:
1526
+ raise NotImplementedError(
1527
+ "Incremental partition overwrites by more than 1 column are not yet supported."
1528
+ )
1529
+ for x in partition_clause.columns:
1530
+ concept = self.environment.concepts[x.address]
1531
+ if concept.output_datatype not in SUPPORTED_INCREMENTAL_TYPES:
1532
+ raise SyntaxError(
1533
+ f"Cannot incremental persist on concept {concept.address} of type {concept.output_datatype} on line {meta.line}."
1534
+ )
1535
+ elif target:
1536
+ new_datasource = target
1537
+ else:
1538
+ new_datasource = select.to_datasource(
1539
+ namespace=(
1540
+ self.environment.namespace
1541
+ if self.environment.namespace
1542
+ else DEFAULT_NAMESPACE
1543
+ ),
1544
+ name=identifier,
1545
+ address=Address(location=address),
1546
+ grain=select.grain,
1547
+ environment=self.environment,
1548
+ )
1549
+ return PersistStatement(
1550
+ select=select,
1551
+ datasource=new_datasource,
1552
+ persist_mode=mode,
1553
+ partition_by=partition_clause.columns if partition_clause else [],
1554
+ meta=Metadata(line_number=meta.line),
1555
+ )
1556
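In append mode the clause above demands an existing datasource whose partitioning matches the insert's partition clause, permits at most one incremental partition column, and restricts that column to incrementally comparable types. A compact sketch of those checks in isolation, assuming placeholder models rather than the real Datasource and Concept classes:

```python
from dataclasses import dataclass, field

# assumed type names; the real SUPPORTED_INCREMENTAL_TYPES lives elsewhere in the package
SUPPORTED_INCREMENTAL_TYPES = {"int", "date", "datetime", "timestamp"}


@dataclass
class Target:
    name: str
    partition_by: list[str] = field(default_factory=list)
    column_types: dict[str, str] = field(default_factory=dict)


def validate_append(target: Target | None, partition_cols: list[str]) -> None:
    if target is None:
        raise SyntaxError("Cannot append to a non-existent datasource")
    if target.partition_by != partition_cols:
        raise SyntaxError("Append partitioning must match the datasource partitioning")
    if len(partition_cols) > 1:
        raise NotImplementedError("Only single-column incremental overwrites are supported")
    for col in partition_cols:
        if target.column_types.get(col) not in SUPPORTED_INCREMENTAL_TYPES:
            raise SyntaxError(f"Cannot incrementally persist on column {col}")
```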
+
1557
+ @v_args(meta=True)
1558
+ def persist_statement(self, meta: Meta, args) -> PersistStatement:
1559
+ return args[0]
1560
+
1561
+ @v_args(meta=True)
1562
+ def align_item(self, meta: Meta, args) -> AlignItem:
1563
+ return AlignItem(
1564
+ alias=args[0],
1565
+ namespace=self.environment.namespace,
1566
+ concepts=[self.environment.concepts[arg].reference for arg in args[1:]],
1567
+ )
1568
+
1569
+ @v_args(meta=True)
1570
+ def align_clause(self, meta: Meta, args) -> AlignClause:
1571
+ return AlignClause(items=args)
1572
+
1573
+ @v_args(meta=True)
1574
+ def derive_item(self, meta: Meta, args) -> DeriveItem:
1575
+ return DeriveItem(
1576
+ expr=args[0], name=args[1], namespace=self.environment.namespace
1577
+ )
1578
+
1579
+ @v_args(meta=True)
1580
+ def derive_clause(self, meta: Meta, args) -> DeriveClause:
1581
+
1582
+ return DeriveClause(items=args)
1583
+
1584
+ @v_args(meta=True)
1585
+ def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
1586
+
1587
+ selects: list[SelectStatement] = []
1588
+ align: AlignClause | None = None
1589
+ limit: int | None = None
1590
+ order_by: OrderBy | None = None
1591
+ where: WhereClause | None = None
1592
+ having: HavingClause | None = None
1593
+ derive: DeriveClause | None = None
1594
+ for arg in args:
1595
+ atype = type(arg)
1596
+ if atype is SelectStatement:
1597
+ selects.append(arg)
1598
+ elif atype is Limit:
1599
+ limit = arg.count
1600
+ elif atype is OrderBy:
1601
+ order_by = arg
1602
+ elif atype is WhereClause:
1603
+ where = arg
1604
+ elif atype is HavingClause:
1605
+ having = arg
1606
+ elif atype is AlignClause:
1607
+ align = arg
1608
+ elif atype is DeriveClause:
1609
+ derive = arg
1610
+
1611
+ assert align is not None
1613
+
1614
+ derived_concepts = []
1615
+ new_selects = [x.as_lineage(self.environment) for x in selects]
1616
+ lineage = MultiSelectLineage(
1617
+ selects=new_selects,
1618
+ align=align,
1619
+ derive=derive,
1620
+ namespace=self.environment.namespace,
1621
+ where_clause=where,
1622
+ having_clause=having,
1623
+ limit=limit,
1624
+ hidden_components=set(y for x in new_selects for y in x.hidden_components),
1625
+ )
1626
+ for x in align.items:
1627
+ concept = align_item_to_concept(
1628
+ x,
1629
+ align,
1630
+ selects,
1631
+ where=where,
1632
+ having=having,
1633
+ limit=limit,
1634
+ environment=self.environment,
1635
+ )
1636
+ derived_concepts.append(concept)
1637
+ self.environment.add_concept(concept, meta=meta)
1638
+ if derive:
1639
+ for derived in derive.items:
1640
+ derivation = derived.expr
1641
+ name = derived.name
1642
+ if not isinstance(derivation, (Function, Comparison, WindowItem)):
1643
+ raise SyntaxError(
1644
+ f"Invalid derive expression {derivation} in {meta.line}, must be a function or conditional"
1645
+ )
1646
+ concept = derive_item_to_concept(
1647
+ derivation, name, lineage, self.environment.namespace
1648
+ )
1649
+ derived_concepts.append(concept)
1650
+ self.environment.add_concept(concept, meta=meta)
1651
+ multi = MultiSelectStatement(
1652
+ selects=selects,
1653
+ align=align,
1654
+ namespace=self.environment.namespace,
1655
+ where_clause=where,
1656
+ order_by=order_by,
1657
+ limit=limit,
1658
+ meta=Metadata(line_number=meta.line),
1659
+ derived_concepts=derived_concepts,
1660
+ derive=derive,
1661
+ )
1662
+ return multi
1663
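Because Lark hands the transformer a flat, heterogeneous list of children, the method above sorts them into slots purely by type before building the MultiSelectLineage. The same dispatch pattern in miniature (illustrative only):

```python
def bucket_children(children: list) -> dict[str, list]:
    """Group parse children by their Python type name, as the transformer does."""
    buckets: dict[str, list] = {}
    for child in children:
        buckets.setdefault(type(child).__name__, []).append(child)
    return buckets


# e.g. bucket_children([1, "a", 2.0, "b"]) -> {'int': [1], 'str': ['a', 'b'], 'float': [2.0]}
```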
+
1664
+ @v_args(meta=True)
1665
+ def select_statement(self, meta: Meta, args) -> SelectStatement:
1666
+ select_items: List[SelectItem] | None = None
1667
+ limit: int | None = None
1668
+ order_by: OrderBy | None = None
1669
+ where = None
1670
+ having = None
1671
+ for arg in args:
1672
+ atype = type(arg)
1673
+ if atype is list:
1674
+ select_items = arg
1675
+ elif atype is Limit:
1676
+ limit = arg.count
1677
+ elif atype is OrderBy:
1678
+ order_by = arg
1679
+ elif atype is WhereClause:
1680
+ if where is not None:
1681
+ raise ParseError(
1682
+ "Multiple where clauses defined are not supported!"
1683
+ )
1684
+ where = arg
1685
+ elif atype is HavingClause:
1686
+ having = arg
1687
+ if not select_items:
1688
+ raise ParseError("Malformed select, missing select items")
1689
+ pre_keys = set(self.environment.concepts.keys())
1690
+ base = SelectStatement.from_inputs(
1691
+ environment=self.environment,
1692
+ selection=select_items,
1693
+ order_by=order_by,
1694
+ where_clause=where,
1695
+ having_clause=having,
1696
+ limit=limit,
1697
+ meta=Metadata(line_number=meta.line),
1698
+ )
1699
+ if (
1700
+ self.parse_pass == ParsePass.INITIAL
1701
+ and self.parse_config.strict_name_shadow_enforcement
1702
+ ):
1703
+ intersection = base.locally_derived.intersection(pre_keys)
1704
+ if intersection:
1705
+ for x in intersection:
1706
+ if str(base.local_concepts[x].lineage) == str(
1707
+ self.environment.concepts[x].lineage
1708
+ ):
1709
+ local = base.local_concepts[x]
1710
+ friendly_name = (
1711
+ local.name
1712
+ if local.namespace == DEFAULT_NAMESPACE
1713
+ else local.namespace
1714
+ )
1715
+ raise NameShadowError(
1716
+ f"Select statement {base} creates a new concept '{friendly_name}' with identical definition as the existing concept '{friendly_name}'. Replace {base.local_concepts[x].lineage} with a direct reference to {friendly_name}."
1717
+ )
1718
+ else:
1719
+ raise NameShadowError(
1720
+ f"Select statement {base} creates new named concepts from calculations {list(intersection)} with identical name(s) to existing concept(s). Use new unique names for these."
1721
+ )
1722
+ return base
1723
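The strict shadow check compares the stringified lineage of each locally derived concept against any pre-existing concept of the same name: an identical definition is reported as a redundant redefinition, a different one as a shadow. A rough standalone sketch of that comparison, using plain dicts and ValueError in place of the real concept model and NameShadowError:

```python
def check_shadows(pre_existing: dict[str, str], locally_derived: dict[str, str]) -> None:
    """Both arguments map concept name -> stringified lineage."""
    for name, lineage in locally_derived.items():
        if name not in pre_existing:
            continue  # genuinely new concept, nothing to check
        if lineage == pre_existing[name]:
            raise ValueError(
                f"'{name}' redefines an existing concept identically; reference it directly"
            )
        raise ValueError(
            f"'{name}' shadows an existing concept with a different definition; pick a unique name"
        )
```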
+
1724
+ @v_args(meta=True)
1725
+ def address(self, meta: Meta, args):
1726
+ return args[0]
1727
+
1728
+ @v_args(meta=True)
1729
+ def query(self, meta: Meta, args):
1730
+ return Query(text=args[0])
1731
+
1732
+ @v_args(meta=True)
1733
+ def file(self, meta: Meta, args):
1734
+ raw_path = args[0][1:-1]
1735
+
1736
+ # Cloud storage URLs should be used as-is without path resolution
1737
+ cloud_prefixes = ("gcs://", "gs://", "s3://", "https://", "http://")
1738
+ is_cloud = raw_path.startswith(cloud_prefixes)
1739
+
1740
+ if is_cloud:
1741
+ base = raw_path
1742
+ suffix = "." + raw_path.rsplit(".", 1)[-1] if "." in raw_path else ""
1743
+ else:
1744
+ path = Path(raw_path)
1745
+ # if it's a relative path, look it up relative to current parsing directory
1746
+ if path.is_relative_to("."):
1747
+ path = Path(self.environment.working_path) / path
1748
+ base = str(path.resolve().absolute())
1749
+ suffix = path.suffix
1750
+
1751
+ def check_exists():
1752
+ if not is_cloud and not Path(base).exists():
1753
+ raise FileNotFoundError(
1754
+ f"File path {base} does not exist on line {meta.line}"
1755
+ )
1756
+
1757
+ if suffix == ".sql":
1758
+ check_exists()
1759
+ return File(path=base, type=AddressType.SQL)
1760
+ elif suffix == ".py":
1761
+ check_exists()
1762
+ return File(path=base, type=AddressType.PYTHON_SCRIPT)
1763
+ elif suffix == ".csv":
1764
+ return File(path=base, type=AddressType.CSV)
1765
+ elif suffix == ".tsv":
1766
+ return File(path=base, type=AddressType.TSV)
1767
+ elif suffix == ".parquet":
1768
+ return File(path=base, type=AddressType.PARQUET)
1769
+ else:
1770
+ raise ParseError(
1771
+ f"Unsupported file type {suffix} for path {raw_path} on line {meta.line}"
1772
+ )
1773
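The handler above leaves cloud URLs untouched, resolves relative local paths against the environment's working path, and dispatches on the file suffix, enforcing existence only for local .sql and .py sources. A minimal table-driven version of the suffix dispatch; the AddressType names mirror those used above, everything else is illustrative:

```python
from pathlib import Path

CLOUD_PREFIXES = ("gcs://", "gs://", "s3://", "https://", "http://")
SUFFIX_TO_TYPE = {
    ".sql": "SQL",
    ".py": "PYTHON_SCRIPT",
    ".csv": "CSV",
    ".tsv": "TSV",
    ".parquet": "PARQUET",
}


def classify_file(raw_path: str, working_path: str = ".") -> tuple[str, str]:
    """Return (resolved path or URL, address type name) for a quoted file literal."""
    if raw_path.startswith(CLOUD_PREFIXES):
        suffix = ("." + raw_path.rsplit(".", 1)[-1]) if "." in raw_path else ""
        return raw_path, SUFFIX_TO_TYPE.get(suffix, "UNKNOWN")
    path = Path(raw_path)
    if not path.is_absolute():
        path = Path(working_path) / path
    return str(path), SUFFIX_TO_TYPE.get(path.suffix, "UNKNOWN")
```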
+
1774
+ def where(self, args):
1775
+ root = args[0]
1776
+ root = expr_to_boolean(root, self.function_factory)
1777
+ return WhereClause(conditional=root)
1778
+
1779
+ def having(self, args):
1780
+ root = args[0]
1781
+ if not isinstance(root, (Comparison, Conditional, Parenthetical)):
1782
+ if arg_to_datatype(root) == DataType.BOOL:
1783
+ root = Comparison(left=root, right=True, operator=ComparisonOperator.EQ)
1784
+ else:
1785
+ root = Comparison(
1786
+ left=root,
1787
+ right=MagicConstants.NULL,
1788
+ operator=ComparisonOperator.IS_NOT,
1789
+ )
1790
+ return HavingClause(conditional=root)
1791
+
1792
+ @v_args(meta=True)
1793
+ def function_binding_list(self, meta: Meta, args) -> list[ArgBinding]:
1794
+ return args
1795
+
1796
+ @v_args(meta=True)
1797
+ def function_binding_type(self, meta: Meta, args) -> FunctionBindingType:
1798
+ return FunctionBindingType(type=args[0])
1799
+
1800
+ @v_args(meta=True)
1801
+ def function_binding_default(self, meta: Meta, args):
1802
+ return args[1]
1803
+
1804
+ @v_args(meta=True)
1805
+ def function_binding_item(self, meta: Meta, args) -> ArgBinding:
1806
+ default = None
1807
+ type = None
1808
+ for arg in args[1:]:
1809
+ if isinstance(arg, FunctionBindingType):
1810
+ type = arg.type
1811
+ else:
1812
+ default = arg
1813
+ return ArgBinding.model_construct(name=args[0], datatype=type, default=default)
1814
+
1815
+ @v_args(meta=True)
1816
+ def raw_function(self, meta: Meta, args) -> FunctionDeclaration:
1817
+ identity = args[0]
1818
+ function_arguments: list[ArgBinding] = args[1]
1819
+ output = args[2]
1820
+
1821
+ self.environment.functions[identity] = CustomFunctionFactory(
1822
+ function=output,
1823
+ namespace=self.environment.namespace,
1824
+ function_arguments=function_arguments,
1825
+ name=identity,
1826
+ )
1827
+ return FunctionDeclaration(name=identity, args=function_arguments, expr=output)
1828
+
1829
+ def custom_function(self, args) -> FunctionCallWrapper:
1830
+ name = args[0]
1831
+ args = args[1:]
1832
+ remapped = FunctionCallWrapper(
1833
+ content=self.environment.functions[name](*args), name=name, args=args
1834
+ )
1835
+
1836
+ return remapped
1837
+
1838
+ @v_args(meta=True)
1839
+ def function(self, meta: Meta, args) -> Function:
1840
+ return args[0]
1841
+
1842
+ @v_args(meta=True)
1843
+ def type_drop_clause(self, meta: Meta, args) -> DropOn:
1844
+ return DropOn([FunctionType(x) for x in args])
1845
+
1846
+ @v_args(meta=True)
1847
+ def type_add_clause(self, meta: Meta, args) -> AddOn:
1848
+ return AddOn([FunctionType(x) for x in args])
1849
+
1850
+ @v_args(meta=True)
1851
+ def type_declaration(self, meta: Meta, args) -> TypeDeclaration:
1852
+ key = args[0]
1853
+ datatype: list[DataType] = [x for x in args[1:] if isinstance(x, DataType)]
1854
+ if len(datatype) == 1:
1855
+ final_datatype: list[DataType] | DataType = datatype[0]
1856
+ else:
1857
+ final_datatype = datatype
1858
+ add_on = None
1859
+ drop_on = None
1860
+ for x in args[1:]:
1861
+ if isinstance(x, AddOn):
1862
+ add_on = x
1863
+ elif isinstance(x, DropOn):
1864
+ drop_on = x
1865
+ new = CustomType(
1866
+ name=key,
1867
+ type=final_datatype,
1868
+ drop_on=drop_on.functions if drop_on else [],
1869
+ add_on=add_on.functions if add_on else [],
1870
+ )
1871
+ self.environment.data_types[key] = new
1872
+ return TypeDeclaration(type=new)
1873
+
1874
+ def int_lit(self, args):
1875
+ return int("".join(args))
1876
+
1877
+ def bool_lit(self, args):
1878
+ return args[0].capitalize() == "True"
1879
+
1880
+ def null_lit(self, args):
1881
+ return NULL_VALUE
1882
+
1883
+ def float_lit(self, args):
1884
+ return float(args[0])
1885
+
1886
+ def array_lit(self, args):
1887
+ return list_to_wrapper(args)
1888
+
1889
+ def tuple_lit(self, args):
1890
+ return tuple_to_wrapper(args)
1891
+
1892
+ def string_lit(self, args) -> str:
1893
+ if not args:
1894
+ return ""
1895
+
1896
+ return args[0]
1897
+
1898
+ @v_args(meta=True)
1899
+ def struct_lit(self, meta, args):
1900
+ return self.function_factory.create_function(
1901
+ args, operator=FunctionType.STRUCT, meta=meta
1902
+ )
1903
+
1904
+ def map_lit(self, args):
1905
+ parsed = dict(zip(args[::2], args[1::2]))
1906
+ wrapped = dict_to_map_wrapper(parsed)
1907
+ return wrapped
1908
+
1909
+ def literal(self, args):
1910
+ return args[0]
1911
+
1912
+ def product_operator(self, args) -> Function | Any:
1913
+ if len(args) == 1:
1914
+ return args[0]
1915
+ result = args[0]
1916
+ for i in range(1, len(args), 2):
1917
+ new_result = None
1918
+ op = args[i]
1919
+ right = args[i + 1]
1920
+ if op == "*":
1921
+ new_result = self.function_factory.create_function(
1922
+ [result, right], operator=FunctionType.MULTIPLY
1923
+ )
1924
+ elif op == "**":
1925
+ new_result = self.function_factory.create_function(
1926
+ [result, right], operator=FunctionType.POWER
1927
+ )
1928
+ elif op == "/":
1929
+ new_result = self.function_factory.create_function(
1930
+ [result, right], operator=FunctionType.DIVIDE
1931
+ )
1932
+ elif op == "%":
1933
+ new_result = self.function_factory.create_function(
1934
+ [result, right], operator=FunctionType.MOD
1935
+ )
1936
+ else:
1937
+ raise ValueError(f"Unknown operator: {op}")
1938
+ result = new_result
1939
+ return result
1940
+
1941
+ def PLUS_OR_MINUS(self, args) -> str:
1942
+ return args.value
1943
+
1944
+ def MULTIPLY_DIVIDE_PERCENT(self, args) -> str:
1945
+ return args.value
1946
+
1947
+ @v_args(meta=True)
1948
+ def sum_operator(self, meta: Meta, args) -> Function | Any:
1949
+ if len(args) == 1:
1950
+ return args[0]
1951
+ result = args[0]
1952
+ for i in range(1, len(args), 2):
1953
+ new_result = None
1954
+ op = args[i].lower()
1955
+ right = args[i + 1]
1956
+ if op == "+":
1957
+ new_result = self.function_factory.create_function(
1958
+ [result, right], operator=FunctionType.ADD, meta=meta
1959
+ )
1960
+ elif op == "-":
1961
+ new_result = self.function_factory.create_function(
1962
+ [result, right], operator=FunctionType.SUBTRACT, meta=meta
1963
+ )
1964
+ elif op == "||":
1965
+ new_result = self.function_factory.create_function(
1966
+ [result, right], operator=FunctionType.CONCAT, meta=meta
1967
+ )
1968
+ elif op == "like":
1969
+ new_result = self.function_factory.create_function(
1970
+ [result, right], operator=FunctionType.LIKE, meta=meta
1971
+ )
1972
+ else:
1973
+ raise ValueError(f"Unknown operator: {op}")
1974
+ result = new_result
1975
+ return result
1976
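Both arithmetic handlers receive a flat alternating sequence of operand, operator, operand, ... and fold it left to right into nested function calls (precedence is already handled by having separate product and sum rules). The folding pattern on its own, detached from the function factory (a sketch only):

```python
OPS = {
    "+": "ADD", "-": "SUBTRACT", "||": "CONCAT",
    "*": "MULTIPLY", "**": "POWER", "/": "DIVIDE", "%": "MOD",
}


def fold_left(children: list) -> object:
    """Left-associative fold of [operand, op, operand, op, operand, ...]."""
    if len(children) == 1:
        return children[0]
    result = children[0]
    for i in range(1, len(children), 2):
        op, right = children[i], children[i + 1]
        result = (OPS[op], result, right)  # stand-in for create_function([result, right], ...)
    return result


assert fold_left([1, "+", 2, "-", 3]) == ("SUBTRACT", ("ADD", 1, 2), 3)
```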
+
1977
+ def comparison(self, args) -> Comparison:
1978
+ if len(args) == 1:
1979
+ return args[0]
1980
+ left = args[0]
1981
+ right = args[2]
1982
+ if args[1] in (ComparisonOperator.IN, ComparisonOperator.NOT_IN):
1983
+ return SubselectComparison(
1984
+ left=left,
1985
+ right=right,
1986
+ operator=args[1],
1987
+ )
1988
+ return Comparison(left=left, right=right, operator=args[1])
1989
+
1990
+ def between_comparison(self, args) -> Conditional:
1991
+ left_bound = args[1]
1992
+ right_bound = args[2]
1993
+ return Conditional(
1994
+ left=Comparison(
1995
+ left=args[0], right=left_bound, operator=ComparisonOperator.GTE
1996
+ ),
1997
+ right=Comparison(
1998
+ left=args[0], right=right_bound, operator=ComparisonOperator.LTE
1999
+ ),
2000
+ operator=BooleanOperator.AND,
2001
+ )
2002
+
2003
+ @v_args(meta=True)
2004
+ def subselect_comparison(self, meta: Meta, args) -> SubselectComparison:
2005
+ right = args[2]
2006
+
2007
+ while isinstance(right, Parenthetical) and isinstance(
2008
+ right.content,
2009
+ (
2010
+ Concept,
2011
+ Function,
2012
+ FilterItem,
2013
+ WindowItem,
2014
+ AggregateWrapper,
2015
+ ListWrapper,
2016
+ TupleWrapper,
2017
+ ),
2018
+ ):
2019
+ right = right.content
2020
+ if isinstance(right, (Function, FilterItem, WindowItem, AggregateWrapper)):
2021
+ right_concept = arbitrary_to_concept(right, environment=self.environment)
2022
+ self.environment.add_concept(right_concept, meta=meta)
2023
+ right = right_concept.reference
2024
+ return SubselectComparison(
2025
+ left=args[0],
2026
+ right=right,
2027
+ operator=args[1],
2028
+ )
2029
+
2030
+ def expr_tuple(self, args):
2031
+ datatypes = set([arg_to_datatype(x) for x in args])
2032
+ if len(datatypes) != 1:
2033
+ raise ParseError("Tuple must have same type for all elements")
2034
+ return TupleWrapper(val=tuple(args), type=datatypes.pop())
2035
+
2036
+ def parenthetical(self, args):
2037
+ return Parenthetical(content=args[0])
2038
+
2039
+ @v_args(meta=True)
2040
+ def condition_parenthetical(self, meta, args):
2041
+ if len(args) == 2:
2042
+ return Comparison(
2043
+ left=Parenthetical(content=args[1]),
2044
+ right=False,
2045
+ operator=ComparisonOperator.EQ,
2046
+ )
2047
+ return Parenthetical(content=args[0])
2048
+
2049
+ def conditional(self, args):
2050
+ def munch_args(args):
2051
+ while args:
2052
+ if len(args) == 1:
2053
+ return args[0]
2054
+ else:
2055
+ return Conditional(
2056
+ left=args[0], operator=args[1], right=munch_args(args[2:])
2057
+ )
2058
+
2059
+ return munch_args(args)
2060
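Unlike the arithmetic handlers, `conditional` folds to the right, so `a AND b OR c` groups as `a AND (b OR c)`. A standalone re-creation of `munch_args` on plain tuples (illustrative; the real code builds Conditional models):

```python
def munch(children: list) -> object:
    """Right-fold an alternating [operand, op, operand, op, ...] list."""
    if len(children) == 1:
        return children[0]
    # keep the leftmost operand, recurse over everything after the first operator
    return (children[1], children[0], munch(children[2:]))


assert munch(["a", "and", "b", "or", "c"]) == ("and", "a", ("or", "b", "c"))
```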
+
2061
+ def window_order(self, args):
2062
+ return WindowOrder(args[0])
2063
+
2064
+ def window_order_by(self, args):
2065
+ # flatten tree
2066
+ return args[0]
2067
+
2068
+ def window(self, args):
2069
+
2070
+ return Window(count=args[1].value, window_order=args[0])
2071
+
2072
+ def WINDOW_TYPE(self, args):
2073
+ return WindowType(args.strip())
2074
+
2075
+ def window_item_over(self, args):
2076
+
2077
+ return WindowItemOver(contents=args[0])
2078
+
2079
+ def window_item_order(self, args):
2080
+ return WindowItemOrder(contents=args[0])
2081
+
2082
+ def logical_operator(self, args):
2083
+ return BooleanOperator(args[0].value.lower())
2084
+
2085
+ def DATE_PART(self, args):
2086
+ return DatePart(args.value)
2087
+
2088
+ @v_args(meta=True)
2089
+ def window_item(self, meta: Meta, args) -> WindowItem:
2090
+ type: WindowType = args[0]
2091
+ order_by = []
2092
+ over = []
2093
+ index = None
2094
+ concept: Concept | None = None
2095
+ for item in args:
2096
+ if isinstance(item, int):
2097
+ index = item
2098
+ elif isinstance(item, WindowItemOrder):
2099
+ order_by = item.contents
2100
+ elif isinstance(item, WindowItemOver):
2101
+ over = item.contents
2102
+ elif isinstance(item, str):
2103
+ concept = self.environment.concepts[item]
2104
+ elif isinstance(item, ConceptRef):
2105
+ concept = self.environment.concepts[item.address]
2106
+ elif isinstance(item, WindowType):
2107
+ type = item
2108
+ else:
2109
+ concept = arbitrary_to_concept(item, environment=self.environment)
2110
+ self.environment.add_concept(concept, meta=meta)
2111
+ if not concept:
2112
+ raise ParseError(
2113
+ f"Window statements must be on fields, not constants - error in: `{self.text_lookup[self.parse_address][meta.start_pos:meta.end_pos]}`"
2114
+ )
2115
+ return WindowItem(
2116
+ type=type,
2117
+ content=concept.reference,
2118
+ over=over,
2119
+ order_by=order_by,
2120
+ index=index,
2121
+ )
2122
+
2123
+ def filter_item(self, args) -> FilterItem:
2124
+ where: WhereClause
2125
+ expr, raw = args
2126
+ if isinstance(raw, WhereClause):
2127
+ where = raw
2128
+ else:
2129
+ where = WhereClause.model_construct(
2130
+ conditional=expr_to_boolean(raw, self.function_factory)
2131
+ )
2132
+ if isinstance(expr, str):
2133
+ expr = self.environment.concepts[expr].reference
2134
+ return FilterItem(content=expr, where=where)
2135
+
2136
+ # BEGIN FUNCTIONS
2137
+ @v_args(meta=True)
2138
+ def expr_reference(self, meta, args) -> Concept:
2139
+ return self.environment.concepts.__getitem__(args[0], meta.line)
2140
+
2141
+ def expr(self, args):
2142
+ if len(args) > 1:
2143
+ raise ParseError("Expression should have one child only.")
2144
+ return args[0]
2145
+
2146
+ def aggregate_over(self, args):
2147
+ return args[0]
2148
+
2149
+ def aggregate_all(self, args):
2150
+ return [
2151
+ ConceptRef(
2152
+ address=f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}",
2153
+ datatype=DataType.INTEGER,
2154
+ )
2155
+ ]
2156
+
2157
+ def aggregate_functions(self, args):
2158
+ if len(args) == 2:
2159
+ return AggregateWrapper(function=args[0], by=args[1])
2160
+ return AggregateWrapper(function=args[0])
2161
+
2162
+ @v_args(meta=True)
2163
+ def index_access(self, meta, args):
2164
+ args = process_function_args(args, meta=meta, environment=self.environment)
2165
+ base = args[0]
2166
+ if base.datatype == DataType.MAP or isinstance(base.datatype, MapType):
2167
+ return self.function_factory.create_function(
2168
+ args, FunctionType.MAP_ACCESS, meta
2169
+ )
2170
+ return self.function_factory.create_function(
2171
+ args, FunctionType.INDEX_ACCESS, meta
2172
+ )
2173
+
2174
+ @v_args(meta=True)
2175
+ def map_key_access(self, meta, args):
2176
+ return self.function_factory.create_function(
2177
+ args, FunctionType.MAP_ACCESS, meta
2178
+ )
2179
+
2180
+ @v_args(meta=True)
2181
+ def attr_access(self, meta, args):
2182
+ return self.function_factory.create_function(
2183
+ args, FunctionType.ATTR_ACCESS, meta
2184
+ )
2185
+
2186
+ @v_args(meta=True)
2187
+ def fcoalesce(self, meta, args):
2188
+ return self.function_factory.create_function(args, FunctionType.COALESCE, meta)
2189
+
2190
+ @v_args(meta=True)
2191
+ def fnullif(self, meta, args):
2192
+ return self.function_factory.create_function(args, FunctionType.NULLIF, meta)
2193
+
2194
+ @v_args(meta=True)
2195
+ def frecurse_edge(self, meta, args):
2196
+ return self.function_factory.create_function(
2197
+ args, FunctionType.RECURSE_EDGE, meta
2198
+ )
2199
+
2200
+ @v_args(meta=True)
2201
+ def unnest(self, meta, args):
2202
+
2203
+ return self.function_factory.create_function(args, FunctionType.UNNEST, meta)
2204
+
2205
+ @v_args(meta=True)
2206
+ def count(self, meta, args):
2207
+ return self.function_factory.create_function(args, FunctionType.COUNT, meta)
2208
+
2209
+ @v_args(meta=True)
2210
+ def fgroup(self, meta, args):
2211
+ if len(args) == 2:
2212
+ fargs = [args[0]] + list(args[1])
2213
+ else:
2214
+ fargs = [args[0]]
2215
+ return self.function_factory.create_function(fargs, FunctionType.GROUP, meta)
2216
+
2217
+ @v_args(meta=True)
2218
+ def fabs(self, meta, args):
2219
+ return self.function_factory.create_function(args, FunctionType.ABS, meta)
2220
+
2221
+ @v_args(meta=True)
2222
+ def count_distinct(self, meta, args):
2223
+ return self.function_factory.create_function(
2224
+ args, FunctionType.COUNT_DISTINCT, meta
2225
+ )
2226
+
2227
+ @v_args(meta=True)
2228
+ def sum(self, meta, args):
2229
+ return self.function_factory.create_function(args, FunctionType.SUM, meta)
2230
+
2231
+ @v_args(meta=True)
2232
+ def array_agg(self, meta, args):
2233
+ return self.function_factory.create_function(args, FunctionType.ARRAY_AGG, meta)
2234
+
2235
+ @v_args(meta=True)
2236
+ def any(self, meta, args):
2237
+ return self.function_factory.create_function(args, FunctionType.ANY, meta)
2238
+
2239
+ @v_args(meta=True)
2240
+ def bool_and(self, meta, args):
2241
+ return self.function_factory.create_function(args, FunctionType.BOOL_AND, meta)
2242
+
2243
+ @v_args(meta=True)
2244
+ def bool_or(self, meta, args):
2245
+ return self.function_factory.create_function(args, FunctionType.BOOL_OR, meta)
2246
+
2247
+ @v_args(meta=True)
2248
+ def avg(self, meta, args):
2249
+ return self.function_factory.create_function(args, FunctionType.AVG, meta)
2250
+
2251
+ @v_args(meta=True)
2252
+ def max(self, meta, args):
2253
+ return self.function_factory.create_function(args, FunctionType.MAX, meta)
2254
+
2255
+ @v_args(meta=True)
2256
+ def min(self, meta, args):
2257
+ return self.function_factory.create_function(args, FunctionType.MIN, meta)
2258
+
2259
+ @v_args(meta=True)
2260
+ def len(self, meta, args):
2261
+ return self.function_factory.create_function(args, FunctionType.LENGTH, meta)
2262
+
2263
+ @v_args(meta=True)
2264
+ def fsplit(self, meta, args):
2265
+ return self.function_factory.create_function(args, FunctionType.SPLIT, meta)
2266
+
2267
+ @v_args(meta=True)
2268
+ def concat(self, meta, args):
2269
+ return self.function_factory.create_function(args, FunctionType.CONCAT, meta)
2270
+
2271
+ @v_args(meta=True)
2272
+ def union(self, meta, args):
2273
+ return self.function_factory.create_function(args, FunctionType.UNION, meta)
2274
+
2275
+ @v_args(meta=True)
2276
+ def like(self, meta, args):
2277
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
2278
+
2279
+ @v_args(meta=True)
2280
+ def alt_like(self, meta, args):
2281
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
2282
+
2283
+ @v_args(meta=True)
2284
+ def ilike(self, meta, args):
2285
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
2286
+
2287
+ @v_args(meta=True)
2288
+ def upper(self, meta, args):
2289
+ return self.function_factory.create_function(args, FunctionType.UPPER, meta)
2290
+
2291
+ @v_args(meta=True)
2292
+ def fstrpos(self, meta, args):
2293
+ return self.function_factory.create_function(args, FunctionType.STRPOS, meta)
2294
+
2295
+ @v_args(meta=True)
2296
+ def freplace(self, meta, args):
2297
+ return self.function_factory.create_function(args, FunctionType.REPLACE, meta)
2298
+
2299
+ @v_args(meta=True)
2300
+ def fcontains(self, meta, args):
2301
+ return self.function_factory.create_function(args, FunctionType.CONTAINS, meta)
2302
+
2303
+ @v_args(meta=True)
2304
+ def ftrim(self, meta, args):
2305
+ return self.function_factory.create_function(args, FunctionType.TRIM, meta)
2306
+
2307
+ @v_args(meta=True)
2308
+ def fhash(self, meta, args):
2309
+ return self.function_factory.create_function(args, FunctionType.HASH, meta)
2310
+
2311
+ @v_args(meta=True)
2312
+ def fsubstring(self, meta, args):
2313
+ return self.function_factory.create_function(args, FunctionType.SUBSTRING, meta)
2314
+
2315
+ @v_args(meta=True)
2316
+ def flower(self, meta, args):
2317
+ return self.function_factory.create_function(args, FunctionType.LOWER, meta)
2318
+
2319
+ @v_args(meta=True)
2320
+ def fregexp_contains(self, meta, args):
2321
+ return self.function_factory.create_function(
2322
+ args, FunctionType.REGEXP_CONTAINS, meta
2323
+ )
2324
+
2325
+ @v_args(meta=True)
2326
+ def fregexp_extract(self, meta, args):
2327
+ if len(args) == 2:
2328
+ # this is a magic value to represent the default behavior
2329
+ args.append(-1)
2330
+ return self.function_factory.create_function(
2331
+ args, FunctionType.REGEXP_EXTRACT, meta
2332
+ )
2333
+
2334
+ @v_args(meta=True)
2335
+ def fregexp_replace(self, meta, args):
2336
+ return self.function_factory.create_function(
2337
+ args, FunctionType.REGEXP_REPLACE, meta
2338
+ )
2339
+
2340
+ # date functions
2341
+ @v_args(meta=True)
2342
+ def fdate(self, meta, args):
2343
+ return self.function_factory.create_function(args, FunctionType.DATE, meta)
2344
+
2345
+ @v_args(meta=True)
2346
+ def fdate_trunc(self, meta, args):
2347
+ return self.function_factory.create_function(
2348
+ args, FunctionType.DATE_TRUNCATE, meta
2349
+ )
2350
+
2351
+ @v_args(meta=True)
2352
+ def fdate_part(self, meta, args):
2353
+ return self.function_factory.create_function(args, FunctionType.DATE_PART, meta)
2354
+
2355
+ @v_args(meta=True)
2356
+ def fdate_add(self, meta, args):
2357
+ return self.function_factory.create_function(args, FunctionType.DATE_ADD, meta)
2358
+
2359
+ @v_args(meta=True)
2360
+ def fdate_sub(self, meta, args):
2361
+ return self.function_factory.create_function(args, FunctionType.DATE_SUB, meta)
2362
+
2363
+ @v_args(meta=True)
2364
+ def fdate_diff(self, meta, args):
2365
+ return self.function_factory.create_function(args, FunctionType.DATE_DIFF, meta)
2366
+
2367
+ @v_args(meta=True)
2368
+ def fdatetime(self, meta, args):
2369
+ return self.function_factory.create_function(args, FunctionType.DATETIME, meta)
2370
+
2371
+ @v_args(meta=True)
2372
+ def ftimestamp(self, meta, args):
2373
+ return self.function_factory.create_function(args, FunctionType.TIMESTAMP, meta)
2374
+
2375
+ @v_args(meta=True)
2376
+ def fsecond(self, meta, args):
2377
+ return self.function_factory.create_function(args, FunctionType.SECOND, meta)
2378
+
2379
+ @v_args(meta=True)
2380
+ def fminute(self, meta, args):
2381
+ return self.function_factory.create_function(args, FunctionType.MINUTE, meta)
2382
+
2383
+ @v_args(meta=True)
2384
+ def fhour(self, meta, args):
2385
+ return self.function_factory.create_function(args, FunctionType.HOUR, meta)
2386
+
2387
+ @v_args(meta=True)
2388
+ def fday(self, meta, args):
2389
+ return self.function_factory.create_function(args, FunctionType.DAY, meta)
2390
+
2391
+ @v_args(meta=True)
2392
+ def fday_name(self, meta, args):
2393
+ return self.function_factory.create_function(args, FunctionType.DAY_NAME, meta)
2394
+
2395
+ @v_args(meta=True)
2396
+ def fday_of_week(self, meta, args):
2397
+ return self.function_factory.create_function(
2398
+ args, FunctionType.DAY_OF_WEEK, meta
2399
+ )
2400
+
2401
+ @v_args(meta=True)
2402
+ def fweek(self, meta, args):
2403
+ return self.function_factory.create_function(args, FunctionType.WEEK, meta)
2404
+
2405
+ @v_args(meta=True)
2406
+ def fmonth(self, meta, args):
2407
+ return self.function_factory.create_function(args, FunctionType.MONTH, meta)
2408
+
2409
+ @v_args(meta=True)
2410
+ def fmonth_name(self, meta, args):
2411
+ return self.function_factory.create_function(
2412
+ args, FunctionType.MONTH_NAME, meta
2413
+ )
2414
+
2415
+ @v_args(meta=True)
2416
+ def fquarter(self, meta, args):
2417
+ return self.function_factory.create_function(args, FunctionType.QUARTER, meta)
2418
+
2419
+ @v_args(meta=True)
2420
+ def fyear(self, meta, args):
2421
+ return self.function_factory.create_function(args, FunctionType.YEAR, meta)
2422
+
2423
+ def internal_fcast(self, meta, args) -> Function:
2424
+ args = process_function_args(args, meta=meta, environment=self.environment)
2425
+
2426
+ # Destructure for readability
2427
+ value, dtype = args[0], args[1]
2428
+ processed: Any
2429
+ if isinstance(value, str):
2430
+ match dtype:
2431
+ case DataType.DATE:
2432
+ processed = date.fromisoformat(value)
2433
+ case DataType.DATETIME | DataType.TIMESTAMP:
2434
+ processed = datetime.fromisoformat(value)
2435
+ case DataType.INTEGER:
2436
+ processed = int(value)
2437
+ case DataType.FLOAT:
2438
+ processed = float(value)
2439
+ case DataType.BOOL:
2440
+ processed = value.capitalize() == "True"
2441
+ case DataType.STRING:
2442
+ processed = value
2443
+ case _:
2444
+ raise SyntaxError(f"Invalid cast type {dtype}")
2445
+
2446
+ # Determine function type and arguments
2447
+ if isinstance(dtype, TraitDataType):
2448
+ return self.function_factory.create_function(
2449
+ [processed, dtype], FunctionType.TYPED_CONSTANT, meta
2450
+ )
2451
+
2452
+ return self.function_factory.create_function(
2453
+ [processed], FunctionType.CONSTANT, meta
2454
+ )
2455
+
2456
+ return self.function_factory.create_function(args, FunctionType.CAST, meta)
2457
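When the value being cast is a string literal, the helper above folds the cast at parse time into a constant (or a typed constant for trait types) instead of emitting a runtime CAST. A detached sketch of that literal folding; the DataType names mirror the code, the rest is illustrative:

```python
from datetime import date, datetime


def fold_literal_cast(value, dtype: str):
    """Return a Python constant for string-literal casts, or None to signal a runtime CAST."""
    if not isinstance(value, str):
        return None
    folders = {
        "DATE": date.fromisoformat,
        "DATETIME": datetime.fromisoformat,
        "TIMESTAMP": datetime.fromisoformat,
        "INTEGER": int,
        "FLOAT": float,
        "BOOL": lambda v: v.capitalize() == "True",
        "STRING": str,
    }
    if dtype not in folders:
        raise SyntaxError(f"Invalid cast type {dtype}")
    return folders[dtype](value)


assert fold_literal_cast("2024-01-31", "DATE") == date(2024, 1, 31)
```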
+
2458
+ @v_args(meta=True)
2459
+ def fdate_spine(self, meta, args) -> Function:
2460
+ return self.function_factory.create_function(
2461
+ args, FunctionType.DATE_SPINE, meta
2462
+ )
2463
+
2464
+ # utility functions
2465
+ @v_args(meta=True)
2466
+ def fcast(self, meta, args) -> Function:
2467
+ return self.internal_fcast(meta, args)
2468
+
2469
+ # math functions
2470
+ @v_args(meta=True)
2471
+ def fadd(self, meta, args) -> Function:
2472
+
2473
+ return self.function_factory.create_function(args, FunctionType.ADD, meta)
2474
+
2475
+ @v_args(meta=True)
2476
+ def fsub(self, meta, args) -> Function:
2477
+ return self.function_factory.create_function(args, FunctionType.SUBTRACT, meta)
2478
+
2479
+ @v_args(meta=True)
2480
+ def fmul(self, meta, args) -> Function:
2481
+ return self.function_factory.create_function(args, FunctionType.MULTIPLY, meta)
2482
+
2483
+ @v_args(meta=True)
2484
+ def fdiv(self, meta: Meta, args) -> Function:
2485
+ return self.function_factory.create_function(args, FunctionType.DIVIDE, meta)
2486
+
2487
+ @v_args(meta=True)
2488
+ def fmod(self, meta: Meta, args) -> Function:
2489
+ return self.function_factory.create_function(args, FunctionType.MOD, meta)
2490
+
2491
+ @v_args(meta=True)
2492
+ def fsqrt(self, meta: Meta, args) -> Function:
2493
+ return self.function_factory.create_function(args, FunctionType.SQRT, meta)
2494
+
2495
+ @v_args(meta=True)
2496
+ def frandom(self, meta: Meta, args) -> Function:
2497
+ return self.function_factory.create_function(args, FunctionType.RANDOM, meta)
2498
+
2499
+ @v_args(meta=True)
2500
+ def fround(self, meta, args) -> Function:
2501
+ if len(args) == 1:
2502
+ args.append(0)
2503
+ return self.function_factory.create_function(args, FunctionType.ROUND, meta)
2504
+
2505
+ @v_args(meta=True)
2506
+ def flog(self, meta, args) -> Function:
2507
+ if len(args) == 1:
2508
+ args.append(10)
2509
+ return self.function_factory.create_function(args, FunctionType.LOG, meta)
2510
+
2511
+ @v_args(meta=True)
2512
+ def ffloor(self, meta, args) -> Function:
2513
+ return self.function_factory.create_function(args, FunctionType.FLOOR, meta)
2514
+
2515
+ @v_args(meta=True)
2516
+ def fceil(self, meta, args) -> Function:
2517
+ return self.function_factory.create_function(args, FunctionType.CEIL, meta)
2518
+
2519
+ @v_args(meta=True)
2520
+ def fcase(self, meta, args: List[Union[CaseWhen, CaseElse]]) -> Function:
2521
+ return self.function_factory.create_function(args, FunctionType.CASE, meta)
2522
+
2523
+ @v_args(meta=True)
2524
+ def fcase_when(self, meta, args) -> CaseWhen:
2525
+ args = process_function_args(args, meta=meta, environment=self.environment)
2526
+ root = expr_to_boolean(args[0], self.function_factory)
2527
+ return CaseWhen(comparison=root, expr=args[1])
2528
+
2529
+ @v_args(meta=True)
2530
+ def fcase_else(self, meta, args) -> CaseElse:
2531
+ args = process_function_args(args, meta=meta, environment=self.environment)
2532
+ return CaseElse(expr=args[0])
2533
+
2534
+ @v_args(meta=True)
2535
+ def fcurrent_date(self, meta, args):
2536
+ return CurrentDate([])
2537
+
2538
+ @v_args(meta=True)
2539
+ def fcurrent_datetime(self, meta, args):
2540
+ return self.function_factory.create_function(
2541
+ args=[], operator=FunctionType.CURRENT_DATETIME, meta=meta
2542
+ )
2543
+
2544
+ @v_args(meta=True)
2545
+ def fcurrent_timestamp(self, meta, args):
2546
+ return self.function_factory.create_function(
2547
+ args=[], operator=FunctionType.CURRENT_TIMESTAMP, meta=meta
2548
+ )
2549
+
2550
+ @v_args(meta=True)
2551
+ def fnot(self, meta, args):
2552
+ if arg_to_datatype(args[0]) == DataType.BOOL:
2553
+ return Comparison(
2554
+ left=self.function_factory.create_function(
2555
+ [args[0], False], FunctionType.COALESCE, meta
2556
+ ),
2557
+ operator=ComparisonOperator.EQ,
2558
+ right=False,
2559
+ meta=meta,
2560
+ )
2561
+ return self.function_factory.create_function(args, FunctionType.IS_NULL, meta)
2562
+
2563
+ @v_args(meta=True)
2564
+ def fbool(self, meta, args):
2565
+ return self.function_factory.create_function(args, FunctionType.BOOL, meta)
2566
+
2567
+ @v_args(meta=True)
2568
+ def fmap_keys(self, meta, args):
2569
+ return self.function_factory.create_function(args, FunctionType.MAP_KEYS, meta)
2570
+
2571
+ @v_args(meta=True)
2572
+ def fmap_values(self, meta, args):
2573
+ return self.function_factory.create_function(
2574
+ args, FunctionType.MAP_VALUES, meta
2575
+ )
2576
+
2577
+ @v_args(meta=True)
2578
+ def farray_sum(self, meta, args):
2579
+ return self.function_factory.create_function(args, FunctionType.ARRAY_SUM, meta)
2580
+
2581
+ @v_args(meta=True)
2582
+ def fgenerate_array(self, meta, args):
2583
+ return self.function_factory.create_function(
2584
+ args, FunctionType.GENERATE_ARRAY, meta
2585
+ )
2586
+
2587
+ @v_args(meta=True)
2588
+ def farray_distinct(self, meta, args):
2589
+ return self.function_factory.create_function(
2590
+ args, FunctionType.ARRAY_DISTINCT, meta
2591
+ )
2592
+
2593
+ @v_args(meta=True)
2594
+ def farray_to_string(self, meta, args):
2595
+ return self.function_factory.create_function(
2596
+ args, FunctionType.ARRAY_TO_STRING, meta
2597
+ )
2598
+
2599
+ @v_args(meta=True)
2600
+ def farray_sort(self, meta, args):
2601
+ if len(args) == 1:
2602
+ # default to ascending order when no ordering is specified
2603
+ args = [args[0], Ordering.ASCENDING]
2604
+ return self.function_factory.create_function(
2605
+ args, FunctionType.ARRAY_SORT, meta
2606
+ )
2607
+
2608
+ @v_args(meta=True)
2609
+ def transform_lambda(self, meta, args):
2610
+ return self.environment.functions[args[0]]
2611
+
2612
+ @v_args(meta=True)
2613
+ def farray_transform(self, meta, args) -> Function:
2614
+ factory: CustomFunctionFactory = args[1]
2615
+ if not len(factory.function_arguments) == 1:
2616
+ raise InvalidSyntaxException(
2617
+ "Array transform function must have exactly one argument;"
2618
+ )
2619
+ array_type = arg_to_datatype(args[0])
2620
+ if not isinstance(array_type, ArrayType):
2621
+ raise InvalidSyntaxException(
2622
+ f"Array transform function must be applied to an array, not {array_type}"
2623
+ )
2624
+ return self.function_factory.create_function(
2625
+ [
2626
+ args[0],
2627
+ factory.function_arguments[0],
2628
+ factory(
2629
+ ArgBinding(
2630
+ name=factory.function_arguments[0].name,
2631
+ datatype=array_type.value_data_type,
2632
+ )
2633
+ ),
2634
+ ],
2635
+ FunctionType.ARRAY_TRANSFORM,
2636
+ meta,
2637
+ )
2638
+
2639
+ @v_args(meta=True)
2640
+ def farray_filter(self, meta, args) -> Function:
2641
+ factory: CustomFunctionFactory = args[1]
2642
+ if not len(factory.function_arguments) == 1:
2643
+ raise InvalidSyntaxException(
2644
+ "Array filter function must have exactly one argument;"
2645
+ )
2646
+ array_type = arg_to_datatype(args[0])
2647
+ if not isinstance(array_type, ArrayType):
2648
+ raise InvalidSyntaxException(
2649
+ f"Array filter function must be applied to an array, not {array_type}"
2650
+ )
2651
+ return self.function_factory.create_function(
2652
+ [
2653
+ args[0],
2654
+ factory.function_arguments[0],
2655
+ factory(
2656
+ ArgBinding(
2657
+ name=factory.function_arguments[0].name,
2658
+ datatype=array_type.value_data_type,
2659
+ )
2660
+ ),
2661
+ ],
2662
+ FunctionType.ARRAY_FILTER,
2663
+ meta,
2664
+ )
2665
+
2666
+
2667
+ def unpack_visit_error(e: VisitError, text: str | None = None):
2668
+ """This is required to get exceptions from imports, which would
2669
+ raise nested VisitErrors"""
2670
+ if isinstance(e.orig_exc, VisitError):
2671
+ unpack_visit_error(e.orig_exc, text)
2672
+ elif isinstance(e.orig_exc, (UndefinedConceptException, ImportError)):
2673
+ raise e.orig_exc
2674
+ elif isinstance(e.orig_exc, InvalidSyntaxException):
2675
+ raise e.orig_exc
2676
+ elif isinstance(e.orig_exc, (SyntaxError, TypeError)):
2677
+ if isinstance(e.obj, Tree):
2678
+ if text:
2679
+ extract = text[e.obj.meta.start_pos - 5 : e.obj.meta.end_pos + 5]
2680
+ raise InvalidSyntaxException(
2681
+ str(e.orig_exc)
2682
+ + " Raised when parsing rule: "
2683
+ + str(e.rule)
2684
+ + f' Line: {e.obj.meta.line} "...{extract}..."'
2685
+ )
2686
+ raise InvalidSyntaxException(
2687
+ str(e.orig_exc) + " in " + str(e.rule) + f" Line: {e.obj.meta.line}"
2688
+ )
2689
+ raise InvalidSyntaxException(str(e.orig_exc)).with_traceback(
2690
+ e.orig_exc.__traceback__
2691
+ )
2692
+ raise e.orig_exc
2693
+
2694
+
2695
+ def parse_text_raw(text: str, environment: Optional[Environment] = None):
2696
+ PARSER.parse(text)
2697
+
2698
+
2699
+ ERROR_CODES: dict[int, str] = {
2700
+ # 100 codes are SQL compatibility errors
2701
+ 101: "Using FROM keyword? Trilogy does not have a FROM clause (Datasource resolution is automatic).",
2702
+ # 200 codes relate to required explicit syntax (we could loosen these?)
2703
+ 201: 'Missing alias? Alias must be specified with "AS" - e.g. `SELECT x+1 AS y`',
2704
+ 202: "Missing closing semicolon? Statements must be terminated with a semicolon `;`.",
2705
+ 210: "Missing order direction? Order by must be explicit about direction - specify `asc` or `desc`.",
2706
+ }
2707
+
2708
+ DEFAULT_ERROR_SPAN: int = 30
2709
+
2710
+
2711
+ def inject_context_maker(pos: int, text: str, span: int = 40) -> str:
2712
+ """Returns a pretty string pinpointing the error in the text,
2713
+ with span amount of context characters around it.
2714
+
2715
+ Note:
2716
+ The parser doesn't hold a copy of the text it has to parse,
2717
+ so you have to provide it again
2718
+ """
2719
+
2720
+ start = max(pos - span, 0)
2721
+ end = pos + span
2722
+ if not isinstance(text, bytes):
2723
+
2724
+ before = text[start:pos].rsplit("\n", 1)[-1]
2725
+ after = text[pos:end].split("\n", 1)[0]
2726
+ rcap = ""
2727
+ # no trailing "..." if the snippet runs past the end of the text
2728
+ # or already terminates on whitespace
2729
+ if after and not after[-1].isspace() and not (end > len(text)):
2730
+ rcap = "..."
2731
+ lcap = ""
2732
+ if start > 0 and not before[0].isspace():
2733
+ lcap = "..."
2734
+ lpad = " "
2735
+ rpad = " "
2736
+ if before.endswith(" "):
2737
+ lpad = ""
2738
+ if after.startswith(" "):
2739
+ rpad = ""
2740
+ return f"{lcap}{before}{lpad}???{rpad}{after}{rcap}"
2741
+ else:
2742
+ before = text[start:pos].rsplit(b"\n", 1)[-1]
2743
+ after = text[pos:end].split(b"\n", 1)[0]
2744
+ return (before + b" ??? " + after).decode("ascii", "backslashreplace")
2745
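Because the parser does not retain the text it parsed, error reporting threads the source back in and `inject_context_maker` splices a `???` marker at the failure offset with up to `span` characters of context on each side. A rough illustration of the intended output (exact ellipsis and padding behavior follows the rules above):

```python
text = "select order_id count(order_id) as order_count;"
pos = text.index("count")  # pretend the parser stalled here, e.g. a missing comma
# inject_context_maker(pos, text) produces something like:
#   "select order_id ??? count(order_id) as order_count;"
```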
+
2746
+
2747
+ def parse_text(
2748
+ text: str,
2749
+ environment: Optional[Environment] = None,
2750
+ root: Path | None = None,
2751
+ parse_config: Parsing | None = None,
2752
+ ) -> Tuple[
2753
+ Environment,
2754
+ List[
2755
+ Datasource
2756
+ | ImportStatement
2757
+ | SelectStatement
2758
+ | PersistStatement
2759
+ | ShowStatement
2760
+ | RawSQLStatement
2761
+ | ValidateStatement
2762
+ | None
2763
+ ],
2764
+ ]:
2765
+ def _create_syntax_error(code: int, pos: int, text: str) -> InvalidSyntaxException:
2766
+ """Helper to create standardized syntax error with context."""
2767
+ return InvalidSyntaxException(
2768
+ f"Syntax [{code}]: "
2769
+ + ERROR_CODES[code]
2770
+ + "\nLocation:\n"
2771
+ + inject_context_maker(pos, text.replace("\n", " "), DEFAULT_ERROR_SPAN)
2772
+ )
2773
+
2774
+ def _create_generic_syntax_error(
2775
+ message: str, pos: int, text: str
2776
+ ) -> InvalidSyntaxException:
2777
+ """Helper to create generic syntax error with context."""
2778
+ return InvalidSyntaxException(
2779
+ message
2780
+ + "\nLocation:\n"
2781
+ + inject_context_maker(pos, text.replace("\n", " "), DEFAULT_ERROR_SPAN)
2782
+ )
2783
+
2784
+ def _handle_unexpected_token(e: UnexpectedToken, text: str) -> None:
2785
+ """Handle UnexpectedToken errors to make friendlier error messages."""
2786
+ # Handle ordering direction error
2787
+ pos = e.pos_in_stream or 0
2788
+ if e.interactive_parser.lexer_thread.state:
2789
+ last_token = e.interactive_parser.lexer_thread.state.last_token
2790
+ else:
2791
+ last_token = None
2792
+ if e.expected == {"ORDERING_DIRECTION"}:
2793
+ raise _create_syntax_error(210, pos, text)
2794
+
2795
+ # Handle FROM token error
2796
+ parsed_tokens = (
2797
+ [x.value for x in e.token_history if x] if e.token_history else []
2798
+ )
2799
+
2800
+ if parsed_tokens == ["FROM"]:
2801
+ raise _create_syntax_error(101, pos, text)
2802
+ # check if they are missing a semicolon
2803
+ if last_token and e.token.type == "$END":
2804
+ try:
2805
+
2806
+ e.interactive_parser.feed_token(Token("_TERMINATOR", ";"))
2807
+ state = e.interactive_parser.lexer_thread.state
2808
+ if state and state.last_token:
2809
+ new_pos = state.last_token.end_pos or pos
2810
+ else:
2811
+ new_pos = pos
2812
+ raise _create_syntax_error(202, new_pos, text)
2813
+ except UnexpectedToken:
2814
+ pass
2815
+ # check if they forgot an as
2816
+ try:
2817
+ e.interactive_parser.feed_token(Token("AS", "AS"))
2818
+ state = e.interactive_parser.lexer_thread.state
2819
+ if state and state.last_token:
2820
+ new_pos = state.last_token.end_pos or pos
2821
+ else:
2822
+ new_pos = pos
2823
+ e.interactive_parser.feed_token(Token("IDENTIFIER", e.token.value))
2824
+ raise _create_syntax_error(201, new_pos, text)
2825
+ except UnexpectedToken:
2826
+ pass
2827
+
2828
+ # Default UnexpectedToken handling
2829
+ raise _create_generic_syntax_error(str(e), pos, text)
2830
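The recovery logic above leans on Lark's interactive parser: it speculatively feeds a `;` or an `AS` token into the stalled parse state and, if the token is accepted, reports the friendlier error code (202 for a missing terminator, 201 for a missing alias) instead of the raw Lark message. The probing idea in isolation, with `feed_token` standing in for `e.interactive_parser.feed_token`:

```python
def probe_fixes(feed_token, candidates):
    """Try each candidate repair token; return the label of the first one the parser accepts.

    `feed_token` is expected to raise when a token cannot extend the current parse
    state, mirroring how the interactive parser rejects speculative tokens above.
    """
    for label, token in candidates:
        try:
            feed_token(token)
            return label  # the parse would have continued: surface this hint
        except Exception:
            continue
    return None
```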
+
2831
+ environment = environment or (
2832
+ Environment(working_path=root) if root else Environment()
2833
+ )
2834
+ parser = ParseToObjects(
2835
+ environment=environment, import_keys=["root"], parse_config=parse_config
2836
+ )
2837
+ start = datetime.now()
2838
+
2839
+ try:
2840
+ parser.set_text(text)
2841
+ # disable fail on missing to allow for circular dependencies
2842
+ parser.prepare_parse()
2843
+ parser.transform(PARSER.parse(text))
2844
+ # this will reset fail on missing
2845
+ pass_two = parser.run_second_parse_pass()
2846
+ output = [v for v in pass_two if v]
2847
+ environment.concepts.fail_on_missing = True
2848
+ end = datetime.now()
2849
+ perf_logger.debug(
2850
+ f"Parse time: {end - start} for {len(text)} characters, {len(output)} objects"
2851
+ )
2852
+ except VisitError as e:
2853
+ unpack_visit_error(e, text)
2854
+ # this will never be reached
2855
+ raise e
2856
+ except UnexpectedToken as e:
2857
+ _handle_unexpected_token(e, text)
2858
+ except (UnexpectedCharacters, UnexpectedEOF, UnexpectedInput) as e:
2859
+ raise _create_generic_syntax_error(str(e), e.pos_in_stream or 0, text)
2860
+ except (ValidationError, TypeError) as e:
2861
+ raise InvalidSyntaxException(str(e))
2862
+
2863
+ return environment, output
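A hedged usage example of the entry point defined above; the statement text is hypothetical and has not been verified against the grammar, and it assumes `parse_text` is imported from this package:

```python
# Hypothetical usage sketch - the statement text is illustrative only.
env, statements = parse_text("key order_id int; select order_id;")
for stmt in statements:
    print(type(stmt).__name__)
```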