pytrilogy 0.0.1.102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (77) hide show
  1. pytrilogy-0.0.1.102.dist-info/LICENSE.md +19 -0
  2. pytrilogy-0.0.1.102.dist-info/METADATA +277 -0
  3. pytrilogy-0.0.1.102.dist-info/RECORD +77 -0
  4. pytrilogy-0.0.1.102.dist-info/WHEEL +5 -0
  5. pytrilogy-0.0.1.102.dist-info/entry_points.txt +2 -0
  6. pytrilogy-0.0.1.102.dist-info/top_level.txt +1 -0
  7. trilogy/__init__.py +8 -0
  8. trilogy/compiler.py +0 -0
  9. trilogy/constants.py +30 -0
  10. trilogy/core/__init__.py +0 -0
  11. trilogy/core/constants.py +3 -0
  12. trilogy/core/enums.py +270 -0
  13. trilogy/core/env_processor.py +33 -0
  14. trilogy/core/environment_helpers.py +156 -0
  15. trilogy/core/ergonomics.py +187 -0
  16. trilogy/core/exceptions.py +23 -0
  17. trilogy/core/functions.py +320 -0
  18. trilogy/core/graph_models.py +55 -0
  19. trilogy/core/internal.py +37 -0
  20. trilogy/core/models.py +3145 -0
  21. trilogy/core/processing/__init__.py +0 -0
  22. trilogy/core/processing/concept_strategies_v3.py +603 -0
  23. trilogy/core/processing/graph_utils.py +44 -0
  24. trilogy/core/processing/node_generators/__init__.py +25 -0
  25. trilogy/core/processing/node_generators/basic_node.py +71 -0
  26. trilogy/core/processing/node_generators/common.py +239 -0
  27. trilogy/core/processing/node_generators/concept_merge.py +152 -0
  28. trilogy/core/processing/node_generators/filter_node.py +83 -0
  29. trilogy/core/processing/node_generators/group_node.py +92 -0
  30. trilogy/core/processing/node_generators/group_to_node.py +99 -0
  31. trilogy/core/processing/node_generators/merge_node.py +148 -0
  32. trilogy/core/processing/node_generators/multiselect_node.py +189 -0
  33. trilogy/core/processing/node_generators/rowset_node.py +130 -0
  34. trilogy/core/processing/node_generators/select_node.py +328 -0
  35. trilogy/core/processing/node_generators/unnest_node.py +37 -0
  36. trilogy/core/processing/node_generators/window_node.py +85 -0
  37. trilogy/core/processing/nodes/__init__.py +76 -0
  38. trilogy/core/processing/nodes/base_node.py +251 -0
  39. trilogy/core/processing/nodes/filter_node.py +49 -0
  40. trilogy/core/processing/nodes/group_node.py +110 -0
  41. trilogy/core/processing/nodes/merge_node.py +326 -0
  42. trilogy/core/processing/nodes/select_node_v2.py +198 -0
  43. trilogy/core/processing/nodes/unnest_node.py +54 -0
  44. trilogy/core/processing/nodes/window_node.py +34 -0
  45. trilogy/core/processing/utility.py +278 -0
  46. trilogy/core/query_processor.py +331 -0
  47. trilogy/dialect/__init__.py +0 -0
  48. trilogy/dialect/base.py +679 -0
  49. trilogy/dialect/bigquery.py +80 -0
  50. trilogy/dialect/common.py +43 -0
  51. trilogy/dialect/config.py +55 -0
  52. trilogy/dialect/duckdb.py +83 -0
  53. trilogy/dialect/enums.py +95 -0
  54. trilogy/dialect/postgres.py +86 -0
  55. trilogy/dialect/presto.py +82 -0
  56. trilogy/dialect/snowflake.py +82 -0
  57. trilogy/dialect/sql_server.py +89 -0
  58. trilogy/docs/__init__.py +0 -0
  59. trilogy/engine.py +48 -0
  60. trilogy/executor.py +242 -0
  61. trilogy/hooks/__init__.py +0 -0
  62. trilogy/hooks/base_hook.py +37 -0
  63. trilogy/hooks/graph_hook.py +24 -0
  64. trilogy/hooks/query_debugger.py +133 -0
  65. trilogy/metadata/__init__.py +0 -0
  66. trilogy/parser.py +10 -0
  67. trilogy/parsing/__init__.py +0 -0
  68. trilogy/parsing/common.py +176 -0
  69. trilogy/parsing/config.py +5 -0
  70. trilogy/parsing/exceptions.py +2 -0
  71. trilogy/parsing/helpers.py +1 -0
  72. trilogy/parsing/parse_engine.py +1951 -0
  73. trilogy/parsing/render.py +483 -0
  74. trilogy/py.typed +0 -0
  75. trilogy/scripts/__init__.py +0 -0
  76. trilogy/scripts/trilogy.py +127 -0
  77. trilogy/utility.py +31 -0
@@ -0,0 +1,1951 @@
1
+ from os.path import dirname, join
2
+ from typing import List, Optional, Tuple, Union
3
+ from re import IGNORECASE
4
+ from lark import Lark, Transformer, v_args
5
+ from lark.exceptions import (
6
+ UnexpectedCharacters,
7
+ UnexpectedEOF,
8
+ UnexpectedInput,
9
+ UnexpectedToken,
10
+ VisitError,
11
+ )
12
+ from lark.tree import Meta
13
+ from pydantic import ValidationError
14
+ from trilogy.core.internal import INTERNAL_NAMESPACE, ALL_ROWS_CONCEPT
15
+ from trilogy.constants import (
16
+ DEFAULT_NAMESPACE,
17
+ NULL_VALUE,
18
+ VIRTUAL_CONCEPT_PREFIX,
19
+ )
20
+ from trilogy.core.enums import (
21
+ BooleanOperator,
22
+ ComparisonOperator,
23
+ FunctionType,
24
+ InfiniteFunctionArgs,
25
+ FunctionClass,
26
+ Modifier,
27
+ Ordering,
28
+ Purpose,
29
+ WindowOrder,
30
+ WindowType,
31
+ DatePart,
32
+ ShowCategory,
33
+ )
34
+ from trilogy.core.exceptions import InvalidSyntaxException, UndefinedConceptException
35
+ from trilogy.core.functions import (
36
+ Count,
37
+ CountDistinct,
38
+ Group,
39
+ Max,
40
+ Min,
41
+ Split,
42
+ IndexAccess,
43
+ AttrAccess,
44
+ Abs,
45
+ Unnest,
46
+ Coalesce,
47
+ function_args_to_output_purpose,
48
+ CurrentDate,
49
+ CurrentDatetime,
50
+ IsNull,
51
+ SubString,
52
+ StrPos,
53
+ )
54
+ from trilogy.core.models import (
55
+ Address,
56
+ AlignClause,
57
+ AlignItem,
58
+ AggregateWrapper,
59
+ CaseElse,
60
+ CaseWhen,
61
+ ColumnAssignment,
62
+ Comment,
63
+ Comparison,
64
+ Concept,
65
+ ConceptTransform,
66
+ Conditional,
67
+ Datasource,
68
+ MergeStatement,
69
+ Environment,
70
+ FilterItem,
71
+ Function,
72
+ Grain,
73
+ ImportStatement,
74
+ Limit,
75
+ Metadata,
76
+ MultiSelectStatement,
77
+ OrderBy,
78
+ OrderItem,
79
+ Parenthetical,
80
+ PersistStatement,
81
+ Query,
82
+ SelectStatement,
83
+ SelectItem,
84
+ WhereClause,
85
+ Window,
86
+ WindowItem,
87
+ WindowItemOrder,
88
+ WindowItemOver,
89
+ RawColumnExpr,
90
+ arg_to_datatype,
91
+ ListWrapper,
92
+ MapType,
93
+ ShowStatement,
94
+ DataType,
95
+ StructType,
96
+ ListType,
97
+ ConceptDeclarationStatement,
98
+ ConceptDerivation,
99
+ RowsetDerivationStatement,
100
+ LooseConceptList,
101
+ )
102
+ from trilogy.parsing.exceptions import ParseError
103
+ from trilogy.utility import string_to_hash
104
+ from trilogy.parsing.common import (
105
+ agg_wrapper_to_concept,
106
+ window_item_to_concept,
107
+ function_to_concept,
108
+ filter_item_to_concept,
109
+ constant_to_concept,
110
+ )
111
+
112
# Python types treated as literal constants when a concept is derived from an
# expression (checked with isinstance in ParseToObjects.concept_derivation).
CONSTANT_TYPES = (int, float, str, bool, ListWrapper)

# Lark grammar for the language. A file is a sequence of ';'-terminated blocks,
# show statements and comments. The rule and terminal names declared here are
# the names of the callback methods on ParseToObjects below.
# NOTE: this is a runtime string consumed by Lark; the '//' lines inside it are
# grammar comments and are part of the string.
grammar = r"""
!start: ( block | show_statement | comment )*
block: statement _TERMINATOR comment?
?statement: concept
| datasource
| function
| multi_select_statement
| select_statement
| persist_statement
| rowset_derivation_statement
| import_statement
| merge_statement

_TERMINATOR: ";"i /\s*/

comment: /#.*(\n|$)/ | /\/\/.*\n/

// property display_name string
concept_declaration: PURPOSE IDENTIFIER data_type concept_nullable_modifier? metadata?
//customer_id.property first_name STRING;
//<customer_id,country>.property local_alias STRING
concept_property_declaration: PROPERTY (prop_ident | IDENTIFIER) data_type concept_nullable_modifier? metadata?
//metric post_length <- len(post_text);
concept_derivation: (PURPOSE | AUTO | PROPERTY ) IDENTIFIER "<" "-" expr

rowset_derivation_statement: ("rowset"i IDENTIFIER "<" "-" (multi_select_statement | select_statement)) | ("with"i IDENTIFIER "as"i (multi_select_statement | select_statement))

constant_derivation: CONST IDENTIFIER "<" "-" literal
concept_nullable_modifier: "?"
concept: (concept_declaration | concept_derivation | concept_property_declaration | constant_derivation)

//concept property
prop_ident: "<" (IDENTIFIER ",")* IDENTIFIER ","? ">" "." IDENTIFIER

// datasource concepts
datasource: "datasource" IDENTIFIER "(" column_assignment_list ")" grain_clause? (address | query)

grain_clause: "grain" "(" column_list ")"

address: "address" ADDRESS

query: "query" MULTILINE_STRING

concept_assignment: IDENTIFIER | (MODIFIER "[" concept_assignment "]" ) | (SHORTHAND_MODIFIER concept_assignment )

column_assignment: ((IDENTIFIER | raw_column_assignment | _static_functions ) ":" concept_assignment)

raw_column_assignment: "raw" "(" MULTILINE_STRING ")"

column_assignment_list : (column_assignment "," )* column_assignment ","?

column_list : (IDENTIFIER "," )* IDENTIFIER ","?

import_statement: "import" (IDENTIFIER ".") * IDENTIFIER "as" IDENTIFIER

// persist_statement
persist_statement: "persist"i IDENTIFIER "into"i IDENTIFIER "from"i select_statement grain_clause?

// select statement
select_statement: "select"i select_list where? comment* order_by? comment* limit? comment*

// multiple_selects
multi_select_statement: select_statement ("merge" select_statement)+ "align"i align_clause where? comment* order_by? comment* limit? comment*


align_item: IDENTIFIER ":" IDENTIFIER ("," IDENTIFIER)* ","?

align_clause: align_item ("," align_item)* ","?

// merge statemment

merge_statement: "merge" IDENTIFIER ("," IDENTIFIER)* ","? comment*

// FUNCTION blocks
function: raw_function
function_binding_item: IDENTIFIER data_type
function_binding_list: (function_binding_item ",")* function_binding_item ","?
raw_function: "def" "rawsql" IDENTIFIER "(" function_binding_list ")" "-" ">" data_type "as"i MULTILINE_STRING


// user_id where state = Mexico
filter_item: "filter"i IDENTIFIER where

// rank/lag/lead
WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i) /[\s]+/

window_item: WINDOW_TYPE (IDENTIFIER | select_transform | comment+ ) window_item_over? window_item_order?

window_item_over: ("OVER"i over_list)

window_item_order: ("ORDER"i? "BY"i order_list)

select_hide_modifier: "--"
select_partial_modifier: "~"
select_item: (select_hide_modifier | select_partial_modifier)? (IDENTIFIER | select_transform | comment+ )

select_list: ( select_item "," )* select_item ","?

// count(post_id) -> post_count
_assignment: ("-" ">") | "as"
select_transform : expr _assignment IDENTIFIER metadata?

metadata: "metadata" "(" IDENTIFIER "=" _string_lit ")"

limit: "LIMIT"i /[0-9]+/

!window_order: ("TOP"i | "BOTTOM"i)

window: window_order /[0-9]+/

window_order_by: "BY"i column_list

order_list: (expr ORDERING "," )* expr ORDERING ","?

over_list: (IDENTIFIER "," )* IDENTIFIER ","?

ORDERING: ("ASC"i | "DESC"i)

order_by: "ORDER"i "BY"i order_list

//WHERE STATEMENT

LOGICAL_OPERATOR: "AND"i | "OR"i

conditional: expr LOGICAL_OPERATOR (conditional | expr)

where: "WHERE"i (expr | conditional)

expr_reference: IDENTIFIER

!array_comparison: ( ("NOT"i "IN"i) | "IN"i)

COMPARISON_OPERATOR: (/is[\s]+not/ | "is" |"=" | ">" | "<" | ">=" | "<=" | "!=" )

comparison: (expr COMPARISON_OPERATOR expr) | (expr array_comparison expr_tuple)

expr_tuple: "(" (expr ",")* expr ","? ")"

//unnesting is a function
unnest: "UNNEST"i "(" expr ")"
//indexing into an expression is a function
index_access: expr "[" int_lit "]"
attr_access: expr "[" _string_lit "]"

parenthetical: "(" (conditional | expr) ")"

expr: window_item | filter_item | comparison | fgroup | aggregate_functions | unnest | _string_functions | _math_functions | _generic_functions | _constant_functions| _date_functions | literal | expr_reference | index_access | attr_access | parenthetical

// functions

//math TODO: add syntactic sugar
fadd: ("add"i "(" expr "," expr ")" ) | ( expr "+" expr )
fsub: ("subtract"i "(" expr "," expr ")" ) | ( expr "-" expr )
fmul: ("multiply"i "(" expr "," expr ")" ) | ( expr "*" expr )
fdiv: ( "divide"i "(" expr "," expr ")") | ( expr "/" expr )
fmod: ( "mod"i "(" expr "," expr ")") | ( expr "%" expr )
fround: "round"i "(" expr "," expr ")"
fabs: "abs"i "(" expr ")"

_math_functions: fadd | fsub | fmul | fdiv | fround | fmod | fabs

//generic
fcast: "cast"i "(" expr "AS"i data_type ")"
concat: ("concat"i "(" (expr ",")* expr ")") | (expr "||" expr)
fcoalesce: "coalesce"i "(" (expr ",")* expr ")"
fcase_when: "WHEN"i (expr | conditional) "THEN"i expr
fcase_else: "ELSE"i expr
fcase: "CASE"i (fcase_when)* (fcase_else)? "END"i
len: "len"i "(" expr ")"
fnot: "NOT"i expr

_generic_functions: fcast | concat | fcoalesce | fcase | len | fnot

//constant
fcurrent_date: "current_date"i "(" ")"
fcurrent_datetime: "current_datetime"i "(" ")"

_constant_functions: fcurrent_date | fcurrent_datetime

//string
like: "like"i "(" expr "," _string_lit ")"
ilike: "ilike"i "(" expr "," _string_lit ")"
alt_like: expr "like"i expr
upper: "upper"i "(" expr ")"
lower: "lower"i "(" expr ")"
fsplit: "split"i "(" expr "," _string_lit ")"
fstrpos: "strpos"i "(" expr "," expr ")"
fsubstring: "substring"i "(" expr "," expr "," expr ")"

_string_functions: like | ilike | upper | lower | fsplit | fstrpos | fsubstring | alt_like

// special aggregate
fgroup: "group"i "(" expr ")" aggregate_over?
//aggregates
count: "count"i "(" expr ")"
count_distinct: "count_distinct"i "(" expr ")"
sum: "sum"i "(" expr ")"
avg: "avg"i "(" expr ")"
max: "max"i "(" expr ")"
min: "min"i "(" expr ")"

//aggregates can force a grain
aggregate_all: "*"
aggregate_over: ("BY"i (aggregate_all | over_list))
aggregate_functions: (count | count_distinct | sum | avg | max | min) aggregate_over?

// date functions
fdate: "date"i "(" expr ")"
fdatetime: "datetime"i "(" expr ")"
ftimestamp: "timestamp"i "(" expr ")"

fsecond: "second"i "(" expr ")"
fminute: "minute"i "(" expr ")"
fhour: "hour"i "(" expr ")"
fday: "day"i "(" expr ")"
fday_of_week: "day_of_week"i "(" expr ")"
fweek: "week"i "(" expr ")"
fmonth: "month"i "(" expr ")"
fquarter: "quarter"i "(" expr ")"
fyear: "year"i "(" expr ")"

DATE_PART: "DAY"i | "WEEK"i | "MONTH"i | "QUARTER"i | "YEAR"i | "MINUTE"i | "HOUR"i | "SECOND"i
fdate_trunc: "date_trunc"i "(" expr "," DATE_PART ")"
fdate_part: "date_part"i "(" expr "," DATE_PART ")"
fdate_add: "date_add"i "(" expr "," DATE_PART "," int_lit ")"
fdate_diff: "date_diff"i "(" expr "," expr "," DATE_PART ")"

_date_functions: fdate | fdate_add | fdate_diff | fdatetime | ftimestamp | fsecond | fminute | fhour | fday | fday_of_week | fweek | fmonth | fquarter | fyear | fdate_part | fdate_trunc

_static_functions: _string_functions | _math_functions | _generic_functions | _constant_functions| _date_functions

// base language constructs
IDENTIFIER: /[a-zA-Z_][a-zA-Z0-9_\\-\\.\-]*/
ADDRESS: /[a-zA-Z_][a-zA-Z0-9_\\-\\.\-\*]*/ | /`[a-zA-Z_][a-zA-Z0-9_\\-\\.\-\*]*`/

MULTILINE_STRING: /\'{3}(.*?)\'{3}/s

DOUBLE_STRING_CHARS: /(?:(?!\${)([^"\\]|\\.))+/+ // any character except "
SINGLE_STRING_CHARS: /(?:(?!\${)([^'\\]|\\.))+/+ // any character except '
_single_quote: "'" ( SINGLE_STRING_CHARS )* "'"
_double_quote: "\"" ( DOUBLE_STRING_CHARS )* "\""
_string_lit: _single_quote | _double_quote

MINUS: "-"

int_lit: MINUS? /[0-9]+/

float_lit: /[0-9]*\.[0-9]+/

array_lit: "[" (literal ",")* literal ","? "]"()

!bool_lit: "True"i | "False"i

!null_lit: "null"i

literal: _string_lit | int_lit | float_lit | bool_lit | null_lit | array_lit

MODIFIER: "Optional"i | "Partial"i | "Nullable"i

SHORTHAND_MODIFIER: "~"

struct_type: "struct" "<" ((data_type | IDENTIFIER) ",")* (data_type | IDENTIFIER) ","? ">"

list_type: "list" "<" data_type ">"


!data_type: "string"i | "number"i | "numeric"i | "map"i | "list"i | "array"i | "any"i | "int"i | "bigint" | "date"i | "datetime"i | "timestamp"i | "float"i | "bool"i | struct_type | list_type

PURPOSE: "key"i | "metric"i | "const"i | "constant"i
PROPERTY: "property"i
CONST: "const"i | "constant"i
AUTO: "AUTO"i

// meta functions
CONCEPTS: "CONCEPTS"i
DATASOURCES: "DATASOURCES"i

show_category: CONCEPTS | DATASOURCES

show_statement: "show"i ( show_category | select_statement | persist_statement) _TERMINATOR

%import common.WS_INLINE -> _WHITESPACE
%import common.WS
%ignore WS
"""  # noqa: E501

# Module-level parser shared by every ParseToObjects instance; it is built once
# when the module is imported. propagate_positions=True asks Lark to record
# position info, which the @v_args(meta=True) callbacks read as meta.line.
# re.IGNORECASE is passed as a global regex flag for the grammar's terminals.
PARSER = Lark(
    grammar, start="start", propagate_positions=True, g_regex_flags=IGNORECASE
)
403
+
404
+
405
def parse_concept_reference(
    name: str, environment: Environment, purpose: Optional[Purpose] = None
) -> Tuple[str, str, str, str | None]:
    """Break a concept identifier into its lookup key and components.

    Args:
        name: Identifier as written in source; may be dotted.
        environment: Environment supplying the default namespace and, for
            properties, the parent concept.
        purpose: Purpose of the concept being referenced, if known.

    Returns:
        A ``(lookup, namespace, name, parent)`` tuple. ``parent`` is only set
        for dotted property references, where it is the text before the last
        dot; otherwise it is ``None``.
    """
    if "." not in name:
        # Unqualified names live in the environment's own namespace and are
        # looked up by their bare name.
        return name, environment.namespace or DEFAULT_NAMESPACE, name, None

    prefix, short_name = name.rsplit(".", 1)
    if purpose == Purpose.PROPERTY:
        # For a property the prefix names the parent concept; the property
        # takes that concept's namespace.
        owner_namespace = environment.concepts[prefix].namespace or DEFAULT_NAMESPACE
        return f"{owner_namespace}.{short_name}", owner_namespace, short_name, prefix

    # Otherwise the prefix is itself the namespace.
    return f"{prefix}.{short_name}", prefix, short_name, None
421
+
422
+
423
def unwrap_transformation(
    input: Union[
        FilterItem,
        WindowItem,
        Concept,
        Function,
        AggregateWrapper,
        int,
        str,
        float,
        bool,
    ]
) -> Function | FilterItem | WindowItem | AggregateWrapper:
    """Normalise a parsed expression into a lineage-capable object.

    Functions, aggregate wrappers, filter items and window items are returned
    unchanged. A bare Concept is wrapped in an ALIAS function, a Parenthetical
    is unwrapped recursively, and anything else is wrapped in a CONSTANT
    function whose datatype comes from ``arg_to_datatype``.
    """
    # The checks run in the same order as the original if/elif chain.
    if isinstance(input, (Function, AggregateWrapper)):
        return input
    if isinstance(input, Concept):
        return Function(
            operator=FunctionType.ALIAS,
            output_datatype=input.datatype,
            output_purpose=input.purpose,
            arguments=[input],
        )
    if isinstance(input, (FilterItem, WindowItem)):
        return input
    if isinstance(input, Parenthetical):
        return unwrap_transformation(input.content)
    # Fallback: treat the value as a literal constant.
    return Function(
        operator=FunctionType.CONSTANT,
        output_datatype=arg_to_datatype(input),
        output_purpose=Purpose.CONSTANT,
        arguments=[input],
    )
460
+
461
+
462
+ class ParseToObjects(Transformer):
463
def __init__(
    self,
    visit_tokens,
    text,
    environment: Environment,
    parse_address: str | None = None,
    parsed: dict | None = None,
):
    """Set up a transformer for one source text.

    Args:
        visit_tokens: Forwarded to ``Transformer.__init__``.
        text: The source text; kept so ``hydrate_missing`` can re-parse it.
        environment: Environment that parsed concepts are registered into.
        parse_address: Identifier for this parser; defaults to ``"root"``.
        parsed: Registry of parsers keyed by string. When a dict is supplied
            it is stored as-is (not copied), so it is shared with the caller.
    """
    Transformer.__init__(self, visit_tokens)
    self.text = text
    self.environment: Environment = environment
    self.imported: set[str] = set()
    self.parse_address = parse_address or "root"
    # Test against None rather than truthiness: an empty dict handed in by the
    # caller must still be the shared registry, not silently swapped for a
    # private one.
    self.parsed: dict[str, ParseToObjects] = {} if parsed is None else parsed
    # we do a second pass to pick up circular dependencies
    # after initial parsing
    self.pass_count = 1
480
+
481
def hydrate_missing(self):
    """Run the second parsing pass for this parser and its registered parsers.

    Marks this parser as being on pass 2, recurses into every registered
    parser that is not yet on pass 2, then re-parses and re-transforms the
    stored text with ``fail_on_missing`` enabled on the environment's concepts.
    The concepts' ``undefined`` mapping is cleared afterwards.

    Returns:
        The result of transforming the re-parsed text.
    """
    # Mark ourselves first so that recursive calls coming back here skip us.
    self.pass_count = 2
    for other in self.parsed.values():
        # Checked at iteration time: an earlier recursive call may already
        # have advanced this parser.
        if other.pass_count != 2:
            other.hydrate_missing()

    concepts = self.environment.concepts
    concepts.fail_on_missing = True
    tree = PARSER.parse(self.text)
    reparsed = self.transform(tree)
    self.environment.concepts.undefined = {}
    return reparsed
491
+
492
def process_function_args(
    self, args, meta: Meta, concept_arguments: Optional[LooseConceptList] = None
):
    """Replace anonymous complex arguments with registered virtual concepts.

    Each argument is first stripped of any Parenthetical wrappers. Aggregate
    functions, filter items, window items, aggregate wrappers and list
    constants are converted to a concept named
    ``{VIRTUAL_CONCEPT_PREFIX}_{hash of str(arg)}`` in the environment's
    namespace, registered with the environment, and substituted for the
    argument. Non-aggregate functions and every other value pass through.

    Args:
        args: Parsed function arguments.
        meta: Lark position info; its line is stamped on created concepts.
        concept_arguments: Not read by this method.

    Returns:
        The processed argument list, in the original order.
    """
    # Checked in order; the first matching type decides the converter.
    converters = (
        (Function, function_to_concept),
        (FilterItem, filter_item_to_concept),
        (WindowItem, window_item_to_concept),
        (AggregateWrapper, agg_wrapper_to_concept),
        # we don't need virtual types for most constants
        (ListWrapper, constant_to_concept),
    )
    final: List[Concept | Function] = []
    for arg in args:
        while isinstance(arg, Parenthetical):
            arg = arg.content

        build = next(
            (fn for kind, fn in converters if isinstance(arg, kind)), None
        )
        if build is None:
            final.append(arg)
            continue
        if (
            isinstance(arg, Function)
            and arg.operator not in FunctionClass.AGGREGATE_FUNCTIONS.value
        ):
            # if it's not an aggregate function, we can skip the virtual
            # concept to simplify anonymous function handling
            final.append(arg)
            continue

        virtual_name = f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(arg))}"
        concept = build(
            arg,
            name=virtual_name,
            namespace=self.environment.namespace,
        )
        # to satisfy mypy, concept will always have metadata
        if concept.metadata:
            concept.metadata.line_number = meta.line
        self.environment.add_concept(concept, meta=meta)
        final.append(concept)
    return final
566
+
567
def start(self, args):
    """Return the children of the ``start`` rule unchanged."""
    statements = args
    return statements
569
+
570
def block(self, args):
    """Return the statement of a ``statement ; comment?`` block.

    If the statement is a concept declaration that is followed by a trailing
    comment and has no description yet, the comment text (without its marker)
    becomes the concept's description.

    Args:
        args: ``[statement]`` or ``[statement, comment]``.

    Returns:
        The statement, ``args[0]``.
    """
    output = args[0]
    if (
        isinstance(output, ConceptDeclarationStatement)
        and len(args) > 1
        and isinstance(args[1], Comment)
    ):
        metadata = output.concept.metadata
        # An existing description always wins over the trailing comment.
        if metadata is not None and not metadata.description:
            text = args[1].text.strip()
            if text.startswith("//"):
                # The grammar also accepts '//' comments. Splitting those on
                # '#' raised IndexError, or picked up whatever followed an
                # unrelated '#' inside the comment.
                description = text[2:].strip()
            else:
                # '#' comments keep the historical behaviour: the text
                # between the first and the second '#'.
                pieces = text.split("#")
                description = (pieces[1] if len(pieces) > 1 else text).strip()
            metadata.description = description

    return output
580
+
581
def metadata(self, args):
    """Build a Metadata object from alternating key/value children.

    Even-indexed children are used as keyword names and the following
    odd-indexed children as their values.
    """
    keys = args[::2]
    values = args[1::2]
    return Metadata(**dict(zip(keys, values)))
584
+
585
def IDENTIFIER(self, args) -> str:
    """Return the text of an IDENTIFIER token."""
    token = args
    return token.value
587
+
588
def ADDRESS(self, args) -> str:
    """Return the text of an ADDRESS token."""
    token = args
    return token.value
590
+
591
def STRING_CHARS(self, args) -> str:
    """Return the text of a STRING_CHARS token.

    NOTE(review): the grammar in this module declares SINGLE_STRING_CHARS and
    DOUBLE_STRING_CHARS but no STRING_CHARS terminal, so this callback looks
    unused - confirm before removing.
    """
    token = args
    return token.value
593
+
594
def SINGLE_STRING_CHARS(self, args) -> str:
    """Return the text matched inside a single-quoted string literal."""
    token = args
    return token.value
596
+
597
def DOUBLE_STRING_CHARS(self, args) -> str:
    """Return the text matched inside a double-quoted string literal."""
    token = args
    return token.value
599
+
600
def MINUS(self, args) -> str:
    """Return the literal ``"-"`` for a MINUS token, ignoring the token itself."""
    sign = "-"
    return sign
602
+
603
+ @v_args(meta=True)
604
def struct_type(self, meta: Meta, args) -> StructType:
    """Build a StructType from the fields of a ``struct<...>`` declaration.

    Children that are already DataType, ListType or StructType instances are
    used directly; any other child is treated as a concept key and resolved
    through the environment's concepts, passing the current line number.

    Args:
        meta: Lark position info for the rule (supplied because the method is
            decorated with ``v_args(meta=True)``).
        args: Field types and/or concept identifiers.
    """

    def resolve(field):
        if isinstance(field, (DataType, ListType, StructType)):
            return field
        return self.environment.concepts.__getitem__(  # type: ignore
            key=field, line_no=meta.line
        )

    final: list[DataType | MapType | ListType | StructType | Concept] = [
        resolve(field) for field in args
    ]
    return StructType(fields=final)
615
+
616
def list_type(self, args) -> ListType:
    """Build a ListType whose element type is the rule's first child."""
    element_type = args[0]
    return ListType(type=element_type)
618
+
619
def data_type(self, args) -> DataType | ListType | StructType:
    """Resolve a ``data_type`` rule to a type object.

    A StructType or ListType child is returned unchanged; otherwise the child
    is lower-cased and passed to ``DataType``.
    """
    resolved = args[0]
    if isinstance(resolved, (StructType, ListType)):
        return resolved
    return DataType(resolved.lower())
626
+
627
def array_comparison(self, args) -> ComparisonOperator:
    """Build a ComparisonOperator from the rule's keyword tokens.

    The lower-cased token values are passed to ``ComparisonOperator`` as a
    list.
    """
    keywords = [token.value.lower() for token in args]
    return ComparisonOperator(keywords)
629
+
630
def COMPARISON_OPERATOR(self, args) -> ComparisonOperator:
    """Convert a COMPARISON_OPERATOR token into a ComparisonOperator.

    The token is passed through as-is, without any case normalisation.
    """
    operator = ComparisonOperator(args)
    return operator
632
+
633
def LOGICAL_OPERATOR(self, args) -> BooleanOperator:
    """Convert a LOGICAL_OPERATOR token into a BooleanOperator.

    The token text is lower-cased before the lookup.
    """
    keyword = args.lower()
    return BooleanOperator(keyword)
635
+
636
def concept_assignment(self, args):
    """Return the rule's children unchanged.

    ``column_assignment`` walks the resulting nested list: a single-element
    list holds the concept identifier, a longer one holds a modifier followed
    by a nested assignment.
    """
    children = args
    return children
638
+
639
+ @v_args(meta=True)
640
def column_assignment(self, meta: Meta, args):
    """Build a ColumnAssignment from ``alias : concept_assignment``.

    Args:
        meta: Lark position info for the rule (supplied because the method is
            decorated with ``v_args(meta=True)``).
        args: ``[alias, concept_assignment]``, where the second item is the
            nested list produced by ``concept_assignment``.

    Returns:
        A ColumnAssignment with the alias, the collected modifiers (outermost
        first) and the concept resolved from the environment.
    """
    # TODO -> deal with conceptual modifiers
    alias, node = args[0], args[1]
    modifiers = []
    # Each wrapping level is [modifier, inner]; peel them off until only the
    # single-element [identifier] list remains.
    while len(node) > 1:
        modifier, node = node[0], node[1]
        modifiers.append(modifier)
    resolved = self.environment.concepts.__getitem__(  # type: ignore
        key=node[0], line_no=meta.line
    )
    return ColumnAssignment(alias=alias, modifiers=modifiers, concept=resolved)
652
+
653
def _TERMINATOR(self, args):
    """Map a statement terminator token to ``None``."""
    result = None
    return result
655
+
656
def MODIFIER(self, args) -> Modifier:
    """Convert a MODIFIER token into a Modifier, using the token's value."""
    raw = args.value
    return Modifier(raw)
658
+
659
def SHORTHAND_MODIFIER(self, args) -> Modifier:
    """Convert a SHORTHAND_MODIFIER token (``~``) into a Modifier."""
    raw = args.value
    return Modifier(raw)
661
+
662
def PURPOSE(self, args) -> Purpose:
    """Convert a PURPOSE token into a Purpose, using the token's value."""
    raw = args.value
    return Purpose(raw)
664
+
665
def AUTO(self, args) -> Purpose:
    """Map an AUTO token to ``Purpose.AUTO``; the token text is ignored."""
    purpose = Purpose.AUTO
    return purpose
667
+
668
def CONST(self, args) -> Purpose:
    """Map a CONST token to ``Purpose.CONSTANT``; the token text is ignored."""
    purpose = Purpose.CONSTANT
    return purpose
670
+
671
def CONSTANT(self, args) -> Purpose:
    """Map a CONSTANT token to ``Purpose.CONSTANT``.

    NOTE(review): the grammar in this module declares a CONST terminal but no
    CONSTANT terminal, so this callback looks unused - confirm before
    removing.
    """
    purpose = Purpose.CONSTANT
    return purpose
673
+
674
def PROPERTY(self, args):
    """Map a PROPERTY token to ``Purpose.PROPERTY``; the token text is ignored."""
    purpose = Purpose.PROPERTY
    return purpose
676
+
677
+ @v_args(meta=True)
678
def prop_ident(self, meta: Meta, args) -> Tuple[List[Concept], str]:
    """Resolve a ``<key_a, key_b>.name`` property identifier.

    Args:
        meta: Lark position info for the rule; not read here.
        args: The key identifiers followed by the property name.

    Returns:
        ``(parents, name)``: the concepts looked up for every identifier
        except the last, and the last identifier unchanged.
    """
    *grain_keys, property_name = args
    parents = [self.environment.concepts[key] for key in grain_keys]
    return parents, property_name
680
+
681
+ @v_args(meta=True)
682
def concept_property_declaration(self, meta: Meta, args) -> Concept:
    """Create and register a property concept.

    The declaration (``args[1]``) is either the ``(parents, name)`` tuple
    produced by ``prop_ident`` or a dotted identifier ``parent.name``.

    Args:
        meta: Lark position info, forwarded to ``add_concept``.
        args: ``[purpose, declaration, datatype, ...]`` followed by optional
            Modifier and Metadata children.

    Returns:
        The newly registered Concept.

    Raises:
        ParseError: if a plain identifier declaration contains no ``.`` and
            therefore names no parent key.
    """
    # The last Metadata child wins; every Modifier child is kept.
    metadata = next(
        (item for item in reversed(args) if isinstance(item, Metadata)), None
    )
    modifiers = [item for item in args if isinstance(item, Modifier)]

    declaration = args[1]
    if isinstance(declaration, tuple):
        parents, name = declaration
        if "." in name:
            # NOTE(review): splits on the FIRST dot, whereas the identifier
            # branch below and parse_concept_reference split on the last one;
            # left unchanged.
            namespace, name = name.split(".", 1)
        else:
            namespace = self.environment.namespace or DEFAULT_NAMESPACE
    elif "." not in declaration:
        raise ParseError(
            f"Property declaration {args[1]} must be fully qualified with a parent key"
        )
    else:
        grain, name = declaration.rsplit(".", 1)
        parent = self.environment.concepts[grain]
        parents = [parent]
        namespace = parent.namespace

    concept = Concept(
        name=name,
        datatype=args[2],
        purpose=args[0],
        metadata=metadata,
        grain=Grain(components=parents),
        namespace=namespace,
        keys=parents,
        modifiers=modifiers,
    )
    self.environment.add_concept(concept, meta)
    return concept
720
+
721
+ @v_args(meta=True)
722
def concept_declaration(self, meta: Meta, args) -> ConceptDeclarationStatement:
    """Create and register a concept from ``<purpose> <name> <datatype>``.

    Args:
        meta: Lark position info; its line is stamped on the concept's
            metadata and it is forwarded to ``add_concept``.
        args: ``[purpose, name, datatype, ...]`` followed by optional
            Modifier and Metadata children.

    Returns:
        A ConceptDeclarationStatement wrapping the new concept.
    """
    # The last Metadata child wins; every Modifier child is kept.
    metadata = next(
        (item for item in reversed(args) if isinstance(item, Metadata)), None
    )
    modifiers = [item for item in args if isinstance(item, Modifier)]

    _, namespace, name, _ = parse_concept_reference(args[1], self.environment)
    concept = Concept(
        name=name,
        datatype=args[2],
        purpose=args[0],
        metadata=metadata,
        namespace=namespace,
        modifiers=modifiers,
    )
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta=meta)
    return ConceptDeclarationStatement(concept=concept)
746
+
747
+ @v_args(meta=True)
748
def concept_derivation(self, meta: Meta, args) -> ConceptDerivation:
    """Create and register the concept declared by ``<purpose> name <- expr``.

    The expression is stripped of Parenthetical wrappers and converted with
    the helper matching its type: filter item, window item, aggregate
    wrapper, literal constant (see ``CONSTANT_TYPES``) or function.

    Args:
        meta: Lark position info; its line is stamped on the concept's
            metadata and it is forwarded to ``add_concept``.
        args: ``[purpose, name, expression]``, optionally followed by
            metadata.

    Returns:
        A ConceptDerivation wrapping the registered concept.

    Raises:
        SyntaxError: if the expression is of a type that cannot be bound to
            a concept.
    """
    metadata = args[3] if len(args) > 3 else None
    purpose = args[0]
    if purpose == Purpose.AUTO:
        # AUTO is passed on as "no explicit purpose".
        purpose = None
    _, namespace, name, _ = parse_concept_reference(
        args[1], self.environment, purpose
    )

    source_value = args[2]
    # we need to strip off every parenthetical to see what is being assigned.
    while isinstance(source_value, Parenthetical):
        source_value = source_value.content

    # The checks keep the original order: first match wins.
    if isinstance(source_value, FilterItem):
        concept = filter_item_to_concept(
            source_value,
            name=name,
            namespace=namespace,
            purpose=purpose,
            metadata=metadata,
        )
    elif isinstance(source_value, WindowItem):
        concept = window_item_to_concept(
            source_value,
            name=name,
            namespace=namespace,
            purpose=purpose,
            metadata=metadata,
        )
    elif isinstance(source_value, AggregateWrapper):
        concept = agg_wrapper_to_concept(
            source_value,
            namespace=namespace,
            name=name,
            metadata=metadata,
            purpose=purpose,
        )
    elif isinstance(source_value, CONSTANT_TYPES):
        concept = constant_to_concept(
            source_value,
            name=name,
            namespace=namespace,
            purpose=purpose,
            metadata=metadata,
        )
    elif isinstance(source_value, Function):
        # As before, function_to_concept is not given purpose or metadata.
        concept = function_to_concept(
            source_value,
            name=name,
            namespace=namespace,
        )
    else:
        # The message used to say "select transform", a leftover from another
        # rule; name the construct that actually failed.
        raise SyntaxError(
            f"Received invalid type {type(args[2])} {args[2]} as input to concept"
            " derivation"
        )

    # Registration is the same for every branch.
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta=meta)
    return ConceptDerivation(concept=concept)
834
+
835
+ @v_args(meta=True)
836
def rowset_derivation_statement(
    self, meta: Meta, args
) -> RowsetDerivationStatement:
    """Build a rowset statement and register the concepts it derives.

    Args:
        meta: Lark position info; its line is stamped on each derived
            concept that has metadata.
        args: ``[name, select]`` where ``select`` is a SelectStatement or
            MultiSelectStatement.

    Returns:
        The RowsetDerivationStatement.
    """
    rowset_name, select = args[0], args[1]
    namespace = self.environment.namespace or DEFAULT_NAMESPACE
    output = RowsetDerivationStatement(
        name=rowset_name,
        select=select,
        namespace=namespace,
    )
    for derived in output.derived_concepts:
        if derived.metadata:
            derived.metadata.line_number = meta.line
        # Unlike the other callbacks, no meta is passed to add_concept here.
        self.environment.add_concept(derived)

    return output
852
+
853
+ @v_args(meta=True)
854
def constant_derivation(self, meta: Meta, args) -> Concept:
    """Create and register a constant concept from ``const name <- literal``.

    Args:
        meta: Lark position info; its line is stamped on the concept's
            metadata and it is forwarded to ``add_concept``.
        args: ``[CONST, name, literal]``, optionally followed by metadata.

    Returns:
        The registered Concept. Its purpose is CONSTANT, its grain has no
        components, and its lineage is a CONSTANT function wrapping the
        literal.
    """
    metadata = args[3] if len(args) > 3 else None
    constant: Union[str, float, int, bool] = args[2]
    _, namespace, name, _ = parse_concept_reference(args[1], self.environment)

    lineage = Function(
        operator=FunctionType.CONSTANT,
        output_datatype=arg_to_datatype(constant),
        output_purpose=Purpose.CONSTANT,
        arguments=[constant],
    )
    concept = Concept(
        name=name,
        datatype=arg_to_datatype(constant),
        purpose=Purpose.CONSTANT,
        metadata=metadata,
        lineage=lineage,
        grain=Grain(components=[]),
        namespace=namespace,
    )
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta)
    return concept
882
+
883
@v_args(meta=True)
def concept(self, meta: Meta, args) -> ConceptDeclarationStatement:
    """Wrap a parsed concept in a declaration statement, recording its line."""
    parsed = args[0]
    # non-Concept results expose the concept on a ``.concept`` attribute
    declared: Concept = parsed if isinstance(parsed, Concept) else parsed.concept
    if declared.metadata:
        declared.metadata.line_number = meta.line
    return ConceptDeclarationStatement(concept=declared)
893
+
894
def column_assignment_list(self, args):
    """Pass the collected column assignments through unchanged."""
    return args
896
+
897
def column_list(self, args) -> List:
    """Pass the collected column names through unchanged."""
    return args
899
+
900
def grain_clause(self, args) -> Grain:
    """Resolve the concept references in ``args[0]`` into a Grain."""
    known_concepts = self.environment.concepts
    components = [known_concepts[reference] for reference in args[0]]
    return Grain(components=components)
903
+
904
def raw_column_assignment(self, args):
    """Wrap raw column text, dropping its first and last three characters."""
    delimited = args[0]
    return RawColumnExpr(text=delimited[3:-3])
906
+
907
@v_args(meta=True)
def datasource(self, meta: Meta, args):
    """Build a Datasource from its parsed parts and register it.

    ``args[0]`` is the identifier and ``args[1]`` the column assignments;
    the items from ``args[1]`` onward are scanned for an Address, a Grain
    or a Query (a Query is wrapped in parentheses and used as the address).

    Raises:
        ValueError: if no address or query was found.
    """
    identifier = args[0]
    columns: List[ColumnAssignment] = args[1]
    grain: Optional[Grain] = None
    address: Optional[Address] = None
    for part in args[1:]:
        if isinstance(part, Address):
            address = part
        elif isinstance(part, Grain):
            grain = part
        elif isinstance(part, Query):
            address = Address(location=f"({part.text})")
    if not address:
        raise ValueError(
            "Malformed datasource, missing address or query declaration"
        )
    built = Datasource(
        identifier=identifier,
        columns=columns,
        # grain will be set by default from args
        # TODO: move to factory
        grain=grain,  # type: ignore
        address=address,
        namespace=self.environment.namespace,
    )
    # re-grain every bound concept to the grain the datasource settled on
    for assignment in columns:
        assignment.concept = assignment.concept.with_grain(built.grain)
    self.environment.datasources[built.identifier] = built
    return built
937
+
938
@v_args(meta=True)
def comment(self, meta: Meta, args):
    """Convert the single comment token into a Comment node."""
    assert len(args) == 1
    token = args[0]
    return Comment(text=token.value)
942
+
943
@v_args(meta=True)
def select_transform(self, meta, args) -> ConceptTransform:
    """Build and register the output concept of a select transformation.

    ``args[0]`` is the transformation and ``args[1]`` the reference naming
    its output concept.
    """
    function = unwrap_transformation(args[0])
    _, namespace, output, _ = parse_concept_reference(args[1], self.environment)

    # checked in order; the first matching type supplies the converter
    converters = (
        (AggregateWrapper, agg_wrapper_to_concept),
        (WindowItem, window_item_to_concept),
        (FilterItem, filter_item_to_concept),
        (CONSTANT_TYPES, constant_to_concept),
        (Function, function_to_concept),
    )
    for accepted, to_concept in converters:
        if isinstance(function, accepted):
            concept = to_concept(function, namespace=namespace, name=output)
            break
    else:
        # anything else: derive grain and keys from the transformation itself
        if function.output_purpose == Purpose.PROPERTY:
            pkeys = [x for x in function.arguments if isinstance(x, Concept)]
            grain = Grain(components=pkeys)
            keys = tuple(grain.components_copy)
        else:
            grain = None
            keys = None
        concept = Concept(
            name=output,
            namespace=namespace,
            datatype=function.output_datatype,
            purpose=function.output_purpose,
            lineage=function,
            grain=Grain(components=[]) if not grain else grain,
            keys=keys,
        )
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta=meta)
    return ConceptTransform(function=function, output=concept)
983
+
984
@v_args(meta=True)
def concept_nullable_modifier(self, meta: Meta, args) -> Modifier:
    """Map the nullable modifier rule to ``Modifier.NULLABLE``."""
    return Modifier.NULLABLE
987
+
988
@v_args(meta=True)
def select_hide_modifier(self, meta: Meta, args) -> Modifier:
    """Map the hide modifier rule to ``Modifier.HIDDEN``."""
    return Modifier.HIDDEN
991
+
992
@v_args(meta=True)
def select_partial_modifier(self, meta: Meta, args) -> Modifier:
    """Map the partial modifier rule to ``Modifier.PARTIAL``."""
    return Modifier.PARTIAL
995
+
996
@v_args(meta=True)
def select_item(self, meta: Meta, args) -> Optional[SelectItem]:
    """Build a SelectItem from one select entry and its modifiers.

    Returns ``None`` when nothing remains after removing modifiers and
    comments.

    Raises:
        ParseError: if more than one content element remains.
    """
    modifiers = [arg for arg in args if isinstance(arg, Modifier)]
    remaining = [arg for arg in args if not isinstance(arg, (Modifier, Comment))]

    if not remaining:
        return None
    if len(remaining) != 1:
        raise ParseError(
            "Malformed select statement"
            f" {remaining} {self.text[meta.start_pos:meta.end_pos]}"
        )
    content = remaining[0]
    if not isinstance(content, ConceptTransform):
        # a bare reference: look the concept up, passing the line for errors
        content = self.environment.concepts.__getitem__(content, meta.line)
    return SelectItem(content=content, modifiers=modifiers)
1015
+
1016
def select_list(self, args):
    """Return the select entries with falsy items removed."""
    return list(filter(None, args))
1018
+
1019
def limit(self, args):
    """Build a Limit from the integer value of the first token."""
    row_count = int(args[0].value)
    return Limit(count=row_count)
1021
+
1022
def ORDERING(self, args):
    """Convert the ordering token to an Ordering from its lower-cased text."""
    lowered = args.lower()
    return Ordering(lowered)
1024
+
1025
def order_list(self, args):
    """Pair alternating (expression, ordering) entries into OrderItems."""
    expressions = args[::2]
    directions = args[1::2]
    return [
        OrderItem(expr=expression, order=direction)
        for expression, direction in zip(expressions, directions)
    ]
1027
+
1028
def order_by(self, args):
    """Wrap the order items in ``args[0]`` in an OrderBy."""
    order_items = args[0]
    return OrderBy(items=order_items)
1030
+
1031
def over_list(self, args):
    """Resolve each reference in ``args`` to its environment concept."""
    resolved = []
    for reference in args:
        resolved.append(self.environment.concepts[reference])
    return resolved
1033
+
1034
@v_args(meta=True)
def merge_statement(self, meta: Meta, args) -> MergeStatement:
    """Merge the referenced concepts and register the merged concept.

    Every reference in ``args`` is resolved against the environment; all
    resolved concepts must share exactly one datatype.

    Raises:
        SyntaxError: if the concepts do not share a single datatype.
    """
    parsed = [self.environment.concepts[x] for x in args]
    datatypes = {x.datatype for x in parsed}
    if len(datatypes) != 1:
        # the two fragments need an explicit separator, otherwise the
        # datatype set runs straight into "line:"
        raise SyntaxError(
            f"Cannot merge concepts with different datatypes {datatypes};"
            f" line: {meta.line} concepts: {[x.address for x in parsed]}"
        )
    merge = MergeStatement(concepts=parsed, datatype=datatypes.pop())
    self.environment.add_concept(merge.merge_concept, meta=meta)
    return merge
1048
+
1049
def import_statement(self, args: list[str]):
    """Parse an imported file and merge its contents under an alias.

    ``args[0]`` is the dotted import path and ``args[-1]`` the alias. The
    path is resolved relative to the environment's working path with a
    ``.preql`` suffix. Files already present in ``self.parsed`` are reused
    rather than parsed again. Every concept and datasource of the imported
    environment is re-namespaced to the alias and added to this environment.

    Raises:
        ImportError: if the target cannot be read or parsed; the original
            exception is chained as the cause.
    """
    alias = args[-1]
    path = args[0].split(".")

    target = join(self.environment.working_path, *path) + ".preql"
    self.imported.add(target)
    if target in self.parsed:
        nparser = self.parsed[target]
    else:
        try:
            with open(target, "r", encoding="utf-8") as f:
                text = f.read()
            nparser = ParseToObjects(
                visit_tokens=True,
                text=text,
                environment=Environment(
                    working_path=dirname(target),
                ),
                parse_address=target,
                # hand the nested parser everything parsed so far, plus self
                parsed={**self.parsed, **{self.parse_address: self}},
            )
            nparser.transform(PARSER.parse(text))
            self.parsed[target] = nparser
        except Exception as e:
            # name the file itself (not its directory) and keep the cause
            raise ImportError(
                f"Unable to import file {target}, parsing error: {e}"
            ) from e

    for _, concept in nparser.environment.concepts.items():
        self.environment.add_concept(concept.with_namespace(alias))

    for _, datasource in nparser.environment.datasources.items():
        self.environment.add_datasource(datasource.with_namespace(alias))

    self.environment.imports[alias] = ImportStatement(alias=alias, path=args[0])
    return None
1088
+
1089
@v_args(meta=True)
def show_category(self, meta: Meta, args) -> ShowCategory:
    """Convert the first argument into a ShowCategory."""
    category = args[0]
    return ShowCategory(category)
1092
+
1093
@v_args(meta=True)
def show_statement(self, meta: Meta, args) -> ShowStatement:
    """Wrap the first argument as the content of a ShowStatement."""
    content = args[0]
    return ShowStatement(content=content)
1096
+
1097
@v_args(meta=True)
def persist_statement(self, meta: Meta, args) -> PersistStatement:
    """Build a PersistStatement that materializes a select as a datasource.

    ``args`` holds the identifier, the address string, the select and an
    optional grain, in that order.
    """
    identifier: str = args[0]
    address: str = args[1]
    select: SelectStatement = args[2]
    grain: Grain | None = args[3] if len(args) > 3 else None
    # fall back to the default namespace when the environment has none
    target_namespace = self.environment.namespace or DEFAULT_NAMESPACE
    new_datasource = select.to_datasource(
        namespace=target_namespace,
        identifier=identifier,
        address=Address(location=address),
        grain=grain,
    )
    return PersistStatement(select=select, datasource=new_datasource)
1117
+
1118
@v_args(meta=True)
def align_item(self, meta: Meta, args) -> AlignItem:
    """Build an AlignItem: ``args[0]`` is the alias, the rest are concept references."""
    alias, references = args[0], args[1:]
    resolved = [self.environment.concepts[reference] for reference in references]
    return AlignItem(
        alias=alias,
        namespace=self.environment.namespace,
        concepts=resolved,
    )
1125
+
1126
@v_args(meta=True)
def align_clause(self, meta: Meta, args) -> AlignClause:
    """Collect the align items into an AlignClause."""
    align_items = args
    return AlignClause(items=align_items)
1129
+
1130
@v_args(meta=True)
def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
    """Assemble a MultiSelectStatement and register its derived concepts.

    ``args`` is scanned by type: SelectStatements are collected in order,
    while a Limit, OrderBy, WhereClause and AlignClause each fill their
    slot (the last occurrence of each wins).

    Raises:
        ValueError: if no align clause was supplied.
    """
    selects = []
    align: AlignClause | None = None
    limit: int | None = None
    order_by: OrderBy | None = None
    where: WhereClause | None = None
    for arg in args:
        if isinstance(arg, SelectStatement):
            selects.append(arg)
        elif isinstance(arg, Limit):
            limit = arg.count
        elif isinstance(arg, OrderBy):
            order_by = arg
        elif isinstance(arg, WhereClause):
            where = arg
        elif isinstance(arg, AlignClause):
            align = arg

    # explicit check instead of assert, which is stripped under ``python -O``
    if not align:
        raise ValueError("Malformed multi-select, missing align clause")
    multi = MultiSelectStatement(
        selects=selects,
        align=align,
        namespace=self.environment.namespace,
        where_clause=where,
        order_by=order_by,
        limit=limit,
    )
    for concept in multi.derived_concepts:
        self.environment.add_concept(concept, meta=meta)
    return multi
1162
+
1163
@v_args(meta=True)
def select_statement(self, meta: Meta, args) -> SelectStatement:
    """Assemble a SelectStatement and re-grain its transform outputs.

    ``args`` is scanned by type for the select item list, a Limit, an
    OrderBy and a WhereClause.

    Raises:
        ValueError: if no (non-empty) select item list was found.
    """
    select_items = limit = order_by = where = None
    for part in args:
        if isinstance(part, list):
            select_items = part
        elif isinstance(part, Limit):
            limit = part.count
        elif isinstance(part, OrderBy):
            order_by = part
        elif isinstance(part, WhereClause):
            where = part
    if not select_items:
        raise ValueError("Malformed select, missing select items")
    output = SelectStatement(
        selection=select_items, where_clause=where, limit=limit, order_by=order_by
    )
    # we don't know the grain of an aggregate at assignment time
    # so rebuild at this point in the tree
    # TODO: simplify
    for selected in select_items:
        if not isinstance(selected.content, ConceptTransform):
            continue
        regrained = selected.content.output.with_select_grain(output.grain)
        self.environment.add_concept(regrained, meta=meta)
        selected.content.output = regrained
    if order_by:
        for ordering in order_by.items:
            is_metric_concept = (
                isinstance(ordering.expr, Concept)
                and ordering.expr.purpose == Purpose.METRIC
            )
            if is_metric_concept:
                ordering.expr = ordering.expr.with_grain(output.grain)
    return output
1199
+
1200
@v_args(meta=True)
def address(self, meta: Meta, args):
    """Wrap the first argument as an Address location."""
    location = args[0]
    return Address(location=location)
1203
+
1204
@v_args(meta=True)
def query(self, meta: Meta, args):
    """Wrap raw query text, dropping its first and last three characters."""
    delimited = args[0]
    return Query(text=delimited[3:-3])
1207
+
1208
def where(self, args):
    """Build a WhereClause from the root condition in ``args[0]``."""
    root = args[0]
    if isinstance(root, (Comparison, Conditional, Parenthetical)):
        return WhereClause(conditional=root)
    # any other expression is compared against True
    implicit = Comparison(left=root, right=True, operator=ComparisonOperator.EQ)
    return WhereClause(conditional=implicit)
1213
+
1214
@v_args(meta=True)
def function_binding_list(self, meta: Meta, args) -> list:
    """Pass the collected function binding items through unchanged.

    The result is the ``args`` sequence itself, not a Concept.
    """
    return args
1217
+
1218
@v_args(meta=True)
def function_binding_item(self, meta: Meta, args) -> list:
    """Pass the children of one function binding through unchanged.

    The result is the ``args`` sequence itself, not a Concept.
    """
    return args
1221
+
1222
@v_args(meta=True)
def raw_function(self, meta: Meta, args) -> Function:
    """Build a Function for a declared binding and store it on the environment.

    ``args[0]`` is the identity used as the key in
    ``self.environment.functions``, ``args[1]`` the binding items (the
    second element of each becomes an argument) and ``args[2]`` the output
    datatype.
    """
    identity = args[0]
    fargs = args[1]
    output = args[2]
    item = Function(
        # NOTE(review): operator is fixed to SUM whatever the identity is,
        # and arg_count is len(fargs) + 1 — confirm both are intended
        operator=FunctionType.SUM,
        arguments=[x[1] for x in fargs],
        output_datatype=output,
        output_purpose=Purpose.PROPERTY,
        arg_count=len(fargs) + 1,
    )
    self.environment.functions[identity] = item
    return item
1237
+
1238
@v_args(meta=True)
def function(self, meta: Meta, args) -> Function:
    """Unwrap the single already-built function child."""
    built = args[0]
    return built
1241
+
1242
def int_lit(self, args):
    """Concatenate the token strings and parse the result as an int."""
    digits = "".join(args)
    return int(digits)
1244
+
1245
def bool_lit(self, args):
    """Return True when the first token spells "true" in any casing."""
    normalized = args[0].capitalize()
    return normalized == "True"
1247
+
1248
def null_lit(self, args):
    """Return the shared NULL_VALUE for a null literal."""
    return NULL_VALUE
1250
+
1251
def float_lit(self, args):
    """Parse the first token as a float."""
    token = args[0]
    return float(token)
1253
+
1254
def array_lit(self, args):
    """Build a ListWrapper from array elements that share one datatype.

    Raises:
        SyntaxError: if the elements do not resolve to exactly one datatype
            (this includes an empty element list).
    """
    types = [arg_to_datatype(arg) for arg in args]
    # explicit check instead of assert, which is stripped under ``python -O``
    if len(set(types)) != 1:
        raise SyntaxError(
            f"All array elements must have the same datatype, got {types}"
        )
    return ListWrapper(args, type=types[0])
1258
+
1259
def literal(self, args):
    """Unwrap the single already-converted literal child."""
    value = args[0]
    return value
1261
+
1262
def comparison(self, args) -> Comparison:
    """Build a Comparison from ``[left, operator, right]``."""
    left, operator, right = args[0], args[1], args[2]
    return Comparison(left=left, right=right, operator=operator)
1264
+
1265
def expr_tuple(self, args):
    """Wrap the whole element list in a Parenthetical."""
    elements = args
    return Parenthetical(content=elements)
1267
+
1268
def parenthetical(self, args):
    """Wrap the first child in a Parenthetical."""
    inner = args[0]
    return Parenthetical(content=inner)
1270
+
1271
def conditional(self, args):
    """Build a Conditional from ``[left, operator, right]``."""
    left, operator, right = args[0], args[1], args[2]
    return Conditional(left=left, right=right, operator=operator)
1273
+
1274
def window_order(self, args):
    """Convert the first argument into a WindowOrder."""
    raw_order = args[0]
    return WindowOrder(raw_order)
1276
+
1277
def window_order_by(self, args):
    """Flatten the tree by returning the single child."""
    only_child = args[0]
    return only_child
1280
+
1281
def window(self, args):
    """Build a Window: ``args[0]`` is the order, ``args[1]`` the count token."""
    order, count_token = args[0], args[1]
    return Window(count=count_token.value, window_order=order)
1283
+
1284
def WINDOW_TYPE(self, args):
    """Convert the whitespace-stripped token text into a WindowType."""
    stripped = args.strip()
    return WindowType(stripped)
1286
+
1287
def window_item_over(self, args):
    """Tag the first child as the OVER contents of a window item."""
    contents = args[0]
    return WindowItemOver(contents=contents)
1289
+
1290
def window_item_order(self, args):
    """Tag the first child as the ORDER contents of a window item."""
    contents = args[0]
    return WindowItemOrder(contents=contents)
1292
+
1293
def window_item(self, args) -> WindowItem:
    """Build a WindowItem from a window type, a concept reference and clauses.

    ``args[0]`` is the window type and ``args[1]`` the concept reference;
    the remaining items may supply ORDER and OVER contents, each
    defaulting to an empty list.
    """
    window_type = args[0]
    ordering = []
    partition = []
    for clause in args[2:]:
        if isinstance(clause, WindowItemOrder):
            ordering = clause.contents
        elif isinstance(clause, WindowItemOver):
            partition = clause.contents
    target = self.environment.concepts[args[1]]
    return WindowItem(
        type=window_type, content=target, over=partition, order_by=ordering
    )
1304
+
1305
def filter_item(self, args) -> FilterItem:
    """Build a FilterItem from a concept reference and its where clause."""
    condition: WhereClause
    reference, condition = args
    target = self.environment.concepts[reference]
    return FilterItem(content=target, where=condition)
1310
+
1311
+ # BEGIN FUNCTIONS
1312
@v_args(meta=True)
def expr_reference(self, meta, args) -> Concept:
    """Resolve a concept reference, passing the source line to the lookup."""
    reference = args[0]
    return self.environment.concepts.__getitem__(reference, meta.line)
1315
+
1316
def expr(self, args):
    """Unwrap the single child of an expression.

    Raises:
        ParseError: if the expression has more than one child.
    """
    if len(args) <= 1:
        return args[0]
    raise ParseError("Expression should have one child only.")
1320
+
1321
def aggregate_over(self, args):
    """Unwrap the single child of an aggregate OVER clause."""
    over = args[0]
    return over
1323
+
1324
def aggregate_all(self, args):
    """Return a one-element list holding the internal all-rows concept."""
    all_rows_address = f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}"
    return [self.environment.concepts[all_rows_address]]
1326
+
1327
def aggregate_functions(self, args):
    """Wrap an aggregate function, attaching ``by`` when a second arg exists."""
    wrapper_kwargs = {"function": args[0]}
    if len(args) == 2:
        wrapper_kwargs["by"] = args[1]
    return AggregateWrapper(**wrapper_kwargs)
1331
+
1332
@v_args(meta=True)
def index_access(self, meta, args):
    """Build an IndexAccess from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return IndexAccess(processed)
1336
+
1337
@v_args(meta=True)
def attr_access(self, meta, args):
    """Build an AttrAccess from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return AttrAccess(processed)
1341
+
1342
@v_args(meta=True)
def fcoalesce(self, meta, args):
    """Build a Coalesce from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return Coalesce(processed)
1346
+
1347
@v_args(meta=True)
def unnest(self, meta, args):
    """Build an Unnest from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return Unnest(processed)
1351
+
1352
@v_args(meta=True)
def count(self, meta, args):
    """Build a Count from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return Count(processed)
1356
+
1357
@v_args(meta=True)
def fgroup(self, meta, args):
    """Build a Group from a target and an optional list of extra arguments.

    With exactly two args, the list in ``args[1]`` is appended after
    ``args[0]``; otherwise only ``args[0]`` is used.
    """
    raw = [args[0]] + args[1] if len(args) == 2 else [args[0]]
    processed = self.process_function_args(raw, meta=meta)
    return Group(processed)
1364
+
1365
@v_args(meta=True)
def fabs(self, meta, args):
    """Build an Abs from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return Abs(processed)
1369
+
1370
@v_args(meta=True)
def count_distinct(self, meta, args):
    """Build a CountDistinct from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return CountDistinct(processed)
1374
+
1375
@v_args(meta=True)
def sum(self, meta, args):
    """Build a SUM metric whose datatype follows its first processed argument."""
    processed = self.process_function_args(args, meta=meta)
    summed = processed[0]
    return Function(
        operator=FunctionType.SUM,
        arguments=processed,
        output_purpose=Purpose.METRIC,
        output_datatype=summed.datatype,
        arg_count=1,
    )
1385
+
1386
@v_args(meta=True)
def avg(self, meta, args):
    """Build an AVG metric whose datatype follows its first processed argument."""
    processed = self.process_function_args(args, meta=meta)
    averaged = processed[0]
    numeric_inputs = {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER}
    return Function(
        operator=FunctionType.AVG,
        arguments=processed,
        output_purpose=Purpose.METRIC,
        output_datatype=averaged.datatype,
        valid_inputs=numeric_inputs,
        arg_count=1,
    )
1399
+
1400
@v_args(meta=True)
def max(self, meta, args):
    """Build a Max from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return Max(processed)
1404
+
1405
@v_args(meta=True)
def min(self, meta, args):
    """Build a Min from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return Min(processed)
1409
+
1410
@v_args(meta=True)
def len(self, meta, args):
    """Build a LENGTH function (integer output) over a string, array or map."""
    processed = self.process_function_args(args, meta=meta)
    sized_inputs = {DataType.STRING, DataType.ARRAY, DataType.MAP}
    return Function(
        operator=FunctionType.LENGTH,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=sized_inputs,
    )
1421
+
1422
@v_args(meta=True)
def fsplit(self, meta, args):
    """Build a Split from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return Split(processed)
1426
+
1427
@v_args(meta=True)
def concat(self, meta, args):
    """Build a CONCAT function (string output) over string arguments."""
    processed = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.CONCAT,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.STRING,
        valid_inputs={DataType.STRING},
        arg_count=99,
    )
1439
+
1440
@v_args(meta=True)
def like(self, meta, args):
    """Build a two-argument LIKE function (boolean output) over strings."""
    processed = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.LIKE,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.BOOL,
        valid_inputs={DataType.STRING},
        arg_count=2,
    )
1451
+
1452
@v_args(meta=True)
def alt_like(self, meta, args):
    """Build a LIKE function for the alternate rule; same output as ``like``."""
    processed = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.LIKE,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.BOOL,
        valid_inputs={DataType.STRING},
        arg_count=2,
    )
1463
+
1464
@v_args(meta=True)
def ilike(self, meta, args):
    """Build a two-argument ILIKE function (boolean output) over strings."""
    processed = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.ILIKE,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.BOOL,
        valid_inputs={DataType.STRING},
        arg_count=2,
    )
1475
+
1476
@v_args(meta=True)
def upper(self, meta, args):
    """Build a one-argument UPPER function (string output) over a string."""
    processed = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.UPPER,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.STRING,
        valid_inputs={DataType.STRING},
        arg_count=1,
    )
1487
+
1488
@v_args(meta=True)
def fstrpos(self, meta, args):
    """Build a StrPos from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return StrPos(processed)
1492
+
1493
@v_args(meta=True)
def fsubstring(self, meta, args):
    """Build a SubString from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return SubString(processed)
1500
+
1501
@v_args(meta=True)
def lower(self, meta, args):
    """Build a one-argument LOWER function (string output) over a string."""
    processed = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.LOWER,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.STRING,
        valid_inputs={DataType.STRING},
        arg_count=1,
    )
1512
+
1513
+ # date functions
1514
@v_args(meta=True)
def fdate(self, meta, args):
    """Build a one-argument DATE function over a date-like or string input."""
    processed = self.process_function_args(args, meta=meta)
    convertible = {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }
    return Function(
        operator=FunctionType.DATE,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.DATE,
        valid_inputs=convertible,
        arg_count=1,
    )
1530
+
1531
def DATE_PART(self, args):
    """Convert the token's value into a DatePart."""
    part_name = args.value
    return DatePart(part_name)
1533
+
1534
@v_args(meta=True)
def fdate_trunc(self, meta, args):
    """Build a DATE_TRUNCATE function: a date-like value and a date part."""
    processed = self.process_function_args(args, meta=meta)
    # one set of accepted datatypes per positional argument
    value_types = {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }
    part_types = {DataType.DATE_PART}
    return Function(
        operator=FunctionType.DATE_TRUNCATE,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.DATE,
        valid_inputs=[value_types, part_types],
        arg_count=2,
    )
1553
+
1554
@v_args(meta=True)
def fdate_part(self, meta, args):
    """Build a DATE_PART function: a date-like value and the part to extract.

    The output datatype is INTEGER, matching the single-part extractors
    (``fyear``, ``fmonth``, ``fday`` ...): an extracted date part is a
    number, not a date.
    """
    args = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.DATE_PART,
        arguments=args,
        # was DataType.DATE, apparently carried over from fdate_trunc
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        # one set of accepted datatypes per positional argument
        valid_inputs=[
            {
                DataType.DATE,
                DataType.TIMESTAMP,
                DataType.DATETIME,
                DataType.STRING,
            },
            {DataType.DATE_PART},
        ],
        arg_count=2,
    )
1573
+
1574
@v_args(meta=True)
def fdate_add(self, meta, args):
    """Build a DATE_ADD function: a date-like value, a date part and an integer."""
    processed = self.process_function_args(args, meta=meta)
    # one set of accepted datatypes per positional argument
    value_types = {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }
    part_types = {DataType.DATE_PART}
    amount_types = {DataType.INTEGER}
    return Function(
        operator=FunctionType.DATE_ADD,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.DATE,
        valid_inputs=[value_types, part_types, amount_types],
        arg_count=3,
    )
1594
+
1595
@v_args(meta=True)
def fdate_diff(self, meta, args):
    """Build a DATE_DIFF function (integer output): two date-like values and a part.

    The output purpose is derived from the processed arguments.
    """
    processed = self.process_function_args(args, meta=meta)
    derived_purpose = function_args_to_output_purpose(processed)
    # one set of accepted datatypes per positional argument
    first_types = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    second_types = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    part_types = {DataType.DATE_PART}
    return Function(
        operator=FunctionType.DATE_DIFF,
        arguments=processed,
        output_purpose=derived_purpose,
        output_datatype=DataType.INTEGER,
        valid_inputs=[first_types, second_types, part_types],
        arg_count=3,
    )
1619
+
1620
@v_args(meta=True)
def fdatetime(self, meta, args):
    """Build a one-argument DATETIME function over a date-like or string input."""
    processed = self.process_function_args(args, meta=meta)
    convertible = {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }
    return Function(
        operator=FunctionType.DATETIME,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.DATETIME,
        valid_inputs=convertible,
        arg_count=1,
    )
1636
+
1637
@v_args(meta=True)
def ftimestamp(self, meta, args):
    """Build a one-argument TIMESTAMP function over a timestamp or string."""
    processed = self.process_function_args(args, meta=meta)
    convertible = {DataType.TIMESTAMP, DataType.STRING}
    return Function(
        operator=FunctionType.TIMESTAMP,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.TIMESTAMP,
        valid_inputs=[convertible],
        arg_count=1,
    )
1648
+
1649
@v_args(meta=True)
def fsecond(self, meta, args):
    """Build a SECOND function (integer output) over a timestamp or datetime."""
    processed = self.process_function_args(args, meta=meta)
    timed_inputs = {DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.SECOND,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=timed_inputs,
        arg_count=1,
    )
1660
+
1661
@v_args(meta=True)
def fminute(self, meta, args):
    """Build a MINUTE function (integer output) over a timestamp or datetime."""
    processed = self.process_function_args(args, meta=meta)
    timed_inputs = {DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.MINUTE,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=timed_inputs,
        arg_count=1,
    )
1672
+
1673
@v_args(meta=True)
def fhour(self, meta, args):
    """Build an HOUR function (integer output) over a timestamp or datetime."""
    processed = self.process_function_args(args, meta=meta)
    timed_inputs = {DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.HOUR,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=timed_inputs,
        arg_count=1,
    )
1684
+
1685
@v_args(meta=True)
def fday(self, meta, args):
    """Build a DAY function (integer output) over a date, timestamp or datetime."""
    processed = self.process_function_args(args, meta=meta)
    dated_inputs = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.DAY,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=dated_inputs,
        arg_count=1,
    )
1696
+
1697
@v_args(meta=True)
def fday_of_week(self, meta, args):
    """Build a DAY_OF_WEEK function (integer output) over a date-like input."""
    processed = self.process_function_args(args, meta=meta)
    dated_inputs = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.DAY_OF_WEEK,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=dated_inputs,
        arg_count=1,
    )
1708
+
1709
@v_args(meta=True)
def fweek(self, meta, args):
    """Build a WEEK function (integer output) over a date, timestamp or datetime."""
    processed = self.process_function_args(args, meta=meta)
    dated_inputs = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.WEEK,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=dated_inputs,
        arg_count=1,
    )
1720
+
1721
@v_args(meta=True)
def fmonth(self, meta, args):
    """Build a MONTH function (integer output) over a date, timestamp or datetime."""
    processed = self.process_function_args(args, meta=meta)
    dated_inputs = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.MONTH,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=dated_inputs,
        arg_count=1,
    )
1732
+
1733
@v_args(meta=True)
def fquarter(self, meta, args):
    """Build a QUARTER function (integer output) over a date-like input."""
    processed = self.process_function_args(args, meta=meta)
    dated_inputs = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.QUARTER,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=dated_inputs,
        arg_count=1,
    )
1744
+
1745
@v_args(meta=True)
def fyear(self, meta, args):
    """Build a YEAR function (integer output) over a date, timestamp or datetime."""
    processed = self.process_function_args(args, meta=meta)
    dated_inputs = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.YEAR,
        arguments=processed,
        output_purpose=Purpose.PROPERTY,
        output_datatype=DataType.INTEGER,
        valid_inputs=dated_inputs,
        arg_count=1,
    )
1756
+
1757
+ # utility functions
1758
@v_args(meta=True)
def fcast(self, meta, args) -> Function:
    """Build a CAST function whose output datatype is its second argument.

    The output purpose is derived from the processed arguments.
    """
    processed = self.process_function_args(args, meta=meta)
    target_type = processed[1]
    castable = {
        DataType.INTEGER,
        DataType.STRING,
        DataType.FLOAT,
        DataType.NUMBER,
    }
    return Function(
        operator=FunctionType.CAST,
        arguments=processed,
        output_purpose=function_args_to_output_purpose(processed),
        output_datatype=target_type,
        valid_inputs=castable,
        arg_count=2,
    )
1775
+
1776
+ # math functions
1777
@v_args(meta=True)
def fadd(self, meta, args) -> Function:
    """Build an ADD function typed after its first processed argument."""
    processed = self.process_function_args(args, meta=meta)
    result_type = arg_to_datatype(processed[0])
    # TODO: check for valid transforms?
    return Function(
        operator=FunctionType.ADD,
        arguments=processed,
        output_purpose=function_args_to_output_purpose(processed),
        output_datatype=result_type,
        arg_count=2,
    )
1790
+
1791
@v_args(meta=True)
def fsub(self, meta, args) -> Function:
    """Build a SUBTRACT function typed after its first processed argument."""
    processed = self.process_function_args(args, meta=meta)
    result_type = arg_to_datatype(processed[0])
    return Function(
        operator=FunctionType.SUBTRACT,
        arguments=processed,
        output_purpose=function_args_to_output_purpose(processed),
        output_datatype=result_type,
        arg_count=2,
    )
1803
+
1804
@v_args(meta=True)
def fmul(self, meta, args) -> Function:
    """Build a MULTIPLY function typed after its first processed argument."""
    processed = self.process_function_args(args, meta=meta)
    result_type = arg_to_datatype(processed[0])
    return Function(
        operator=FunctionType.MULTIPLY,
        arguments=processed,
        output_purpose=function_args_to_output_purpose(processed),
        output_datatype=result_type,
        arg_count=2,
    )
1816
+
1817
@v_args(meta=True)
def fdiv(self, meta: Meta, args):
    """Build a DIVIDE function typed after its first processed argument.

    The datatype is inferred after ``process_function_args`` has run, the
    same order the sibling arithmetic rules (``fadd``, ``fsub``, ``fmul``)
    use, so inference sees the processed argument rather than the raw
    parse result.
    """
    args = self.process_function_args(args, meta=meta)
    output_datatype = arg_to_datatype(args[0])
    return Function(
        operator=FunctionType.DIVIDE,
        arguments=args,
        output_datatype=output_datatype,
        output_purpose=function_args_to_output_purpose(args),
        arg_count=2,
    )
1829
+
1830
@v_args(meta=True)
def fmod(self, meta: Meta, args):
    """Build a MOD function typed after its first processed argument.

    The datatype is inferred after ``process_function_args`` has run, the
    same order ``fround`` and the other arithmetic rules use, so inference
    sees the processed argument rather than the raw parse result.
    """
    args = self.process_function_args(args, meta=meta)
    output_datatype = arg_to_datatype(args[0])
    return Function(
        operator=FunctionType.MOD,
        arguments=args,
        output_datatype=output_datatype,
        output_purpose=function_args_to_output_purpose(args),
        # one set of accepted datatypes per positional argument
        valid_inputs=[
            {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
            {DataType.INTEGER},
        ],
        arg_count=2,
    )
1845
+
1846
@v_args(meta=True)
def fround(self, meta, args) -> Function:
    """Build a ROUND function typed after its first processed argument."""
    processed = self.process_function_args(args, meta=meta)
    result_type = arg_to_datatype(processed[0])
    # one set of accepted datatypes per positional argument
    value_types = {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER}
    digit_types = {DataType.INTEGER}
    return Function(
        operator=FunctionType.ROUND,
        arguments=processed,
        output_purpose=function_args_to_output_purpose(processed),
        output_datatype=result_type,
        valid_inputs=[value_types, digit_types],
        arg_count=2,
    )
1861
+
1862
def fcase(self, args: List[Union[CaseWhen, CaseElse]]):
    """Build a CASE ``Function`` from its WHEN / ELSE branches.

    Every branch's ``expr`` must resolve to the same datatype, which becomes
    the output datatype of the CASE expression.

    Raises:
        SyntaxError: if the branches do not share exactly one datatype
            (this includes an empty branch list).
    """
    datatypes = {arg_to_datatype(branch.expr) for branch in args}
    if len(datatypes) != 1:
        raise SyntaxError(
            f"All case expressions must have the same output datatype, got {datatypes}"
        )
    (result_type,) = datatypes
    return Function(
        operator=FunctionType.CASE,
        arguments=args,
        output_datatype=result_type,
        output_purpose=Purpose.PROPERTY,
        arg_count=InfiniteFunctionArgs,
    )
1879
+
1880
@v_args(meta=True)
def fcase_when(self, meta, args) -> CaseWhen:
    """Build a ``CaseWhen`` branch: first processed arg is the comparison,
    second is the result expression."""
    processed = self.process_function_args(args, meta=meta)
    condition = processed[0]
    result = processed[1]
    return CaseWhen(comparison=condition, expr=result)
1884
+
1885
@v_args(meta=True)
def fcase_else(self, meta, args) -> CaseElse:
    """Build the ``CaseElse`` branch from the first processed argument."""
    processed = self.process_function_args(args, meta=meta)
    fallback = processed[0]
    return CaseElse(expr=fallback)
1889
+
1890
@v_args(meta=True)
def fcurrent_date(self, meta, args):
    """Process the parsed arguments and hand them to ``CurrentDate``."""
    return CurrentDate(self.process_function_args(args, meta=meta))
1894
+
1895
@v_args(meta=True)
def fcurrent_datetime(self, meta, args):
    """Process the parsed arguments and hand them to ``CurrentDatetime``."""
    return CurrentDatetime(self.process_function_args(args, meta=meta))
1899
+
1900
@v_args(meta=True)
def fnot(self, meta, args):
    """Process the parsed arguments and hand them to ``IsNull``."""
    # NOTE(review): the rule is named ``fnot`` but the result is built with
    # ``IsNull`` -- confirm this is intentional and not a copy-paste slip.
    return IsNull(self.process_function_args(args, meta=meta))
1904
+
1905
+
1906
def unpack_visit_error(e: VisitError):
    """Re-raise the most useful exception wrapped inside a ``VisitError``.

    This is required to get exceptions from imports, which would raise
    nested VisitErrors. This function never returns normally; every path
    ends in a ``raise``.

    Raises:
        UndefinedConceptException, ImportError: re-raised unchanged when they
            are the wrapped exception.
        InvalidSyntaxException: when the wrapped exception is a
            ``ValidationError`` or ``TypeError``; the original is attached as
            ``__cause__``.
        VisitError: the (innermost) visit error itself for any other wrapped
            exception type.
    """
    cause = e.orig_exc
    if isinstance(cause, VisitError):
        # Nested wrapper: recurse, which always raises from the inner level.
        unpack_visit_error(cause)
    elif isinstance(cause, (UndefinedConceptException, ImportError)):
        raise cause
    elif isinstance(cause, (ValidationError, TypeError)):
        # Chain explicitly so the underlying error stays in the traceback.
        raise InvalidSyntaxException(str(cause)) from cause
    raise e
1916
+
1917
+
1918
def parse_text(text: str, environment: Optional[Environment] = None) -> Tuple[
    Environment,
    List[
        Datasource
        | ImportStatement
        | SelectStatement
        | PersistStatement
        | ShowStatement
        | None
    ],
]:
    """Parse ``text`` and return the environment plus the parsed statements.

    Args:
        text: source text to parse.
        environment: environment handed to the transformer; a fresh
            ``Environment(datasources={})`` is created when ``None``.

    Returns:
        A tuple of the environment that was used and the list of truthy
        items returned by the second (``hydrate_missing``) pass.

    Raises:
        InvalidSyntaxException: for parser errors, ``ValidationError`` and
            ``TypeError``; the original exception is attached as
            ``__cause__``.
        UndefinedConceptException, ImportError, VisitError: surfaced via
            ``unpack_visit_error`` when raised during the transform.
    """
    # Test for None explicitly so a caller-supplied environment is never
    # swapped out just because it happens to evaluate as falsy.
    if environment is None:
        environment = Environment(datasources={})
    parser = ParseToObjects(visit_tokens=True, text=text, environment=environment)

    try:
        parser.transform(PARSER.parse(text))
        # handle circular dependencies
        output = [item for item in parser.hydrate_missing() if item]
    except VisitError as e:
        unpack_visit_error(e)
        # this will never be reached
        raise e
    except (
        UnexpectedCharacters,
        UnexpectedEOF,
        UnexpectedInput,
        UnexpectedToken,
        ValidationError,
        TypeError,
    ) as e:
        # Chain explicitly so the underlying error stays in the traceback.
        raise InvalidSyntaxException(str(e)) from e

    return environment, output