pytrilogy 0.0.1.102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- pytrilogy-0.0.1.102.dist-info/LICENSE.md +19 -0
- pytrilogy-0.0.1.102.dist-info/METADATA +277 -0
- pytrilogy-0.0.1.102.dist-info/RECORD +77 -0
- pytrilogy-0.0.1.102.dist-info/WHEEL +5 -0
- pytrilogy-0.0.1.102.dist-info/entry_points.txt +2 -0
- pytrilogy-0.0.1.102.dist-info/top_level.txt +1 -0
- trilogy/__init__.py +8 -0
- trilogy/compiler.py +0 -0
- trilogy/constants.py +30 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +3 -0
- trilogy/core/enums.py +270 -0
- trilogy/core/env_processor.py +33 -0
- trilogy/core/environment_helpers.py +156 -0
- trilogy/core/ergonomics.py +187 -0
- trilogy/core/exceptions.py +23 -0
- trilogy/core/functions.py +320 -0
- trilogy/core/graph_models.py +55 -0
- trilogy/core/internal.py +37 -0
- trilogy/core/models.py +3145 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +603 -0
- trilogy/core/processing/graph_utils.py +44 -0
- trilogy/core/processing/node_generators/__init__.py +25 -0
- trilogy/core/processing/node_generators/basic_node.py +71 -0
- trilogy/core/processing/node_generators/common.py +239 -0
- trilogy/core/processing/node_generators/concept_merge.py +152 -0
- trilogy/core/processing/node_generators/filter_node.py +83 -0
- trilogy/core/processing/node_generators/group_node.py +92 -0
- trilogy/core/processing/node_generators/group_to_node.py +99 -0
- trilogy/core/processing/node_generators/merge_node.py +148 -0
- trilogy/core/processing/node_generators/multiselect_node.py +189 -0
- trilogy/core/processing/node_generators/rowset_node.py +130 -0
- trilogy/core/processing/node_generators/select_node.py +328 -0
- trilogy/core/processing/node_generators/unnest_node.py +37 -0
- trilogy/core/processing/node_generators/window_node.py +85 -0
- trilogy/core/processing/nodes/__init__.py +76 -0
- trilogy/core/processing/nodes/base_node.py +251 -0
- trilogy/core/processing/nodes/filter_node.py +49 -0
- trilogy/core/processing/nodes/group_node.py +110 -0
- trilogy/core/processing/nodes/merge_node.py +326 -0
- trilogy/core/processing/nodes/select_node_v2.py +198 -0
- trilogy/core/processing/nodes/unnest_node.py +54 -0
- trilogy/core/processing/nodes/window_node.py +34 -0
- trilogy/core/processing/utility.py +278 -0
- trilogy/core/query_processor.py +331 -0
- trilogy/dialect/__init__.py +0 -0
- trilogy/dialect/base.py +679 -0
- trilogy/dialect/bigquery.py +80 -0
- trilogy/dialect/common.py +43 -0
- trilogy/dialect/config.py +55 -0
- trilogy/dialect/duckdb.py +83 -0
- trilogy/dialect/enums.py +95 -0
- trilogy/dialect/postgres.py +86 -0
- trilogy/dialect/presto.py +82 -0
- trilogy/dialect/snowflake.py +82 -0
- trilogy/dialect/sql_server.py +89 -0
- trilogy/docs/__init__.py +0 -0
- trilogy/engine.py +48 -0
- trilogy/executor.py +242 -0
- trilogy/hooks/__init__.py +0 -0
- trilogy/hooks/base_hook.py +37 -0
- trilogy/hooks/graph_hook.py +24 -0
- trilogy/hooks/query_debugger.py +133 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +176 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +2 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +1951 -0
- trilogy/parsing/render.py +483 -0
- trilogy/py.typed +0 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/trilogy.py +127 -0
- trilogy/utility.py +31 -0
|
@@ -0,0 +1,1951 @@
|
|
|
1
|
+
from os.path import dirname, join
|
|
2
|
+
from typing import List, Optional, Tuple, Union
|
|
3
|
+
from re import IGNORECASE
|
|
4
|
+
from lark import Lark, Transformer, v_args
|
|
5
|
+
from lark.exceptions import (
|
|
6
|
+
UnexpectedCharacters,
|
|
7
|
+
UnexpectedEOF,
|
|
8
|
+
UnexpectedInput,
|
|
9
|
+
UnexpectedToken,
|
|
10
|
+
VisitError,
|
|
11
|
+
)
|
|
12
|
+
from lark.tree import Meta
|
|
13
|
+
from pydantic import ValidationError
|
|
14
|
+
from trilogy.core.internal import INTERNAL_NAMESPACE, ALL_ROWS_CONCEPT
|
|
15
|
+
from trilogy.constants import (
|
|
16
|
+
DEFAULT_NAMESPACE,
|
|
17
|
+
NULL_VALUE,
|
|
18
|
+
VIRTUAL_CONCEPT_PREFIX,
|
|
19
|
+
)
|
|
20
|
+
from trilogy.core.enums import (
|
|
21
|
+
BooleanOperator,
|
|
22
|
+
ComparisonOperator,
|
|
23
|
+
FunctionType,
|
|
24
|
+
InfiniteFunctionArgs,
|
|
25
|
+
FunctionClass,
|
|
26
|
+
Modifier,
|
|
27
|
+
Ordering,
|
|
28
|
+
Purpose,
|
|
29
|
+
WindowOrder,
|
|
30
|
+
WindowType,
|
|
31
|
+
DatePart,
|
|
32
|
+
ShowCategory,
|
|
33
|
+
)
|
|
34
|
+
from trilogy.core.exceptions import InvalidSyntaxException, UndefinedConceptException
|
|
35
|
+
from trilogy.core.functions import (
|
|
36
|
+
Count,
|
|
37
|
+
CountDistinct,
|
|
38
|
+
Group,
|
|
39
|
+
Max,
|
|
40
|
+
Min,
|
|
41
|
+
Split,
|
|
42
|
+
IndexAccess,
|
|
43
|
+
AttrAccess,
|
|
44
|
+
Abs,
|
|
45
|
+
Unnest,
|
|
46
|
+
Coalesce,
|
|
47
|
+
function_args_to_output_purpose,
|
|
48
|
+
CurrentDate,
|
|
49
|
+
CurrentDatetime,
|
|
50
|
+
IsNull,
|
|
51
|
+
SubString,
|
|
52
|
+
StrPos,
|
|
53
|
+
)
|
|
54
|
+
from trilogy.core.models import (
|
|
55
|
+
Address,
|
|
56
|
+
AlignClause,
|
|
57
|
+
AlignItem,
|
|
58
|
+
AggregateWrapper,
|
|
59
|
+
CaseElse,
|
|
60
|
+
CaseWhen,
|
|
61
|
+
ColumnAssignment,
|
|
62
|
+
Comment,
|
|
63
|
+
Comparison,
|
|
64
|
+
Concept,
|
|
65
|
+
ConceptTransform,
|
|
66
|
+
Conditional,
|
|
67
|
+
Datasource,
|
|
68
|
+
MergeStatement,
|
|
69
|
+
Environment,
|
|
70
|
+
FilterItem,
|
|
71
|
+
Function,
|
|
72
|
+
Grain,
|
|
73
|
+
ImportStatement,
|
|
74
|
+
Limit,
|
|
75
|
+
Metadata,
|
|
76
|
+
MultiSelectStatement,
|
|
77
|
+
OrderBy,
|
|
78
|
+
OrderItem,
|
|
79
|
+
Parenthetical,
|
|
80
|
+
PersistStatement,
|
|
81
|
+
Query,
|
|
82
|
+
SelectStatement,
|
|
83
|
+
SelectItem,
|
|
84
|
+
WhereClause,
|
|
85
|
+
Window,
|
|
86
|
+
WindowItem,
|
|
87
|
+
WindowItemOrder,
|
|
88
|
+
WindowItemOver,
|
|
89
|
+
RawColumnExpr,
|
|
90
|
+
arg_to_datatype,
|
|
91
|
+
ListWrapper,
|
|
92
|
+
MapType,
|
|
93
|
+
ShowStatement,
|
|
94
|
+
DataType,
|
|
95
|
+
StructType,
|
|
96
|
+
ListType,
|
|
97
|
+
ConceptDeclarationStatement,
|
|
98
|
+
ConceptDerivation,
|
|
99
|
+
RowsetDerivationStatement,
|
|
100
|
+
LooseConceptList,
|
|
101
|
+
)
|
|
102
|
+
from trilogy.parsing.exceptions import ParseError
|
|
103
|
+
from trilogy.utility import string_to_hash
|
|
104
|
+
from trilogy.parsing.common import (
|
|
105
|
+
agg_wrapper_to_concept,
|
|
106
|
+
window_item_to_concept,
|
|
107
|
+
function_to_concept,
|
|
108
|
+
filter_item_to_concept,
|
|
109
|
+
constant_to_concept,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
# Tuple of types usable directly in isinstance() checks; presumably the value
# types the parser treats as literal constants — confirm against its usages.
CONSTANT_TYPES = (int, float, str, bool, ListWrapper)
|
|
113
|
+
|
|
114
|
+
grammar = r"""
|
|
115
|
+
!start: ( block | show_statement | comment )*
|
|
116
|
+
block: statement _TERMINATOR comment?
|
|
117
|
+
?statement: concept
|
|
118
|
+
| datasource
|
|
119
|
+
| function
|
|
120
|
+
| multi_select_statement
|
|
121
|
+
| select_statement
|
|
122
|
+
| persist_statement
|
|
123
|
+
| rowset_derivation_statement
|
|
124
|
+
| import_statement
|
|
125
|
+
| merge_statement
|
|
126
|
+
|
|
127
|
+
_TERMINATOR: ";"i /\s*/
|
|
128
|
+
|
|
129
|
+
comment: /#.*(\n|$)/ | /\/\/.*\n/
|
|
130
|
+
|
|
131
|
+
// property display_name string
|
|
132
|
+
concept_declaration: PURPOSE IDENTIFIER data_type concept_nullable_modifier? metadata?
|
|
133
|
+
//customer_id.property first_name STRING;
|
|
134
|
+
//<customer_id,country>.property local_alias STRING
|
|
135
|
+
concept_property_declaration: PROPERTY (prop_ident | IDENTIFIER) data_type concept_nullable_modifier? metadata?
|
|
136
|
+
//metric post_length <- len(post_text);
|
|
137
|
+
concept_derivation: (PURPOSE | AUTO | PROPERTY ) IDENTIFIER "<" "-" expr
|
|
138
|
+
|
|
139
|
+
rowset_derivation_statement: ("rowset"i IDENTIFIER "<" "-" (multi_select_statement | select_statement)) | ("with"i IDENTIFIER "as"i (multi_select_statement | select_statement))
|
|
140
|
+
|
|
141
|
+
constant_derivation: CONST IDENTIFIER "<" "-" literal
|
|
142
|
+
concept_nullable_modifier: "?"
|
|
143
|
+
concept: (concept_declaration | concept_derivation | concept_property_declaration | constant_derivation)
|
|
144
|
+
|
|
145
|
+
//concept property
|
|
146
|
+
prop_ident: "<" (IDENTIFIER ",")* IDENTIFIER ","? ">" "." IDENTIFIER
|
|
147
|
+
|
|
148
|
+
// datasource concepts
|
|
149
|
+
datasource: "datasource" IDENTIFIER "(" column_assignment_list ")" grain_clause? (address | query)
|
|
150
|
+
|
|
151
|
+
grain_clause: "grain" "(" column_list ")"
|
|
152
|
+
|
|
153
|
+
address: "address" ADDRESS
|
|
154
|
+
|
|
155
|
+
query: "query" MULTILINE_STRING
|
|
156
|
+
|
|
157
|
+
concept_assignment: IDENTIFIER | (MODIFIER "[" concept_assignment "]" ) | (SHORTHAND_MODIFIER concept_assignment )
|
|
158
|
+
|
|
159
|
+
column_assignment: ((IDENTIFIER | raw_column_assignment | _static_functions ) ":" concept_assignment)
|
|
160
|
+
|
|
161
|
+
raw_column_assignment: "raw" "(" MULTILINE_STRING ")"
|
|
162
|
+
|
|
163
|
+
column_assignment_list : (column_assignment "," )* column_assignment ","?
|
|
164
|
+
|
|
165
|
+
column_list : (IDENTIFIER "," )* IDENTIFIER ","?
|
|
166
|
+
|
|
167
|
+
import_statement: "import" (IDENTIFIER ".") * IDENTIFIER "as" IDENTIFIER
|
|
168
|
+
|
|
169
|
+
// persist_statement
|
|
170
|
+
persist_statement: "persist"i IDENTIFIER "into"i IDENTIFIER "from"i select_statement grain_clause?
|
|
171
|
+
|
|
172
|
+
// select statement
|
|
173
|
+
select_statement: "select"i select_list where? comment* order_by? comment* limit? comment*
|
|
174
|
+
|
|
175
|
+
// multiple_selects
|
|
176
|
+
multi_select_statement: select_statement ("merge" select_statement)+ "align"i align_clause where? comment* order_by? comment* limit? comment*
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
align_item: IDENTIFIER ":" IDENTIFIER ("," IDENTIFIER)* ","?
|
|
180
|
+
|
|
181
|
+
align_clause: align_item ("," align_item)* ","?
|
|
182
|
+
|
|
183
|
+
// merge statemment
|
|
184
|
+
|
|
185
|
+
merge_statement: "merge" IDENTIFIER ("," IDENTIFIER)* ","? comment*
|
|
186
|
+
|
|
187
|
+
// FUNCTION blocks
|
|
188
|
+
function: raw_function
|
|
189
|
+
function_binding_item: IDENTIFIER data_type
|
|
190
|
+
function_binding_list: (function_binding_item ",")* function_binding_item ","?
|
|
191
|
+
raw_function: "def" "rawsql" IDENTIFIER "(" function_binding_list ")" "-" ">" data_type "as"i MULTILINE_STRING
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
// user_id where state = Mexico
|
|
195
|
+
filter_item: "filter"i IDENTIFIER where
|
|
196
|
+
|
|
197
|
+
// rank/lag/lead
|
|
198
|
+
WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i) /[\s]+/
|
|
199
|
+
|
|
200
|
+
window_item: WINDOW_TYPE (IDENTIFIER | select_transform | comment+ ) window_item_over? window_item_order?
|
|
201
|
+
|
|
202
|
+
window_item_over: ("OVER"i over_list)
|
|
203
|
+
|
|
204
|
+
window_item_order: ("ORDER"i? "BY"i order_list)
|
|
205
|
+
|
|
206
|
+
select_hide_modifier: "--"
|
|
207
|
+
select_partial_modifier: "~"
|
|
208
|
+
select_item: (select_hide_modifier | select_partial_modifier)? (IDENTIFIER | select_transform | comment+ )
|
|
209
|
+
|
|
210
|
+
select_list: ( select_item "," )* select_item ","?
|
|
211
|
+
|
|
212
|
+
// count(post_id) -> post_count
|
|
213
|
+
_assignment: ("-" ">") | "as"
|
|
214
|
+
select_transform : expr _assignment IDENTIFIER metadata?
|
|
215
|
+
|
|
216
|
+
metadata: "metadata" "(" IDENTIFIER "=" _string_lit ")"
|
|
217
|
+
|
|
218
|
+
limit: "LIMIT"i /[0-9]+/
|
|
219
|
+
|
|
220
|
+
!window_order: ("TOP"i | "BOTTOM"i)
|
|
221
|
+
|
|
222
|
+
window: window_order /[0-9]+/
|
|
223
|
+
|
|
224
|
+
window_order_by: "BY"i column_list
|
|
225
|
+
|
|
226
|
+
order_list: (expr ORDERING "," )* expr ORDERING ","?
|
|
227
|
+
|
|
228
|
+
over_list: (IDENTIFIER "," )* IDENTIFIER ","?
|
|
229
|
+
|
|
230
|
+
ORDERING: ("ASC"i | "DESC"i)
|
|
231
|
+
|
|
232
|
+
order_by: "ORDER"i "BY"i order_list
|
|
233
|
+
|
|
234
|
+
//WHERE STATEMENT
|
|
235
|
+
|
|
236
|
+
LOGICAL_OPERATOR: "AND"i | "OR"i
|
|
237
|
+
|
|
238
|
+
conditional: expr LOGICAL_OPERATOR (conditional | expr)
|
|
239
|
+
|
|
240
|
+
where: "WHERE"i (expr | conditional)
|
|
241
|
+
|
|
242
|
+
expr_reference: IDENTIFIER
|
|
243
|
+
|
|
244
|
+
!array_comparison: ( ("NOT"i "IN"i) | "IN"i)
|
|
245
|
+
|
|
246
|
+
COMPARISON_OPERATOR: (/is[\s]+not/ | "is" |"=" | ">" | "<" | ">=" | "<=" | "!=" )
|
|
247
|
+
|
|
248
|
+
comparison: (expr COMPARISON_OPERATOR expr) | (expr array_comparison expr_tuple)
|
|
249
|
+
|
|
250
|
+
expr_tuple: "(" (expr ",")* expr ","? ")"
|
|
251
|
+
|
|
252
|
+
//unnesting is a function
|
|
253
|
+
unnest: "UNNEST"i "(" expr ")"
|
|
254
|
+
//indexing into an expression is a function
|
|
255
|
+
index_access: expr "[" int_lit "]"
|
|
256
|
+
attr_access: expr "[" _string_lit "]"
|
|
257
|
+
|
|
258
|
+
parenthetical: "(" (conditional | expr) ")"
|
|
259
|
+
|
|
260
|
+
expr: window_item | filter_item | comparison | fgroup | aggregate_functions | unnest | _string_functions | _math_functions | _generic_functions | _constant_functions| _date_functions | literal | expr_reference | index_access | attr_access | parenthetical
|
|
261
|
+
|
|
262
|
+
// functions
|
|
263
|
+
|
|
264
|
+
//math TODO: add syntactic sugar
|
|
265
|
+
fadd: ("add"i "(" expr "," expr ")" ) | ( expr "+" expr )
|
|
266
|
+
fsub: ("subtract"i "(" expr "," expr ")" ) | ( expr "-" expr )
|
|
267
|
+
fmul: ("multiply"i "(" expr "," expr ")" ) | ( expr "*" expr )
|
|
268
|
+
fdiv: ( "divide"i "(" expr "," expr ")") | ( expr "/" expr )
|
|
269
|
+
fmod: ( "mod"i "(" expr "," expr ")") | ( expr "%" expr )
|
|
270
|
+
fround: "round"i "(" expr "," expr ")"
|
|
271
|
+
fabs: "abs"i "(" expr ")"
|
|
272
|
+
|
|
273
|
+
_math_functions: fadd | fsub | fmul | fdiv | fround | fmod | fabs
|
|
274
|
+
|
|
275
|
+
//generic
|
|
276
|
+
fcast: "cast"i "(" expr "AS"i data_type ")"
|
|
277
|
+
concat: ("concat"i "(" (expr ",")* expr ")") | (expr "||" expr)
|
|
278
|
+
fcoalesce: "coalesce"i "(" (expr ",")* expr ")"
|
|
279
|
+
fcase_when: "WHEN"i (expr | conditional) "THEN"i expr
|
|
280
|
+
fcase_else: "ELSE"i expr
|
|
281
|
+
fcase: "CASE"i (fcase_when)* (fcase_else)? "END"i
|
|
282
|
+
len: "len"i "(" expr ")"
|
|
283
|
+
fnot: "NOT"i expr
|
|
284
|
+
|
|
285
|
+
_generic_functions: fcast | concat | fcoalesce | fcase | len | fnot
|
|
286
|
+
|
|
287
|
+
//constant
|
|
288
|
+
fcurrent_date: "current_date"i "(" ")"
|
|
289
|
+
fcurrent_datetime: "current_datetime"i "(" ")"
|
|
290
|
+
|
|
291
|
+
_constant_functions: fcurrent_date | fcurrent_datetime
|
|
292
|
+
|
|
293
|
+
//string
|
|
294
|
+
like: "like"i "(" expr "," _string_lit ")"
|
|
295
|
+
ilike: "ilike"i "(" expr "," _string_lit ")"
|
|
296
|
+
alt_like: expr "like"i expr
|
|
297
|
+
upper: "upper"i "(" expr ")"
|
|
298
|
+
lower: "lower"i "(" expr ")"
|
|
299
|
+
fsplit: "split"i "(" expr "," _string_lit ")"
|
|
300
|
+
fstrpos: "strpos"i "(" expr "," expr ")"
|
|
301
|
+
fsubstring: "substring"i "(" expr "," expr "," expr ")"
|
|
302
|
+
|
|
303
|
+
_string_functions: like | ilike | upper | lower | fsplit | fstrpos | fsubstring | alt_like
|
|
304
|
+
|
|
305
|
+
// special aggregate
|
|
306
|
+
fgroup: "group"i "(" expr ")" aggregate_over?
|
|
307
|
+
//aggregates
|
|
308
|
+
count: "count"i "(" expr ")"
|
|
309
|
+
count_distinct: "count_distinct"i "(" expr ")"
|
|
310
|
+
sum: "sum"i "(" expr ")"
|
|
311
|
+
avg: "avg"i "(" expr ")"
|
|
312
|
+
max: "max"i "(" expr ")"
|
|
313
|
+
min: "min"i "(" expr ")"
|
|
314
|
+
|
|
315
|
+
//aggregates can force a grain
|
|
316
|
+
aggregate_all: "*"
|
|
317
|
+
aggregate_over: ("BY"i (aggregate_all | over_list))
|
|
318
|
+
aggregate_functions: (count | count_distinct | sum | avg | max | min) aggregate_over?
|
|
319
|
+
|
|
320
|
+
// date functions
|
|
321
|
+
fdate: "date"i "(" expr ")"
|
|
322
|
+
fdatetime: "datetime"i "(" expr ")"
|
|
323
|
+
ftimestamp: "timestamp"i "(" expr ")"
|
|
324
|
+
|
|
325
|
+
fsecond: "second"i "(" expr ")"
|
|
326
|
+
fminute: "minute"i "(" expr ")"
|
|
327
|
+
fhour: "hour"i "(" expr ")"
|
|
328
|
+
fday: "day"i "(" expr ")"
|
|
329
|
+
fday_of_week: "day_of_week"i "(" expr ")"
|
|
330
|
+
fweek: "week"i "(" expr ")"
|
|
331
|
+
fmonth: "month"i "(" expr ")"
|
|
332
|
+
fquarter: "quarter"i "(" expr ")"
|
|
333
|
+
fyear: "year"i "(" expr ")"
|
|
334
|
+
|
|
335
|
+
DATE_PART: "DAY"i | "WEEK"i | "MONTH"i | "QUARTER"i | "YEAR"i | "MINUTE"i | "HOUR"i | "SECOND"i
|
|
336
|
+
fdate_trunc: "date_trunc"i "(" expr "," DATE_PART ")"
|
|
337
|
+
fdate_part: "date_part"i "(" expr "," DATE_PART ")"
|
|
338
|
+
fdate_add: "date_add"i "(" expr "," DATE_PART "," int_lit ")"
|
|
339
|
+
fdate_diff: "date_diff"i "(" expr "," expr "," DATE_PART ")"
|
|
340
|
+
|
|
341
|
+
_date_functions: fdate | fdate_add | fdate_diff | fdatetime | ftimestamp | fsecond | fminute | fhour | fday | fday_of_week | fweek | fmonth | fquarter | fyear | fdate_part | fdate_trunc
|
|
342
|
+
|
|
343
|
+
_static_functions: _string_functions | _math_functions | _generic_functions | _constant_functions| _date_functions
|
|
344
|
+
|
|
345
|
+
// base language constructs
|
|
346
|
+
IDENTIFIER: /[a-zA-Z_][a-zA-Z0-9_\\-\\.\-]*/
|
|
347
|
+
ADDRESS: /[a-zA-Z_][a-zA-Z0-9_\\-\\.\-\*]*/ | /`[a-zA-Z_][a-zA-Z0-9_\\-\\.\-\*]*`/
|
|
348
|
+
|
|
349
|
+
MULTILINE_STRING: /\'{3}(.*?)\'{3}/s
|
|
350
|
+
|
|
351
|
+
DOUBLE_STRING_CHARS: /(?:(?!\${)([^"\\]|\\.))+/+ // any character except "
|
|
352
|
+
SINGLE_STRING_CHARS: /(?:(?!\${)([^'\\]|\\.))+/+ // any character except '
|
|
353
|
+
_single_quote: "'" ( SINGLE_STRING_CHARS )* "'"
|
|
354
|
+
_double_quote: "\"" ( DOUBLE_STRING_CHARS )* "\""
|
|
355
|
+
_string_lit: _single_quote | _double_quote
|
|
356
|
+
|
|
357
|
+
MINUS: "-"
|
|
358
|
+
|
|
359
|
+
int_lit: MINUS? /[0-9]+/
|
|
360
|
+
|
|
361
|
+
float_lit: /[0-9]*\.[0-9]+/
|
|
362
|
+
|
|
363
|
+
array_lit: "[" (literal ",")* literal ","? "]"()
|
|
364
|
+
|
|
365
|
+
!bool_lit: "True"i | "False"i
|
|
366
|
+
|
|
367
|
+
!null_lit: "null"i
|
|
368
|
+
|
|
369
|
+
literal: _string_lit | int_lit | float_lit | bool_lit | null_lit | array_lit
|
|
370
|
+
|
|
371
|
+
MODIFIER: "Optional"i | "Partial"i | "Nullable"i
|
|
372
|
+
|
|
373
|
+
SHORTHAND_MODIFIER: "~"
|
|
374
|
+
|
|
375
|
+
struct_type: "struct" "<" ((data_type | IDENTIFIER) ",")* (data_type | IDENTIFIER) ","? ">"
|
|
376
|
+
|
|
377
|
+
list_type: "list" "<" data_type ">"
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
!data_type: "string"i | "number"i | "numeric"i | "map"i | "list"i | "array"i | "any"i | "int"i | "bigint" | "date"i | "datetime"i | "timestamp"i | "float"i | "bool"i | struct_type | list_type
|
|
381
|
+
|
|
382
|
+
PURPOSE: "key"i | "metric"i | "const"i | "constant"i
|
|
383
|
+
PROPERTY: "property"i
|
|
384
|
+
CONST: "const"i | "constant"i
|
|
385
|
+
AUTO: "AUTO"i
|
|
386
|
+
|
|
387
|
+
// meta functions
|
|
388
|
+
CONCEPTS: "CONCEPTS"i
|
|
389
|
+
DATASOURCES: "DATASOURCES"i
|
|
390
|
+
|
|
391
|
+
show_category: CONCEPTS | DATASOURCES
|
|
392
|
+
|
|
393
|
+
show_statement: "show"i ( show_category | select_statement | persist_statement) _TERMINATOR
|
|
394
|
+
|
|
395
|
+
%import common.WS_INLINE -> _WHITESPACE
|
|
396
|
+
%import common.WS
|
|
397
|
+
%ignore WS
|
|
398
|
+
""" # noqa: E501
|
|
399
|
+
|
|
400
|
+
# Module-level Lark parser built once from ``grammar``. Position propagation
# is enabled so transformer callbacks decorated with ``v_args(meta=True)``
# can read ``meta.line``; regexes are compiled case-insensitively.
PARSER = Lark(
    grammar,
    start="start",
    propagate_positions=True,
    g_regex_flags=IGNORECASE,
)
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def parse_concept_reference(
    name: str, environment: Environment, purpose: Optional[Purpose] = None
) -> Tuple[str, str, str, str | None]:
    """Split a possibly dotted concept reference into its parts.

    Returns a ``(lookup, namespace, name, parent)`` tuple:

    * no dot in ``name``: the lookup is the bare name, the namespace is the
      environment namespace (or ``DEFAULT_NAMESPACE``), and parent is ``None``;
    * dotted name with ``purpose == Purpose.PROPERTY``: everything before the
      last dot is the parent concept, whose namespace (or
      ``DEFAULT_NAMESPACE``) is used;
    * any other dotted name: everything before the last dot is the namespace
      and parent is ``None``.
    """
    if "." not in name:
        return name, environment.namespace or DEFAULT_NAMESPACE, name, None

    prefix, short_name = name.rsplit(".", 1)
    if purpose == Purpose.PROPERTY:
        # For properties the prefix names the parent concept, not a namespace.
        namespace = environment.concepts[prefix].namespace or DEFAULT_NAMESPACE
        return f"{namespace}.{short_name}", namespace, short_name, prefix
    return f"{prefix}.{short_name}", prefix, short_name, None
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def unwrap_transformation(
    input: Union[
        FilterItem,
        WindowItem,
        Concept,
        Function,
        AggregateWrapper,
        int,
        str,
        float,
        bool,
    ]
) -> Function | FilterItem | WindowItem | AggregateWrapper:
    """Normalise a select-transform value into a derivation object.

    Functions, aggregate wrappers, filter items and window items are returned
    untouched. A ``Concept`` is wrapped in an ``ALIAS`` function that keeps its
    datatype and purpose, a ``Parenthetical`` is unwrapped recursively, and
    anything else is wrapped in a ``CONSTANT`` function.
    """
    # The checks keep the original precedence: Function/AggregateWrapper,
    # then Concept, then FilterItem/WindowItem, then Parenthetical.
    if isinstance(input, (Function, AggregateWrapper)):
        return input
    if isinstance(input, Concept):
        return Function(
            operator=FunctionType.ALIAS,
            output_datatype=input.datatype,
            output_purpose=input.purpose,
            arguments=[input],
        )
    if isinstance(input, (FilterItem, WindowItem)):
        return input
    if isinstance(input, Parenthetical):
        return unwrap_transformation(input.content)
    return Function(
        operator=FunctionType.CONSTANT,
        output_datatype=arg_to_datatype(input),
        output_purpose=Purpose.CONSTANT,
        arguments=[input],
    )
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
class ParseToObjects(Transformer):
|
|
463
|
+
def __init__(
    self,
    visit_tokens,
    text,
    environment: Environment,
    parse_address: str | None = None,
    parsed: dict | None = None,
):
    """Set up a transformer for one source text.

    Args:
        visit_tokens: forwarded to ``Transformer.__init__``.
        text: the source text; kept so it can be re-parsed by
            ``hydrate_missing``.
        environment: the environment that parsed concepts are added to.
        parse_address: identifier for this parse; defaults to ``"root"``.
        parsed: mapping of already-created parsers. When supplied, the very
            same dict object is stored, even if it is currently empty.
    """
    Transformer.__init__(self, visit_tokens)
    self.text = text
    self.environment: Environment = environment
    self.imported: set[str] = set()
    self.parse_address = parse_address or "root"
    # Test against None rather than truthiness: an empty dict handed in by a
    # caller must be kept as the shared registry, not swapped for a private
    # dict the caller never sees.
    self.parsed: dict[str, ParseToObjects] = parsed if parsed is not None else {}
    # we do a second pass to pick up circular dependencies
    # after initial parsing
    self.pass_count = 1
|
|
480
|
+
|
|
481
|
+
def hydrate_missing(self):
    """Run the second parsing pass and return the re-transformed tree.

    Marks this parser as being on pass 2, recursively does the same for every
    registered parser that is not yet on pass 2, switches the concept
    dictionary to fail on missing concepts, re-parses and re-transforms the
    stored text, then resets the record of undefined concepts.
    """
    self.pass_count = 2
    for dependency in self.parsed.values():
        # Parsers already on pass 2 are skipped, which stops cyclic
        # references from recursing forever.
        if dependency.pass_count != 2:
            dependency.hydrate_missing()
    concepts = self.environment.concepts
    concepts.fail_on_missing = True
    tree = PARSER.parse(self.text)
    reparsed = self.transform(tree)
    self.environment.concepts.undefined = {}
    return reparsed
|
|
491
|
+
|
|
492
|
+
def process_function_args(
    self, args, meta: Meta, concept_arguments: Optional[LooseConceptList] = None
):
    """Replace anonymous complex arguments with registered virtual concepts.

    Each argument is first stripped of any ``Parenthetical`` wrappers. Then:

    * non-aggregate ``Function`` arguments are passed through unchanged;
    * aggregate ``Function``, ``FilterItem``, ``WindowItem``,
      ``AggregateWrapper`` and ``ListWrapper`` arguments are converted to a
      concept named ``{VIRTUAL_CONCEPT_PREFIX}_{hash}``, stamped with the
      current line number, added to the environment and used in place of
      the argument;
    * everything else is passed through unchanged.

    ``concept_arguments`` is accepted but not used by this method.
    """
    # Ordered like an if/elif chain: the first matching type wins.
    concept_builders = (
        (Function, function_to_concept),
        (FilterItem, filter_item_to_concept),
        (WindowItem, window_item_to_concept),
        (AggregateWrapper, agg_wrapper_to_concept),
        (ListWrapper, constant_to_concept),
    )
    final: List[Concept | Function] = []
    for raw_arg in args:
        arg = raw_arg
        while isinstance(arg, Parenthetical):
            arg = arg.content

        builder = next(
            (build for kind, build in concept_builders if isinstance(arg, kind)),
            None,
        )
        # if it's not an aggregate function, we can skip the virtual concepts
        # to simplify anonymous function handling
        plain_function = (
            isinstance(arg, Function)
            and arg.operator not in FunctionClass.AGGREGATE_FUNCTIONS.value
        )
        if builder is None or plain_function:
            final.append(arg)
            continue

        concept = builder(
            arg,
            name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(arg))}",
            namespace=self.environment.namespace,
        )
        # to satisfy mypy, concept will always have metadata
        if concept.metadata:
            concept.metadata.line_number = meta.line
        self.environment.add_concept(concept, meta=meta)
        final.append(concept)
    return final
|
|
566
|
+
|
|
567
|
+
def start(self, args):
    """Return the transformed children of the ``start`` rule unchanged."""
    children = args
    return children
|
|
569
|
+
|
|
570
|
+
def block(self, args):
    """Return a block's statement, using a trailing comment as description.

    ``args[0]`` is the transformed statement. When it is a
    ``ConceptDeclarationStatement`` immediately followed by a ``Comment``,
    and the concept has no description yet, the comment text (without its
    leading ``#`` or ``//`` marker) becomes the concept's description.
    """
    output = args[0]
    if not isinstance(output, ConceptDeclarationStatement):
        return output
    if len(args) > 1 and isinstance(args[1], Comment):
        concept_metadata = output.concept.metadata
        if not concept_metadata.description:
            description = args[1].text.strip()
            # The grammar allows both "#" and "//" comments. Strip only the
            # leading marker: splitting on every "#" raised IndexError for
            # "//" comments and cut the text short at any later "#".
            for marker in ("//", "#"):
                if description.startswith(marker):
                    description = description[len(marker):]
                    break
            concept_metadata.description = description.strip()
    return output
|
|
580
|
+
|
|
581
|
+
def metadata(self, args):
    """Build a ``Metadata`` object from alternating key/value children.

    Even-indexed children are used as keyword names and the odd-indexed
    children that follow them as the corresponding values.
    """
    keys = args[::2]
    values = args[1::2]
    return Metadata(**dict(zip(keys, values)))
|
|
584
|
+
|
|
585
|
+
def IDENTIFIER(self, args) -> str:
    """Reduce an ``IDENTIFIER`` token to the text held in its ``value``."""
    identifier_text = args.value
    return identifier_text
|
|
587
|
+
|
|
588
|
+
def ADDRESS(self, args) -> str:
    """Reduce an ``ADDRESS`` token to the text held in its ``value``."""
    address_text = args.value
    return address_text
|
|
590
|
+
|
|
591
|
+
def STRING_CHARS(self, args) -> str:
    """Reduce a ``STRING_CHARS`` token to the text held in its ``value``.

    NOTE(review): the grammar in this file defines no ``STRING_CHARS``
    terminal (only the single/double quoted variants), so this callback may
    be unused — confirm before removing.
    """
    chars = args.value
    return chars
|
|
593
|
+
|
|
594
|
+
def SINGLE_STRING_CHARS(self, args) -> str:
    """Reduce a ``SINGLE_STRING_CHARS`` token to the text in its ``value``."""
    chars = args.value
    return chars
|
|
596
|
+
|
|
597
|
+
def DOUBLE_STRING_CHARS(self, args) -> str:
    """Reduce a ``DOUBLE_STRING_CHARS`` token to the text in its ``value``."""
    chars = args.value
    return chars
|
|
599
|
+
|
|
600
|
+
def MINUS(self, args) -> str:
    """Return the literal ``"-"`` for a ``MINUS`` token; the token is ignored."""
    minus_sign = "-"
    return minus_sign
|
|
602
|
+
|
|
603
|
+
@v_args(meta=True)
def struct_type(self, meta: Meta, args) -> StructType:
    """Build a ``StructType`` from the members of a ``struct<...>`` rule.

    Members that are already ``DataType``, ``ListType`` or ``StructType``
    instances are kept as they are. Every other member is used as a key into
    the environment's concepts, passing the current line number to the
    lookup.
    """
    fields: list[DataType | MapType | ListType | StructType | Concept] = [
        member
        if isinstance(member, (DataType, ListType, StructType))
        else self.environment.concepts.__getitem__(  # type: ignore
            key=member, line_no=meta.line
        )
        for member in args
    ]
    return StructType(fields=fields)
|
|
615
|
+
|
|
616
|
+
def list_type(self, args) -> ListType:
    """Wrap the first child (the element type) in a ``ListType``."""
    element_type = args[0]
    return ListType(type=element_type)
|
|
618
|
+
|
|
619
|
+
def data_type(self, args) -> DataType | ListType | StructType:
    """Resolve a ``data_type`` rule to a type object.

    An already-built ``StructType`` or ``ListType`` child is returned as-is;
    any other child is lower-cased and passed to ``DataType``.
    """
    resolved = args[0]
    if isinstance(resolved, (StructType, ListType)):
        return resolved
    return DataType(resolved.lower())
|
|
626
|
+
|
|
627
|
+
def array_comparison(self, args) -> ComparisonOperator:
    """Build a ``ComparisonOperator`` from the rule's keyword tokens.

    The lower-cased token values are passed to ``ComparisonOperator`` as a
    single list (e.g. the tokens of ``NOT IN`` or ``IN``).
    """
    keywords = [token.value.lower() for token in args]
    return ComparisonOperator(keywords)
|
|
629
|
+
|
|
630
|
+
def COMPARISON_OPERATOR(self, args) -> ComparisonOperator:
    """Convert a ``COMPARISON_OPERATOR`` token into a ``ComparisonOperator``.

    The token is handed to the enum as-is, without case normalisation.
    """
    operator = ComparisonOperator(args)
    return operator
|
|
632
|
+
|
|
633
|
+
def LOGICAL_OPERATOR(self, args) -> BooleanOperator:
    """Convert a ``LOGICAL_OPERATOR`` token into a ``BooleanOperator``.

    The token text is lower-cased before the enum lookup.
    """
    lowered = args.lower()
    return BooleanOperator(lowered)
|
|
635
|
+
|
|
636
|
+
def concept_assignment(self, args):
    """Return the rule's children unchanged.

    Per the grammar the children are either a single identifier, or a
    modifier followed by a nested ``concept_assignment`` result.
    """
    children = args
    return children
|
|
638
|
+
|
|
639
|
+
@v_args(meta=True)
def column_assignment(self, meta: Meta, args):
    """Build a ``ColumnAssignment`` binding a column alias to a concept.

    ``args[0]`` is the column alias and ``args[1]`` the nested
    ``concept_assignment`` result. Each nesting level with more than one
    element contributes its first element as a modifier; the innermost
    single-element level holds the concept name, which is looked up in the
    environment with the current line number.
    """
    # TODO -> deal with conceptual modifiers
    alias = args[0]
    assignment = args[1]
    collected_modifiers = []
    # Peel off one (modifier, nested-assignment) pair per iteration.
    while len(assignment) > 1:
        modifier, assignment = assignment[0], assignment[1]
        collected_modifiers.append(modifier)
    resolved = self.environment.concepts.__getitem__(  # type: ignore
        key=assignment[0], line_no=meta.line
    )
    return ColumnAssignment(
        alias=alias, modifiers=collected_modifiers, concept=resolved
    )
|
|
652
|
+
|
|
653
|
+
def _TERMINATOR(self, args):
    """Return ``None`` for a statement terminator token; the token is ignored."""
    result = None
    return result
|
|
655
|
+
|
|
656
|
+
def MODIFIER(self, args) -> Modifier:
    """Convert a ``MODIFIER`` token into a ``Modifier`` via its ``value``."""
    modifier_text = args.value
    return Modifier(modifier_text)
|
|
658
|
+
|
|
659
|
+
def SHORTHAND_MODIFIER(self, args) -> Modifier:
    """Convert a ``SHORTHAND_MODIFIER`` token into a ``Modifier``.

    The token's ``value`` (``"~"`` per the grammar) is passed to the enum.
    """
    shorthand = args.value
    return Modifier(shorthand)
|
|
661
|
+
|
|
662
|
+
def PURPOSE(self, args) -> Purpose:
    """Convert a ``PURPOSE`` token into a ``Purpose`` via its ``value``."""
    purpose_text = args.value
    return Purpose(purpose_text)
|
|
664
|
+
|
|
665
|
+
def AUTO(self, args) -> Purpose:
    """Map an ``AUTO`` token to ``Purpose.AUTO``; the token text is ignored."""
    purpose = Purpose.AUTO
    return purpose
|
|
667
|
+
|
|
668
|
+
def CONST(self, args) -> Purpose:
    """Map a ``CONST`` token to ``Purpose.CONSTANT``; the text is ignored."""
    purpose = Purpose.CONSTANT
    return purpose
|
|
670
|
+
|
|
671
|
+
def CONSTANT(self, args) -> Purpose:
    """Map a ``CONSTANT`` token to ``Purpose.CONSTANT``; the text is ignored.

    NOTE(review): the grammar in this file defines no ``CONSTANT`` terminal
    (``CONST`` covers both spellings), so this callback may be unused —
    confirm before removing.
    """
    purpose = Purpose.CONSTANT
    return purpose
|
|
673
|
+
|
|
674
|
+
def PROPERTY(self, args):
    """Map a ``PROPERTY`` token to ``Purpose.PROPERTY``; the text is ignored."""
    purpose = Purpose.PROPERTY
    return purpose
|
|
676
|
+
|
|
677
|
+
@v_args(meta=True)
def prop_ident(self, meta: Meta, args) -> Tuple[List[Concept], str]:
    """Resolve a ``<key_a, key_b>.name`` property identifier.

    All children except the last are grain concept names and are looked up
    in the environment; the last child is the property name itself.

    Returns:
        A ``(parent_concepts, property_name)`` tuple.
    """
    grain_names = args[:-1]
    property_name = args[-1]
    # Pass the source line to the lookup, as the other rule callbacks in
    # this class do, so the lookup knows where the reference occurred.
    parents = [
        self.environment.concepts.__getitem__(  # type: ignore
            key=grain, line_no=meta.line
        )
        for grain in grain_names
    ]
    return parents, property_name
|
|
680
|
+
|
|
681
|
+
@v_args(meta=True)
def concept_property_declaration(self, meta: Meta, args) -> Concept:
    """Create and register a property concept from its declaration.

    ``args[0]`` is the purpose, ``args[1]`` the declaration target and
    ``args[2]`` the datatype; any ``Metadata`` or ``Modifier`` children are
    picked up wherever they appear. The target is either the
    ``(parents, name)`` tuple produced by ``prop_ident`` or a dotted string
    ``parent.name``.

    Raises:
        ParseError: if a string target contains no ``.`` and therefore names
            no parent key.
    """
    metadata = None
    modifiers = []
    for arg in args:
        if isinstance(arg, Metadata):
            metadata = arg
        if isinstance(arg, Modifier):
            modifiers.append(arg)

    declaration = args[1]
    if isinstance(declaration, tuple):
        parents, name = declaration
        if "." in name:
            # NOTE(review): this splits on the FIRST dot, whereas
            # parse_concept_reference splits on the last one — confirm which
            # is intended for multi-level namespaces.
            namespace, name = name.split(".", 1)
        else:
            namespace = self.environment.namespace or DEFAULT_NAMESPACE
    else:
        if "." not in declaration:
            raise ParseError(
                f"Property declaration {args[1]} must be fully qualified with a parent key"
            )
        grain, name = declaration.rsplit(".", 1)
        # Pass the source line to the lookup, as the other rule callbacks
        # in this class do.
        parent = self.environment.concepts.__getitem__(  # type: ignore
            key=grain, line_no=meta.line
        )
        parents = [parent]
        namespace = parent.namespace
    concept = Concept(
        name=name,
        datatype=args[2],
        purpose=args[0],
        metadata=metadata,
        grain=Grain(components=parents),
        namespace=namespace,
        keys=parents,
        modifiers=modifiers,
    )
    # Record where the property was declared, as concept_declaration does.
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta)
    return concept
|
|
720
|
+
|
|
721
|
+
@v_args(meta=True)
def concept_declaration(self, meta: Meta, args) -> ConceptDeclarationStatement:
    """Create and register a concept from a plain declaration.

    ``args[0]`` is the purpose, ``args[1]`` the (possibly dotted) name and
    ``args[2]`` the datatype. The last ``Metadata`` child, if any, becomes
    the concept's metadata and every ``Modifier`` child is collected. The
    concept's metadata is stamped with the current line number before the
    concept is added to the environment.
    """
    metadata_children = [child for child in args if isinstance(child, Metadata)]
    metadata = metadata_children[-1] if metadata_children else None
    modifiers = [child for child in args if isinstance(child, Modifier)]

    # Only the namespace and the bare name are needed from the reference.
    _, namespace, name, _ = parse_concept_reference(args[1], self.environment)
    concept = Concept(
        name=name,
        datatype=args[2],
        purpose=args[0],
        metadata=metadata,
        namespace=namespace,
        modifiers=modifiers,
    )
    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta=meta)
    return ConceptDeclarationStatement(concept=concept)
|
|
746
|
+
|
|
747
|
+
@v_args(meta=True)
def concept_derivation(self, meta: Meta, args) -> ConceptDerivation:
    """Build and register a concept derived from an expression.

    ``args[0]`` is the purpose (``Purpose.AUTO`` is passed on as ``None``),
    ``args[1]`` the concept name, ``args[2]`` the source expression and the
    optional ``args[3]`` the metadata.

    Raises:
        SyntaxError: if the unwrapped source expression is none of the
            supported types.
    """
    metadata = args[3] if len(args) > 3 else None
    purpose = None if args[0] == Purpose.AUTO else args[0]
    _lookup, namespace, name, _parent_concept = parse_concept_reference(
        args[1], self.environment, purpose
    )

    # we need to strip off every parenthetical to see what is being assigned.
    source_value = args[2]
    while isinstance(source_value, Parenthetical):
        source_value = source_value.content

    shared = dict(name=name, namespace=namespace, purpose=purpose, metadata=metadata)
    if isinstance(source_value, FilterItem):
        concept = filter_item_to_concept(source_value, **shared)
    elif isinstance(source_value, WindowItem):
        concept = window_item_to_concept(source_value, **shared)
    elif isinstance(source_value, AggregateWrapper):
        concept = agg_wrapper_to_concept(source_value, **shared)
    elif isinstance(source_value, CONSTANT_TYPES):
        concept = constant_to_concept(source_value, **shared)
    elif isinstance(source_value, Function):
        # The plain-function path forwards only the name and namespace.
        concept = function_to_concept(source_value, name=name, namespace=namespace)
    else:
        raise SyntaxError(
            f"Received invalid type {type(args[2])} {args[2]} as input to select"
            " transform"
        )

    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta=meta)
    return ConceptDerivation(concept=concept)
|
|
834
|
+
|
|
835
|
+
@v_args(meta=True)
def rowset_derivation_statement(
    self, meta: Meta, args
) -> RowsetDerivationStatement:
    """Build a rowset derivation and register each concept it derives.

    ``args[0]`` is the rowset name and ``args[1]`` the select (or
    multi-select) statement it wraps.
    """
    rowset_select: SelectStatement | MultiSelectStatement = args[1]
    statement = RowsetDerivationStatement(
        name=args[0],
        select=rowset_select,
        namespace=self.environment.namespace or DEFAULT_NAMESPACE,
    )
    for derived in statement.derived_concepts:
        if derived.metadata:
            derived.metadata.line_number = meta.line
        self.environment.add_concept(derived)
    return statement
|
|
852
|
+
|
|
853
|
+
@v_args(meta=True)
def constant_derivation(self, meta: Meta, args) -> Concept:
    """Build and register a constant concept from a literal value.

    ``args[1]`` is the concept name, ``args[2]`` the literal and the
    optional ``args[3]`` the metadata. ``args[0]`` is not read; the purpose
    is always ``Purpose.CONSTANT``.
    """
    metadata = args[3] if len(args) > 3 else None
    constant: Union[str, float, int, bool] = args[2]
    _lookup, namespace, name, _parent = parse_concept_reference(
        args[1], self.environment
    )
    lineage = Function(
        operator=FunctionType.CONSTANT,
        output_datatype=arg_to_datatype(constant),
        output_purpose=Purpose.CONSTANT,
        arguments=[constant],
    )
    declared = Concept(
        name=name,
        datatype=arg_to_datatype(constant),
        purpose=Purpose.CONSTANT,
        metadata=metadata,
        lineage=lineage,
        grain=Grain(components=[]),
        namespace=namespace,
    )
    if declared.metadata:
        declared.metadata.line_number = meta.line
    self.environment.add_concept(declared, meta)
    return declared
|
|
882
|
+
|
|
883
|
+
@v_args(meta=True)
def concept(self, meta: Meta, args) -> ConceptDeclarationStatement:
    """Wrap the first child in a ``ConceptDeclarationStatement``.

    The child is either a ``Concept`` or an object exposing one via its
    ``concept`` attribute. Existing metadata is stamped with the line.
    """
    child = args[0]
    resolved: Concept = child if isinstance(child, Concept) else child.concept
    if resolved.metadata:
        resolved.metadata.line_number = meta.line
    return ConceptDeclarationStatement(concept=resolved)
|
|
893
|
+
|
|
894
|
+
def column_assignment_list(self, args):
    """Return the parsed column assignments unchanged."""
    return args
|
|
896
|
+
|
|
897
|
+
def column_list(self, args) -> List:
    """Return the parsed column names unchanged."""
    return args
|
|
899
|
+
|
|
900
|
+
def grain_clause(self, args) -> Grain:
    """Build a ``Grain`` from the concept names listed in ``args[0]``."""
    known = self.environment.concepts
    components = [known[name] for name in args[0]]
    return Grain(components=components)
|
|
903
|
+
|
|
904
|
+
def raw_column_assignment(self, args):
    """Wrap a raw column expression, dropping 3 characters from each end."""
    quoted = args[0]
    return RawColumnExpr(text=quoted[3:-3])
|
|
906
|
+
|
|
907
|
+
@v_args(meta=True)
def datasource(self, meta: Meta, args):
    """Build a ``Datasource`` and store it on the environment.

    ``args[0]`` is the identifier and ``args[1]`` the column assignments.
    The remaining arguments are scanned for an ``Address``, a ``Grain`` or a
    ``Query``; a query becomes a parenthesised address.

    Raises:
        ValueError: if neither an address nor a query was supplied.
    """
    identifier = args[0]
    columns: List[ColumnAssignment] = args[1]
    grain: Optional[Grain] = None
    address: Optional[Address] = None
    for candidate in args[1:]:
        if isinstance(candidate, Address):
            address = candidate
        elif isinstance(candidate, Grain):
            grain = candidate
        elif isinstance(candidate, Query):
            address = Address(location=f"({candidate.text})")
    if not address:
        raise ValueError(
            "Malformed datasource, missing address or query declaration"
        )

    built = Datasource(
        identifier=identifier,
        columns=columns,
        # grain will be set by default from args
        # TODO: move to factory
        grain=grain,  # type: ignore
        address=address,
        namespace=self.environment.namespace,
    )
    # Re-grain each column's concept to the grain the datasource ended up with.
    for assignment in columns:
        assignment.concept = assignment.concept.with_grain(built.grain)
    self.environment.datasources[built.identifier] = built
    return built
|
|
937
|
+
|
|
938
|
+
@v_args(meta=True)
def comment(self, meta: Meta, args):
    """Wrap the single comment token's value in a ``Comment``."""
    assert len(args) == 1
    token = args[0]
    return Comment(text=token.value)
|
|
942
|
+
|
|
943
|
+
@v_args(meta=True)
def select_transform(self, meta, args) -> ConceptTransform:
    """Turn an inline ``<expression> -> <name>`` select item into a concept.

    ``args[0]`` is the transformation and ``args[1]`` the output name. The
    new concept is registered on the environment and returned inside a
    ``ConceptTransform``.
    """
    output: str = args[1]
    function = unwrap_transformation(args[0])
    _lookup, namespace, output, _parent = parse_concept_reference(
        output, self.environment
    )

    # Checked in order; the first matching type decides the builder.
    builders = (
        (AggregateWrapper, agg_wrapper_to_concept),
        (WindowItem, window_item_to_concept),
        (FilterItem, filter_item_to_concept),
        (CONSTANT_TYPES, constant_to_concept),
        (Function, function_to_concept),
    )
    for kinds, build in builders:
        if isinstance(function, kinds):
            concept = build(function, namespace=namespace, name=output)
            break
    else:
        # Fallback: build the concept directly from the function's outputs.
        if function.output_purpose == Purpose.PROPERTY:
            pkeys = [x for x in function.arguments if isinstance(x, Concept)]
            grain = Grain(components=pkeys)
            keys = tuple(grain.components_copy)
        else:
            grain = None
            keys = None
        concept = Concept(
            name=output,
            datatype=function.output_datatype,
            purpose=function.output_purpose,
            lineage=function,
            namespace=namespace,
            grain=Grain(components=[]) if not grain else grain,
            keys=keys,
        )

    if concept.metadata:
        concept.metadata.line_number = meta.line
    self.environment.add_concept(concept, meta=meta)
    return ConceptTransform(function=function, output=concept)
|
|
983
|
+
|
|
984
|
+
@v_args(meta=True)
def concept_nullable_modifier(self, meta: Meta, args) -> Modifier:
    """Parse callback for the nullable marker; always ``Modifier.NULLABLE``."""
    return Modifier.NULLABLE
|
|
987
|
+
|
|
988
|
+
@v_args(meta=True)
def select_hide_modifier(self, meta: Meta, args) -> Modifier:
    """Parse callback for the hide marker; always ``Modifier.HIDDEN``."""
    return Modifier.HIDDEN
|
|
991
|
+
|
|
992
|
+
@v_args(meta=True)
def select_partial_modifier(self, meta: Meta, args) -> Modifier:
    """Parse callback for the partial marker; always ``Modifier.PARTIAL``."""
    return Modifier.PARTIAL
|
|
995
|
+
|
|
996
|
+
@v_args(meta=True)
def select_item(self, meta: Meta, args) -> Optional[SelectItem]:
    """Build a ``SelectItem`` from one entry of a select list.

    Modifiers are collected and comments discarded. Returns ``None`` when
    nothing else remains. The one remaining child is either a
    ``ConceptTransform`` or a concept reference to look up.

    Raises:
        ParseError: if more than one non-modifier child remains.
    """
    modifiers = []
    remaining = []
    for arg in args:
        if isinstance(arg, Modifier):
            modifiers.append(arg)
        elif not isinstance(arg, Comment):
            remaining.append(arg)
    args = remaining

    if not args:
        return None
    if len(args) != 1:
        raise ParseError(
            "Malformed select statement"
            f" {args} {self.text[meta.start_pos:meta.end_pos]}"
        )
    content = args[0]
    if not isinstance(content, ConceptTransform):
        content = self.environment.concepts.__getitem__(content, meta.line)
    return SelectItem(content=content, modifiers=modifiers)
|
|
1015
|
+
|
|
1016
|
+
def select_list(self, args):
    """Return the select items with falsy entries (e.g. ``None``) dropped."""
    return list(filter(None, args))
|
|
1018
|
+
|
|
1019
|
+
def limit(self, args):
    """Build a ``Limit`` from the integer value of the first token."""
    row_count = int(args[0].value)
    return Limit(count=row_count)
|
|
1021
|
+
|
|
1022
|
+
def ORDERING(self, args):
    """Convert the ordering token to an ``Ordering`` using its lower-cased text."""
    direction = args.lower()
    return Ordering(direction)
|
|
1024
|
+
|
|
1025
|
+
def order_list(self, args):
    """Pair up alternating (expression, ordering) children into ``OrderItem``s."""
    expressions = args[::2]
    orderings = args[1::2]
    return [
        OrderItem(expr=expression, order=ordering)
        for expression, ordering in zip(expressions, orderings)
    ]
|
|
1027
|
+
|
|
1028
|
+
def order_by(self, args):
    """Wrap the first child (the order items) in an ``OrderBy``."""
    order_items = args[0]
    return OrderBy(items=order_items)
|
|
1030
|
+
|
|
1031
|
+
def over_list(self, args):
    """Look up each name in the environment's concepts, preserving order."""
    known = self.environment.concepts
    resolved = []
    for name in args:
        resolved.append(known[name])
    return resolved
|
|
1033
|
+
|
|
1034
|
+
@v_args(meta=True)
def merge_statement(self, meta: Meta, args) -> MergeStatement:
    """Build a ``MergeStatement`` over the named concepts and register its merge concept.

    Every name in ``args`` is looked up in the environment's concepts; all
    of them must share a single datatype.

    Raises:
        SyntaxError: if the concepts do not share exactly one datatype
            (this includes an empty ``args``).
    """
    parsed = [self.environment.concepts[x] for x in args]
    datatypes = {x.datatype for x in parsed}
    if len(datatypes) != 1:
        # The two message fragments previously ran together with no separator.
        raise SyntaxError(
            f"Cannot merge concepts with different datatypes {datatypes} "
            f"line: {meta.line} concepts: {[x.address for x in parsed]}"
        )
    merge = MergeStatement(concepts=parsed, datatype=datatypes.pop())
    self.environment.add_concept(merge.merge_concept, meta=meta)
    return merge
|
|
1048
|
+
|
|
1049
|
+
def import_statement(self, args: list[str]):
    """Parse an imported file and copy its concepts and datasources under an alias.

    ``args[0]`` is the dotted import path, resolved against the
    environment's working path with a ``.preql`` suffix; ``args[-1]`` is the
    alias. A target already present in ``self.parsed`` is reused rather than
    parsed again.

    Raises:
        ImportError: if the file cannot be read or parsed; the original
            exception is attached as the cause.
    """
    alias = args[-1]
    path = args[0].split(".")

    target = join(self.environment.working_path, *path) + ".preql"
    self.imported.add(target)
    if target in self.parsed:
        nparser = self.parsed[target]
    else:
        try:
            with open(target, "r", encoding="utf-8") as f:
                text = f.read()
            nparser = ParseToObjects(
                visit_tokens=True,
                text=text,
                environment=Environment(
                    working_path=dirname(target),
                ),
                parse_address=target,
                # Pass this parser along so the nested parser can see it as
                # already parsed.
                parsed={**self.parsed, **{self.parse_address: self}},
            )
            nparser.transform(PARSER.parse(text))
            self.parsed[target] = nparser
        except Exception as e:
            # Chain explicitly so the underlying failure stays visible.
            raise ImportError(
                f"Unable to import file {dirname(target)}, parsing error: {e}"
            ) from e

    for concept in nparser.environment.concepts.values():
        self.environment.add_concept(concept.with_namespace(alias))

    for datasource in nparser.environment.datasources.values():
        self.environment.add_datasource(datasource.with_namespace(alias))

    self.environment.imports[alias] = ImportStatement(alias=alias, path=args[0])
    return None
|
|
1088
|
+
|
|
1089
|
+
@v_args(meta=True)
def show_category(self, meta: Meta, args) -> ShowCategory:
    """Convert the first child into a ``ShowCategory``."""
    category = args[0]
    return ShowCategory(category)
|
|
1092
|
+
|
|
1093
|
+
@v_args(meta=True)
def show_statement(self, meta: Meta, args) -> ShowStatement:
    """Wrap the first child in a ``ShowStatement``."""
    content = args[0]
    return ShowStatement(content=content)
|
|
1096
|
+
|
|
1097
|
+
@v_args(meta=True)
def persist_statement(self, meta: Meta, args) -> PersistStatement:
    """Build a ``PersistStatement`` that materialises a select as a datasource.

    ``args`` holds the datasource identifier, the target address, the select
    statement and, optionally, an explicit grain.
    """
    identifier: str = args[0]
    address: str = args[1]
    select: SelectStatement = args[2]
    grain: Grain | None = args[3] if len(args) > 3 else None

    target_namespace = self.environment.namespace or DEFAULT_NAMESPACE
    new_datasource = select.to_datasource(
        namespace=target_namespace,
        identifier=identifier,
        address=Address(location=address),
        grain=grain,
    )
    return PersistStatement(select=select, datasource=new_datasource)
|
|
1117
|
+
|
|
1118
|
+
@v_args(meta=True)
def align_item(self, meta: Meta, args) -> AlignItem:
    """Build an ``AlignItem``: ``args[0]`` is the alias, the rest are concept names."""
    alias, *concept_names = args
    known = self.environment.concepts
    return AlignItem(
        alias=alias,
        namespace=self.environment.namespace,
        concepts=[known[name] for name in concept_names],
    )
|
|
1125
|
+
|
|
1126
|
+
@v_args(meta=True)
def align_clause(self, meta: Meta, args) -> AlignClause:
    """Wrap all children (the align items) in an ``AlignClause``."""
    return AlignClause(items=args)
|
|
1129
|
+
|
|
1130
|
+
@v_args(meta=True)
def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
    """Build a ``MultiSelectStatement`` and register the concepts it derives.

    Children are sorted by type: select statements are collected in order,
    while the limit, order-by, where and align clauses each keep the last
    occurrence.

    Raises:
        ValueError: if no align clause was supplied.
    """
    selects = []
    align: AlignClause | None = None
    limit: int | None = None
    order_by: OrderBy | None = None
    where: WhereClause | None = None
    for arg in args:
        if isinstance(arg, SelectStatement):
            selects.append(arg)
        elif isinstance(arg, Limit):
            limit = arg.count
        elif isinstance(arg, OrderBy):
            order_by = arg
        elif isinstance(arg, WhereClause):
            where = arg
        elif isinstance(arg, AlignClause):
            align = arg

    # Explicit check instead of two redundant asserts, which are stripped
    # when Python runs with -O.
    if align is None:
        raise ValueError("Malformed multi-select, missing align clause")
    multi = MultiSelectStatement(
        selects=selects,
        align=align,
        namespace=self.environment.namespace,
        where_clause=where,
        order_by=order_by,
        limit=limit,
    )
    for concept in multi.derived_concepts:
        self.environment.add_concept(concept, meta=meta)
    return multi
|
|
1162
|
+
|
|
1163
|
+
@v_args(meta=True)
def select_statement(self, meta: Meta, args) -> SelectStatement:
    """Build a ``SelectStatement`` and re-grain its derived concepts.

    Children are sorted by type into the select list, limit, order-by and
    where clause; the last occurrence of each wins.

    Raises:
        ValueError: if no (non-empty) select list was supplied.
    """
    select_items = None
    limit = None
    order_by = None
    where = None
    for child in args:
        if isinstance(child, List):
            select_items = child
        elif isinstance(child, Limit):
            limit = child.count
        elif isinstance(child, OrderBy):
            order_by = child
        elif isinstance(child, WhereClause):
            where = child
    if not select_items:
        raise ValueError("Malformed select, missing select items")

    output = SelectStatement(
        selection=select_items, where_clause=where, limit=limit, order_by=order_by
    )
    # we don't know the grain of an aggregate at assignment time
    # so rebuild at this point in the tree
    # TODO: simplify
    for selected in select_items:
        if not isinstance(selected.content, ConceptTransform):
            continue
        regrained = selected.content.output.with_select_grain(output.grain)
        self.environment.add_concept(regrained, meta=meta)
        selected.content.output = regrained

    if order_by:
        # Metric concepts used for ordering take on the select's grain too.
        for ordering in order_by.items:
            expr = ordering.expr
            if isinstance(expr, Concept) and expr.purpose == Purpose.METRIC:
                ordering.expr = expr.with_grain(output.grain)
    return output
|
|
1199
|
+
|
|
1200
|
+
@v_args(meta=True)
def address(self, meta: Meta, args):
    """Wrap the first child in an ``Address`` as its location."""
    location = args[0]
    return Address(location=location)
|
|
1203
|
+
|
|
1204
|
+
@v_args(meta=True)
def query(self, meta: Meta, args):
    """Wrap a raw query in a ``Query``, dropping 3 characters from each end."""
    quoted = args[0]
    return Query(text=quoted[3:-3])
|
|
1207
|
+
|
|
1208
|
+
def where(self, args):
    """Build a ``WhereClause`` from the first child.

    A child that is not already a comparison, conditional or parenthetical
    is rewritten as ``<child> = True``.
    """
    condition = args[0]
    if isinstance(condition, (Comparison, Conditional, Parenthetical)):
        return WhereClause(conditional=condition)
    as_comparison = Comparison(
        left=condition, right=True, operator=ComparisonOperator.EQ
    )
    return WhereClause(conditional=as_comparison)
|
|
1213
|
+
|
|
1214
|
+
@v_args(meta=True)
def function_binding_list(self, meta: Meta, args) -> list:
    """Return the parsed function bindings unchanged.

    The return annotation previously claimed ``Concept``; the body returns
    the children it was given.
    """
    return args
|
|
1217
|
+
|
|
1218
|
+
@v_args(meta=True)
def function_binding_item(self, meta: Meta, args) -> list:
    """Return the children of a single function binding unchanged.

    The return annotation previously claimed ``Concept``; the body returns
    the children it was given.
    """
    return args
|
|
1221
|
+
|
|
1222
|
+
@v_args(meta=True)
def raw_function(self, meta: Meta, args) -> Function:
    """Build a ``Function`` from a raw definition and store it by name.

    ``args[0]`` is the function identity, ``args[1]`` the bindings (the
    second element of each binding is used as an argument) and ``args[2]``
    the output datatype. The result is stored in
    ``self.environment.functions`` under the identity.
    """
    # A leftover debugging print(args) was removed here.
    identity = args[0]
    fargs = args[1]
    output = args[2]
    item = Function(
        # NOTE(review): the operator is hard-coded to SUM regardless of the
        # definition — looks like a placeholder; confirm intended.
        operator=FunctionType.SUM,
        arguments=[x[1] for x in fargs],
        output_datatype=output,
        output_purpose=Purpose.PROPERTY,
        arg_count=len(fargs) + 1,
    )
    self.environment.functions[identity] = item
    return item
|
|
1237
|
+
|
|
1238
|
+
@v_args(meta=True)
def function(self, meta: Meta, args) -> Function:
    """Flatten the tree: return the single already-built function child."""
    built = args[0]
    return built
|
|
1241
|
+
|
|
1242
|
+
def int_lit(self, args):
    """Join the token fragments and convert the result to ``int``."""
    digits = "".join(args)
    return int(digits)
|
|
1244
|
+
|
|
1245
|
+
def bool_lit(self, args):
    """Return ``True`` when the first token spells "true" in any letter case."""
    normalised = args[0].capitalize()
    return normalised == "True"
|
|
1247
|
+
|
|
1248
|
+
def null_lit(self, args):
    """Parse callback for a null literal; always yields ``NULL_VALUE``."""
    return NULL_VALUE
|
|
1250
|
+
|
|
1251
|
+
def float_lit(self, args):
    """Convert the first token to ``float``."""
    token = args[0]
    return float(token)
|
|
1253
|
+
|
|
1254
|
+
def array_lit(self, args):
    """Build a ``ListWrapper`` from the literal's elements.

    All elements must map to the same datatype, which becomes the type of
    the wrapper.

    Raises:
        ParseError: if the literal is empty or its elements map to more
            than one datatype. This was previously an ``assert``, which is
            stripped when Python runs with -O.
    """
    types = [arg_to_datatype(arg) for arg in args]
    if len(set(types)) != 1:
        raise ParseError(
            f"Array literal must contain values of exactly one type, got {types}"
        )
    return ListWrapper(args, type=types[0])
|
|
1258
|
+
|
|
1259
|
+
def literal(self, args):
    """Flatten the tree: return the single already-converted literal child."""
    value = args[0]
    return value
|
|
1261
|
+
|
|
1262
|
+
def comparison(self, args) -> Comparison:
    """Build a ``Comparison`` from ``[left, operator, right]`` children."""
    left, operator, right = args[0], args[1], args[2]
    return Comparison(left=left, right=right, operator=operator)
|
|
1264
|
+
|
|
1265
|
+
def expr_tuple(self, args):
    """Wrap the whole list of children in a single ``Parenthetical``."""
    return Parenthetical(content=args)
|
|
1267
|
+
|
|
1268
|
+
def parenthetical(self, args):
    """Wrap the first child in a ``Parenthetical``."""
    inner = args[0]
    return Parenthetical(content=inner)
|
|
1270
|
+
|
|
1271
|
+
def conditional(self, args):
    """Build a ``Conditional`` from ``[left, operator, right]`` children."""
    left, operator, right = args[0], args[1], args[2]
    return Conditional(left=left, right=right, operator=operator)
|
|
1273
|
+
|
|
1274
|
+
def window_order(self, args):
    """Convert the first child into a ``WindowOrder``."""
    direction = args[0]
    return WindowOrder(direction)
|
|
1276
|
+
|
|
1277
|
+
def window_order_by(self, args):
    """Flatten the tree: return the first child unchanged."""
    ordering = args[0]
    return ordering
|
|
1280
|
+
|
|
1281
|
+
def window(self, args):
    """Build a ``Window``: ``args[0]`` is the order, ``args[1]`` the count token."""
    order, count_token = args[0], args[1]
    return Window(count=count_token.value, window_order=order)
|
|
1283
|
+
|
|
1284
|
+
def WINDOW_TYPE(self, args):
    """Convert the token to a ``WindowType`` after stripping surrounding whitespace."""
    keyword = args.strip()
    return WindowType(keyword)
|
|
1286
|
+
|
|
1287
|
+
def window_item_over(self, args):
    """Wrap the first child in a ``WindowItemOver``."""
    contents = args[0]
    return WindowItemOver(contents=contents)
|
|
1289
|
+
|
|
1290
|
+
def window_item_order(self, args):
    """Wrap the first child in a ``WindowItemOrder``."""
    contents = args[0]
    return WindowItemOrder(contents=contents)
|
|
1292
|
+
|
|
1293
|
+
def window_item(self, args) -> WindowItem:
    """Build a ``WindowItem`` from a window expression.

    ``args[0]`` is the window type and ``args[1]`` the concept name. Later
    children supply the order-by and over lists, each defaulting to an
    empty list.
    """
    window_type = args[0]
    order_by = []
    over = []
    for clause in args[2:]:
        if isinstance(clause, WindowItemOrder):
            order_by = clause.contents
        elif isinstance(clause, WindowItemOver):
            over = clause.contents
    target = self.environment.concepts[args[1]]
    return WindowItem(type=window_type, content=target, over=over, order_by=order_by)
|
|
1304
|
+
|
|
1305
|
+
def filter_item(self, args) -> FilterItem:
    """Build a ``FilterItem`` from exactly ``[concept name, where clause]``."""
    where: WhereClause
    concept_name, where = args
    filtered = self.environment.concepts[concept_name]
    return FilterItem(content=filtered, where=where)
|
|
1310
|
+
|
|
1311
|
+
# BEGIN FUNCTIONS
|
|
1312
|
+
@v_args(meta=True)
def expr_reference(self, meta, args) -> Concept:
    """Resolve a concept reference, passing the source line to the lookup."""
    reference = args[0]
    return self.environment.concepts.__getitem__(reference, meta.line)
|
|
1315
|
+
|
|
1316
|
+
def expr(self, args):
    """Flatten the tree: return the expression's single child.

    Raises:
        ParseError: if more than one child is present.
    """
    if len(args) <= 1:
        return args[0]
    raise ParseError("Expression should have one child only.")
|
|
1320
|
+
|
|
1321
|
+
def aggregate_over(self, args):
    """Flatten the tree: return the first child unchanged."""
    grouping = args[0]
    return grouping
|
|
1323
|
+
|
|
1324
|
+
def aggregate_all(self, args):
    """Return a one-element list holding the internal all-rows concept."""
    all_rows_address = f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}"
    return [self.environment.concepts[all_rows_address]]
|
|
1326
|
+
|
|
1327
|
+
def aggregate_functions(self, args):
    """Wrap an aggregate function; a second child, if present, is its ``by`` list."""
    if len(args) != 2:
        return AggregateWrapper(function=args[0])
    aggregate, by = args
    return AggregateWrapper(function=aggregate, by=by)
|
|
1331
|
+
|
|
1332
|
+
@v_args(meta=True)
def index_access(self, meta, args):
    """Build an ``IndexAccess`` from the processed arguments."""
    return IndexAccess(self.process_function_args(args, meta=meta))
|
|
1336
|
+
|
|
1337
|
+
@v_args(meta=True)
def attr_access(self, meta, args):
    """Build an ``AttrAccess`` from the processed arguments."""
    return AttrAccess(self.process_function_args(args, meta=meta))
|
|
1341
|
+
|
|
1342
|
+
@v_args(meta=True)
def fcoalesce(self, meta, args):
    """Build a ``Coalesce`` from the processed arguments."""
    return Coalesce(self.process_function_args(args, meta=meta))
|
|
1346
|
+
|
|
1347
|
+
@v_args(meta=True)
def unnest(self, meta, args):
    """Build an ``Unnest`` from the processed arguments."""
    return Unnest(self.process_function_args(args, meta=meta))
|
|
1351
|
+
|
|
1352
|
+
@v_args(meta=True)
def count(self, meta, args):
    """Build a ``Count`` from the processed arguments."""
    return Count(self.process_function_args(args, meta=meta))
|
|
1356
|
+
|
|
1357
|
+
@v_args(meta=True)
def fgroup(self, meta, args):
    """Build a ``Group`` from the grouped value and an optional list of extras.

    With exactly two children, the second is a list appended to the first;
    otherwise only the first child is used.
    """
    raw_args = [args[0]]
    if len(args) == 2:
        raw_args = raw_args + args[1]
    return Group(self.process_function_args(raw_args, meta=meta))
|
|
1364
|
+
|
|
1365
|
+
@v_args(meta=True)
def fabs(self, meta, args):
    """Build an ``Abs`` from the processed arguments."""
    return Abs(self.process_function_args(args, meta=meta))
|
|
1369
|
+
|
|
1370
|
+
@v_args(meta=True)
def count_distinct(self, meta, args):
    """Build a ``CountDistinct`` from the processed arguments."""
    return CountDistinct(self.process_function_args(args, meta=meta))
|
|
1374
|
+
|
|
1375
|
+
@v_args(meta=True)
def sum(self, meta, args):
    """Build the SUM metric function; its output type is the first argument's datatype."""
    resolved = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.SUM,
        arguments=resolved,
        output_datatype=resolved[0].datatype,
        output_purpose=Purpose.METRIC,
        arg_count=1,
    )
|
|
1385
|
+
|
|
1386
|
+
@v_args(meta=True)
def avg(self, meta, args):
    """Build the AVG metric function; its output type is the first argument's datatype."""
    resolved = self.process_function_args(args, meta=meta)
    first = resolved[0]
    numeric_types = {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER}
    return Function(
        operator=FunctionType.AVG,
        arguments=resolved,
        output_datatype=first.datatype,
        output_purpose=Purpose.METRIC,
        valid_inputs=numeric_types,
        arg_count=1,
    )
|
|
1399
|
+
|
|
1400
|
+
@v_args(meta=True)
def max(self, meta, args):
    """Build a ``Max`` from the processed arguments."""
    return Max(self.process_function_args(args, meta=meta))
|
|
1404
|
+
|
|
1405
|
+
@v_args(meta=True)
def min(self, meta, args):
    """Build a ``Min`` from the processed arguments."""
    return Min(self.process_function_args(args, meta=meta))
|
|
1409
|
+
|
|
1410
|
+
@v_args(meta=True)
def len(self, meta, args):
    """Build the LENGTH function (integer property) over strings, arrays or maps."""
    resolved = self.process_function_args(args, meta=meta)
    accepted = {DataType.STRING, DataType.ARRAY, DataType.MAP}
    return Function(
        operator=FunctionType.LENGTH,
        arguments=resolved,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
    )
|
|
1421
|
+
|
|
1422
|
+
@v_args(meta=True)
def fsplit(self, meta, args):
    """Build a ``Split`` from the processed arguments."""
    return Split(self.process_function_args(args, meta=meta))
|
|
1426
|
+
|
|
1427
|
+
@v_args(meta=True)
def concat(self, meta, args):
    """Build the CONCAT function (string property) over string inputs."""
    resolved = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.CONCAT,
        arguments=resolved,
        output_datatype=DataType.STRING,
        output_purpose=Purpose.PROPERTY,
        valid_inputs={DataType.STRING},
        arg_count=99,
    )
|
|
1439
|
+
|
|
1440
|
+
@v_args(meta=True)
def like(self, meta, args):
    """Build the two-argument LIKE function (boolean property) over strings."""
    resolved = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.LIKE,
        arguments=resolved,
        output_datatype=DataType.BOOL,
        output_purpose=Purpose.PROPERTY,
        valid_inputs={DataType.STRING},
        arg_count=2,
    )
|
|
1451
|
+
|
|
1452
|
+
@v_args(meta=True)
def alt_like(self, meta, args):
    """Build a LIKE function for the alternate rule; same construction as ``like``."""
    resolved = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.LIKE,
        arguments=resolved,
        output_datatype=DataType.BOOL,
        output_purpose=Purpose.PROPERTY,
        valid_inputs={DataType.STRING},
        arg_count=2,
    )
|
|
1463
|
+
|
|
1464
|
+
@v_args(meta=True)
def ilike(self, meta, args):
    """Build the two-argument ILIKE function (boolean property) over strings."""
    resolved = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.ILIKE,
        arguments=resolved,
        output_datatype=DataType.BOOL,
        output_purpose=Purpose.PROPERTY,
        valid_inputs={DataType.STRING},
        arg_count=2,
    )
|
|
1475
|
+
|
|
1476
|
+
@v_args(meta=True)
def upper(self, meta, args):
    """Build the one-argument UPPER function (string property)."""
    resolved = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.UPPER,
        arguments=resolved,
        output_datatype=DataType.STRING,
        output_purpose=Purpose.PROPERTY,
        valid_inputs={DataType.STRING},
        arg_count=1,
    )
|
|
1487
|
+
|
|
1488
|
+
@v_args(meta=True)
def fstrpos(self, meta, args):
    """Build a ``StrPos`` from the processed arguments."""
    return StrPos(self.process_function_args(args, meta=meta))
|
|
1492
|
+
|
|
1493
|
+
@v_args(meta=True)
def fsubstring(self, meta, args):
    """Build a ``SubString`` from the processed arguments."""
    return SubString(self.process_function_args(args, meta=meta))
|
|
1500
|
+
|
|
1501
|
+
@v_args(meta=True)
def lower(self, meta, args):
    """Build the one-argument LOWER function (string property)."""
    resolved = self.process_function_args(args, meta=meta)
    return Function(
        operator=FunctionType.LOWER,
        arguments=resolved,
        output_datatype=DataType.STRING,
        output_purpose=Purpose.PROPERTY,
        valid_inputs={DataType.STRING},
        arg_count=1,
    )
|
|
1512
|
+
|
|
1513
|
+
# date functions
|
|
1514
|
+
@v_args(meta=True)
def fdate(self, meta, args):
    """Build the one-argument DATE function (date property)."""
    resolved = self.process_function_args(args, meta=meta)
    date_like = {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }
    return Function(
        operator=FunctionType.DATE,
        arguments=resolved,
        output_datatype=DataType.DATE,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=date_like,
        arg_count=1,
    )
|
|
1530
|
+
|
|
1531
|
+
def DATE_PART(self, args):
    """Convert the token's value into a ``DatePart``."""
    part_name = args.value
    return DatePart(part_name)
|
|
1533
|
+
|
|
1534
|
+
@v_args(meta=True)
def fdate_trunc(self, meta, args):
    """Build the DATE_TRUNCATE function: a date-like value plus a date part."""
    resolved = self.process_function_args(args, meta=meta)
    # One accepted-type set per positional argument.
    date_like = {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }
    return Function(
        operator=FunctionType.DATE_TRUNCATE,
        arguments=resolved,
        output_datatype=DataType.DATE,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=[date_like, {DataType.DATE_PART}],
        arg_count=2,
    )
|
|
1553
|
+
|
|
1554
|
+
@v_args(meta=True)
def fdate_part(self, meta, args):
    """Build the DATE_PART function: a date-like value plus a date part."""
    resolved = self.process_function_args(args, meta=meta)
    # One accepted-type set per positional argument.
    date_like = {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }
    return Function(
        operator=FunctionType.DATE_PART,
        arguments=resolved,
        # NOTE(review): the output is typed DATE although a single date part
        # is being extracted — confirm this is intended.
        output_datatype=DataType.DATE,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=[date_like, {DataType.DATE_PART}],
        arg_count=2,
    )
|
|
1573
|
+
|
|
1574
|
+
@v_args(meta=True)
def fdate_add(self, meta, args):
    """Build a ``Function`` with operator ``DATE_ADD``.

    Declares a DATE output with PROPERTY purpose and three positional
    inputs: a DATE/TIMESTAMP/DATETIME/STRING value, a DATE_PART, and an
    INTEGER.
    """
    processed = self.process_function_args(args, meta=meta)
    value_types = {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }
    per_argument_types = [
        value_types,
        {DataType.DATE_PART},
        {DataType.INTEGER},
    ]
    return Function(
        operator=FunctionType.DATE_ADD,
        arguments=processed,
        output_datatype=DataType.DATE,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=per_argument_types,
        arg_count=3,
    )
|
|
1594
|
+
|
|
1595
|
+
@v_args(meta=True)
def fdate_diff(self, meta, args):
    """Build a ``Function`` with operator ``DATE_DIFF``.

    Declares an INTEGER output and three positional inputs: two
    DATE/TIMESTAMP/DATETIME values followed by a DATE_PART. The output
    purpose is derived from the processed arguments.
    """
    processed = self.process_function_args(args, meta=meta)
    derived_purpose = function_args_to_output_purpose(processed)
    temporal = (DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME)
    # Each positional slot gets its own set object, as in a literal list.
    per_argument_types = [
        set(temporal),
        set(temporal),
        {DataType.DATE_PART},
    ]
    return Function(
        operator=FunctionType.DATE_DIFF,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=derived_purpose,
        valid_inputs=per_argument_types,
        arg_count=3,
    )
|
|
1619
|
+
|
|
1620
|
+
@v_args(meta=True)
def fdatetime(self, meta, args):
    """Build a ``Function`` with operator ``DATETIME``.

    Declares a DATETIME output with PROPERTY purpose and a single input
    that may be a DATE, TIMESTAMP, DATETIME or STRING.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }
    return Function(
        operator=FunctionType.DATETIME,
        arguments=processed,
        output_datatype=DataType.DATETIME,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1636
|
+
|
|
1637
|
+
@v_args(meta=True)
def ftimestamp(self, meta, args):
    """Build a ``Function`` with operator ``TIMESTAMP``.

    Declares a TIMESTAMP output with PROPERTY purpose and a single
    TIMESTAMP or STRING input (given as a one-element list of type sets).
    """
    processed = self.process_function_args(args, meta=meta)
    per_argument_types = [{DataType.TIMESTAMP, DataType.STRING}]
    return Function(
        operator=FunctionType.TIMESTAMP,
        arguments=processed,
        output_datatype=DataType.TIMESTAMP,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=per_argument_types,
        arg_count=1,
    )
|
|
1648
|
+
|
|
1649
|
+
@v_args(meta=True)
def fsecond(self, meta, args):
    """Build a ``Function`` with operator ``SECOND``.

    Declares an INTEGER output with PROPERTY purpose and a single
    TIMESTAMP or DATETIME input.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.SECOND,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1660
|
+
|
|
1661
|
+
@v_args(meta=True)
def fminute(self, meta, args):
    """Build a ``Function`` with operator ``MINUTE``.

    Declares an INTEGER output with PROPERTY purpose and a single
    TIMESTAMP or DATETIME input.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.MINUTE,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1672
|
+
|
|
1673
|
+
@v_args(meta=True)
def fhour(self, meta, args):
    """Build a ``Function`` with operator ``HOUR``.

    Declares an INTEGER output with PROPERTY purpose and a single
    TIMESTAMP or DATETIME input.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.HOUR,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1684
|
+
|
|
1685
|
+
@v_args(meta=True)
def fday(self, meta, args):
    """Build a ``Function`` with operator ``DAY``.

    Declares an INTEGER output with PROPERTY purpose and a single DATE,
    TIMESTAMP or DATETIME input.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.DAY,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1696
|
+
|
|
1697
|
+
@v_args(meta=True)
def fday_of_week(self, meta, args):
    """Build a ``Function`` with operator ``DAY_OF_WEEK``.

    Declares an INTEGER output with PROPERTY purpose and a single DATE,
    TIMESTAMP or DATETIME input.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.DAY_OF_WEEK,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1708
|
+
|
|
1709
|
+
@v_args(meta=True)
def fweek(self, meta, args):
    """Build a ``Function`` with operator ``WEEK``.

    Declares an INTEGER output with PROPERTY purpose and a single DATE,
    TIMESTAMP or DATETIME input.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.WEEK,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1720
|
+
|
|
1721
|
+
@v_args(meta=True)
def fmonth(self, meta, args):
    """Build a ``Function`` with operator ``MONTH``.

    Declares an INTEGER output with PROPERTY purpose and a single DATE,
    TIMESTAMP or DATETIME input.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.MONTH,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1732
|
+
|
|
1733
|
+
@v_args(meta=True)
def fquarter(self, meta, args):
    """Build a ``Function`` with operator ``QUARTER``.

    Declares an INTEGER output with PROPERTY purpose and a single DATE,
    TIMESTAMP or DATETIME input.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.QUARTER,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1744
|
+
|
|
1745
|
+
@v_args(meta=True)
def fyear(self, meta, args):
    """Build a ``Function`` with operator ``YEAR``.

    Declares an INTEGER output with PROPERTY purpose and a single DATE,
    TIMESTAMP or DATETIME input.
    """
    processed = self.process_function_args(args, meta=meta)
    accepted = {DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME}
    return Function(
        operator=FunctionType.YEAR,
        arguments=processed,
        output_datatype=DataType.INTEGER,
        output_purpose=Purpose.PROPERTY,
        valid_inputs=accepted,
        arg_count=1,
    )
|
|
1756
|
+
|
|
1757
|
+
# utility functions
|
|
1758
|
+
@v_args(meta=True)
def fcast(self, meta, args) -> Function:
    """Build a ``Function`` with operator ``CAST``.

    The second processed argument is used directly as the output
    datatype, and the output purpose is derived from the processed
    arguments. Two arguments are declared, with INTEGER, STRING, FLOAT
    and NUMBER as the valid input types.
    """
    processed = self.process_function_args(args, meta=meta)
    target_type = processed[1]
    castable = {
        DataType.INTEGER,
        DataType.STRING,
        DataType.FLOAT,
        DataType.NUMBER,
    }
    return Function(
        operator=FunctionType.CAST,
        arguments=processed,
        output_datatype=target_type,
        output_purpose=function_args_to_output_purpose(processed),
        valid_inputs=castable,
        arg_count=2,
    )
|
|
1775
|
+
|
|
1776
|
+
# math functions
|
|
1777
|
+
@v_args(meta=True)
def fadd(self, meta, args) -> Function:
    """Build a ``Function`` with operator ``ADD`` over two arguments.

    The output datatype is taken from the first processed argument and
    the output purpose is derived from all processed arguments.
    """
    processed = self.process_function_args(args, meta=meta)
    first_arg_type = arg_to_datatype(processed[0])
    derived_purpose = function_args_to_output_purpose(processed)
    # TODO: check for valid transforms? (no valid_inputs are declared here)
    return Function(
        operator=FunctionType.ADD,
        arguments=processed,
        output_datatype=first_arg_type,
        output_purpose=derived_purpose,
        arg_count=2,
    )
|
|
1790
|
+
|
|
1791
|
+
@v_args(meta=True)
def fsub(self, meta, args) -> Function:
    """Build a ``Function`` with operator ``SUBTRACT`` over two arguments.

    The output datatype is taken from the first processed argument and
    the output purpose is derived from all processed arguments. No
    valid_inputs are declared.
    """
    processed = self.process_function_args(args, meta=meta)
    first_arg_type = arg_to_datatype(processed[0])
    derived_purpose = function_args_to_output_purpose(processed)
    return Function(
        operator=FunctionType.SUBTRACT,
        arguments=processed,
        output_datatype=first_arg_type,
        output_purpose=derived_purpose,
        arg_count=2,
    )
|
|
1803
|
+
|
|
1804
|
+
@v_args(meta=True)
def fmul(self, meta, args) -> Function:
    """Build a ``Function`` with operator ``MULTIPLY`` over two arguments.

    The output datatype is taken from the first processed argument and
    the output purpose is derived from all processed arguments. No
    valid_inputs are declared.
    """
    processed = self.process_function_args(args, meta=meta)
    first_arg_type = arg_to_datatype(processed[0])
    derived_purpose = function_args_to_output_purpose(processed)
    return Function(
        operator=FunctionType.MULTIPLY,
        arguments=processed,
        output_datatype=first_arg_type,
        output_purpose=derived_purpose,
        arg_count=2,
    )
|
|
1816
|
+
|
|
1817
|
+
@v_args(meta=True)
def fdiv(self, meta: Meta, args):
    """Build a ``Function`` with operator ``DIVIDE`` over two arguments.

    The output datatype is taken from the first processed argument and
    the output purpose is derived from all processed arguments. No
    valid_inputs are declared.
    """
    # Normalize the arguments first and derive the datatype from the
    # processed form, the same order used by fadd/fsub/fmul/fround.
    # Previously the datatype was read from the raw parse argument.
    args = self.process_function_args(args, meta=meta)
    output_datatype = arg_to_datatype(args[0])
    return Function(
        operator=FunctionType.DIVIDE,
        arguments=args,
        output_datatype=output_datatype,
        output_purpose=function_args_to_output_purpose(args),
        arg_count=2,
    )
|
|
1829
|
+
|
|
1830
|
+
@v_args(meta=True)
def fmod(self, meta: Meta, args):
    """Build a ``Function`` with operator ``MOD`` over two arguments.

    The first argument may be INTEGER, FLOAT or NUMBER; the second must
    be INTEGER. The output datatype is taken from the first processed
    argument and the output purpose is derived from all processed
    arguments.
    """
    # Normalize the arguments first and derive the datatype from the
    # processed form, the same order used by fadd/fsub/fmul/fround.
    # Previously the datatype was read from the raw parse argument.
    args = self.process_function_args(args, meta=meta)
    output_datatype = arg_to_datatype(args[0])
    return Function(
        operator=FunctionType.MOD,
        arguments=args,
        output_datatype=output_datatype,
        output_purpose=function_args_to_output_purpose(args),
        valid_inputs=[
            {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
            {DataType.INTEGER},
        ],
        arg_count=2,
    )
|
|
1845
|
+
|
|
1846
|
+
@v_args(meta=True)
def fround(self, meta, args) -> Function:
    """Build a ``Function`` with operator ``ROUND`` over two arguments.

    The first argument may be INTEGER, FLOAT or NUMBER; the second must
    be INTEGER. The output datatype is taken from the first processed
    argument and the output purpose is derived from all processed
    arguments.
    """
    processed = self.process_function_args(args, meta=meta)
    first_arg_type = arg_to_datatype(processed[0])
    per_argument_types = [
        {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
        {DataType.INTEGER},
    ]
    return Function(
        operator=FunctionType.ROUND,
        arguments=processed,
        output_datatype=first_arg_type,
        output_purpose=function_args_to_output_purpose(processed),
        valid_inputs=per_argument_types,
        arg_count=2,
    )
|
|
1861
|
+
|
|
1862
|
+
def fcase(self, args: List[Union[CaseWhen, CaseElse]]):
    """Build a ``Function`` with operator ``CASE`` from its branches.

    Every branch expression must resolve to the same datatype, which
    becomes the output datatype of the function.

    Raises:
        SyntaxError: if the branches do not share exactly one datatype.
    """
    branch_types = {arg_to_datatype(branch.expr) for branch in args}
    if len(branch_types) != 1:
        datatypes = branch_types  # name used by the message placeholder
        raise SyntaxError(
            f"All case expressions must have the same output datatype, got {datatypes}"
        )
    (shared_type,) = branch_types
    return Function(
        operator=FunctionType.CASE,
        arguments=args,
        output_datatype=shared_type,
        output_purpose=Purpose.PROPERTY,
        arg_count=InfiniteFunctionArgs,
    )
|
|
1879
|
+
|
|
1880
|
+
@v_args(meta=True)
def fcase_when(self, meta, args) -> CaseWhen:
    """Build a ``CaseWhen`` from two processed arguments.

    The first processed argument is the comparison, the second the
    expression.
    """
    processed = self.process_function_args(args, meta=meta)
    condition = processed[0]
    result_expr = processed[1]
    return CaseWhen(comparison=condition, expr=result_expr)
|
|
1884
|
+
|
|
1885
|
+
@v_args(meta=True)
def fcase_else(self, meta, args) -> CaseElse:
    """Build a ``CaseElse`` whose expression is the first processed argument."""
    processed = self.process_function_args(args, meta=meta)
    fallback_expr = processed[0]
    return CaseElse(expr=fallback_expr)
|
|
1889
|
+
|
|
1890
|
+
@v_args(meta=True)
def fcurrent_date(self, meta, args):
    """Build a ``CurrentDate`` expression from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return CurrentDate(processed)
|
|
1894
|
+
|
|
1895
|
+
@v_args(meta=True)
def fcurrent_datetime(self, meta, args):
    """Build a ``CurrentDatetime`` expression from the processed arguments."""
    processed = self.process_function_args(args, meta=meta)
    return CurrentDatetime(processed)
|
|
1899
|
+
|
|
1900
|
+
@v_args(meta=True)
def fnot(self, meta, args):
    """Build an ``IsNull`` expression from the processed arguments.

    NOTE(review): the method name suggests a negation, but the node
    produced is ``IsNull`` - confirm against the grammar that this is
    the intended mapping.
    """
    processed = self.process_function_args(args, meta=meta)
    return IsNull(processed)
|
|
1904
|
+
|
|
1905
|
+
|
|
1906
|
+
def unpack_visit_error(e: VisitError):
    """Re-raise the underlying exception carried by a ``VisitError``.

    This is required to get exceptions from imports, which would
    raise nested VisitErrors. This function never returns normally;
    every path raises.

    Raises:
        UndefinedConceptException, ImportError: re-raised unchanged when
            they are the wrapped exception.
        InvalidSyntaxException: when the wrapped exception is a
            ``ValidationError`` or ``TypeError``; the wrapped exception
            is attached as the explicit cause.
        VisitError: the original error, for any other wrapped exception.
    """
    wrapped = e.orig_exc
    if isinstance(wrapped, VisitError):
        # Nested visit errors: recurse until a non-VisitError is found.
        unpack_visit_error(wrapped)
    elif isinstance(wrapped, (UndefinedConceptException, ImportError)):
        raise wrapped
    elif isinstance(wrapped, (ValidationError, TypeError)):
        # Chain explicitly so the traceback shows the real cause.
        raise InvalidSyntaxException(str(wrapped)) from wrapped
    raise e
|
|
1916
|
+
|
|
1917
|
+
|
|
1918
|
+
def parse_text(text: str, environment: Optional[Environment] = None) -> Tuple[
    Environment,
    List[
        Datasource
        | ImportStatement
        | SelectStatement
        | PersistStatement
        | ShowStatement
        | None
    ],
]:
    """Parse ``text`` into statement objects within an environment.

    Args:
        text: the source text to parse.
        environment: the environment to parse into. A new
            ``Environment(datasources={})`` is created only when this is
            ``None``, so a caller-supplied environment is always used.

    Returns:
        A tuple of the environment and the list of truthy results
        returned by the parser's ``hydrate_missing`` pass.

    Raises:
        InvalidSyntaxException: on parser errors, ``ValidationError`` or
            ``TypeError``; the original exception is attached as the
            cause.
        Other exceptions as re-raised by ``unpack_visit_error``.
    """
    if environment is None:
        environment = Environment(datasources={})
    parser = ParseToObjects(visit_tokens=True, text=text, environment=environment)

    try:
        parser.transform(PARSER.parse(text))
        # handle circular dependencies
        pass_two = parser.hydrate_missing()
        output = [v for v in pass_two if v]
    except VisitError as e:
        unpack_visit_error(e)
        # this will never be reached
        raise e
    except (
        UnexpectedCharacters,
        UnexpectedEOF,
        UnexpectedInput,
        UnexpectedToken,
        ValidationError,
        TypeError,
    ) as e:
        # Chain explicitly so the traceback shows the real cause.
        raise InvalidSyntaxException(str(e)) from e

    return environment, output
|