pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (76)
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1859 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +701 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +178 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +55 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +18 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +70 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +148 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/__init__.py +4 -0
  65. trilogy/hooks/base_hook.py +6 -4
  66. trilogy/hooks/query_debugger.py +113 -97
  67. trilogy/parser.py +1 -1
  68. trilogy/parsing/common.py +307 -64
  69. trilogy/parsing/parse_engine.py +277 -618
  70. trilogy/parsing/render.py +50 -26
  71. trilogy/scripts/trilogy.py +2 -1
  72. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  73. trilogy/core/models.py +0 -4960
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
  76. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
trilogy/core/functions.py CHANGED
@@ -1,26 +1,685 @@
1
+ from dataclasses import dataclass
1
2
  from datetime import date, datetime
2
- from typing import Optional
3
+ from typing import Any, Callable, Optional
4
+
5
+ from lark.tree import Meta
3
6
 
4
7
  from trilogy.constants import MagicConstants
5
- from trilogy.core.enums import DatePart, FunctionType, Granularity, Purpose
8
+ from trilogy.core.enums import (
9
+ DatePart,
10
+ FunctionClass,
11
+ FunctionType,
12
+ Granularity,
13
+ InfiniteFunctionArgs,
14
+ Purpose,
15
+ )
6
16
  from trilogy.core.exceptions import InvalidSyntaxException
7
- from trilogy.core.models import (
17
+ from trilogy.core.models.author import (
8
18
  AggregateWrapper,
9
19
  Concept,
10
- DataType,
11
20
  Function,
21
+ Parenthetical,
22
+ UndefinedConcept,
23
+ WindowItem,
24
+ )
25
+ from trilogy.core.models.core import (
26
+ CONCRETE_TYPES,
27
+ DataType,
12
28
  ListType,
13
29
  MapType,
14
30
  NumericType,
15
- Parenthetical,
16
31
  StructType,
17
- WindowItem,
18
32
  arg_to_datatype,
33
+ merge_datatypes,
19
34
  )
35
+ from trilogy.core.models.environment import Environment
20
36
 
21
37
  GENERIC_ARGS = Concept | Function | str | int | float | date | datetime
22
38
 
23
39
 
40
@dataclass
class FunctionConfig:
    """Declarative description of one built-in function.

    Instances are stored in ``FUNCTION_REGISTRY`` and read by
    ``FunctionFactory.create_function`` when building a ``Function``.
    """

    # Number of arguments the function takes; defaults to a single argument.
    arg_count: int = 1
    # Either one set of accepted datatypes, or a list of sets (presumably one
    # per positional argument -- confirm against the consumer of valid_inputs).
    valid_inputs: set[DataType] | list[set[DataType]] | None = None
    # Purpose assigned to the function's output, when fixed.
    output_purpose: Purpose | None = None
    # Fixed output datatype, when it does not depend on the arguments.
    output_type: DataType | ListType | MapType | StructType | NumericType | None = None
    # Callable that receives the argument list and returns the output datatype.
    output_type_function: Callable | None = None
47
+
48
+
49
def get_unnest_output_type(args: list[Any]) -> CONCRETE_TYPES:
    """Return the element type produced by unnesting the first argument.

    List and map types yield their ``value_data_type``; any other resolved
    datatype falls back to ``DataType.STRING``.
    """
    resolved = arg_to_datatype(args[0])
    if isinstance(resolved, (ListType, MapType)):
        return resolved.value_data_type
    return DataType.STRING
56
+
57
+
58
def get_coalesce_output_type(args: list[Any]) -> CONCRETE_TYPES:
    """Return the single datatype shared by the arguments of a coalesce call.

    Arguments equal to ``MagicConstants.NULL`` are ignored.

    Raises:
        InvalidSyntaxException: if the remaining arguments do not resolve to
            exactly one distinct datatype.
    """
    candidate_types = []
    for arg in args:
        if arg == MagicConstants.NULL:
            continue
        # NOTE(review): falsy arguments (e.g. a literal 0 or "") are also
        # skipped here, so they never take part in the type check -- confirm
        # this is intended before tightening it.
        if not arg:
            continue
        candidate_types.append(arg_to_datatype(arg))
    if len(set(candidate_types)) != 1:
        raise InvalidSyntaxException(
            f"All arguments to coalesce must be of the same type, have {set(arg_to_datatype(x) for x in args)}"
        )
    return candidate_types[0]
66
+
67
+
68
def get_index_output_type(
    args: list[Any],
) -> CONCRETE_TYPES:
    """Return the datatype obtained by indexing into the first argument.

    List and map types yield their ``value_data_type``; any other datatype is
    returned unchanged.
    """
    container_type = arg_to_datatype(args[0])
    if isinstance(container_type, (ListType, MapType)):
        return container_type.value_data_type
    return container_type
78
+
79
+
80
def get_attr_datatype(
    args: list[Any],
) -> CONCRETE_TYPES:
    """Return the datatype of attribute ``args[1]`` on the value ``args[0]``.

    For a struct type the attribute is looked up in ``fields_map`` and its
    datatype returned; for any other type the value's own datatype is returned.
    """
    # Both positions are read up front, so a missing lookup argument fails
    # even when the first argument is not a struct.
    owner, field_name = args[0], args[1]
    owner_type = arg_to_datatype(owner)
    if not isinstance(owner_type, StructType):
        return owner_type
    return arg_to_datatype(owner_type.fields_map[field_name])
89
+
90
+
91
def get_cast_output_type(
    args: list[Any],
) -> DataType:
    """Return the second argument unchanged as the output datatype.

    Presumably ``args[1]`` is the cast target type -- confirm against callers.
    """
    target_type = args[1]
    return target_type
95
+
96
+
97
def validate_case_output(
    args: list[Any],
) -> DataType:
    """Return the single output datatype shared by all case branches.

    Each argument's ``expr`` is resolved to a datatype; branches resolving to
    ``DataType.NULL`` do not take part in the comparison.

    Raises:
        SyntaxError: if the non-null branches do not share exactly one datatype.
    """
    resolved = [(branch.expr, arg_to_datatype(branch.expr)) for branch in args]
    distinct_types = {
        branch_type.data_type
        for _, branch_type in resolved
        if branch_type != DataType.NULL
    }
    # Kept only to make the error message explain where each type came from.
    types_by_expr = {str(expr): branch_type for expr, branch_type in resolved}
    if len(distinct_types) != 1:
        raise SyntaxError(
            f"All case expressions must have the same output datatype, got {distinct_types} from {types_by_expr}"
        )
    return distinct_types.pop()
112
+
113
+
114
def create_struct_output(
    args: list[Any],
) -> StructType:
    """Build a ``StructType`` from a flat, alternating key/value argument list.

    Even positions are used as field keys and odd positions as field values.
    """
    keys = args[::2]
    values = args[1::2]
    field_types = [arg_to_datatype(value) for value in values]
    return StructType(fields=field_types, fields_map=dict(zip(keys, values)))
120
+
121
+
122
def get_date_trunc_output(
    args: list[Any],
):
    """Return the datatype produced by truncating to the date part ``args[1]``.

    Year, month and day truncation yield ``DataType.DATE``; hour, minute and
    second truncation yield ``DataType.DATETIME``.

    Raises:
        InvalidSyntaxException: for any other date part.
    """
    part: DatePart = args[1]
    if part in (DatePart.YEAR, DatePart.MONTH, DatePart.DAY):
        return DataType.DATE
    if part in (DatePart.HOUR, DatePart.MINUTE, DatePart.SECOND):
        return DataType.DATETIME
    raise InvalidSyntaxException(f"Date truncation not supported for {part}")
140
+
141
+
142
def _numeric_inputs() -> set[DataType]:
    """Return a fresh set of the numeric datatypes accepted by math functions."""
    return {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER}


def _temporal_inputs() -> set[DataType]:
    """Return a fresh set of the datatypes accepted where a date-like value is expected."""
    return {
        DataType.DATE,
        DataType.TIMESTAMP,
        DataType.DATETIME,
        DataType.STRING,
    }


def _first_arg_output_type(args: list[Any]) -> CONCRETE_TYPES:
    """Return the datatype of the first argument for type-preserving functions.

    Falls back to ``DataType.INTEGER`` (the previously hard-coded value) when
    no arguments were supplied, so that case keeps its prior result.
    """
    if not args:
        return DataType.INTEGER
    return arg_to_datatype(args[0])


def _date_component_config() -> FunctionConfig:
    """Return the config shared by functions extracting one integer date component."""
    return FunctionConfig(
        valid_inputs=_temporal_inputs(),
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.INTEGER,
        arg_count=1,
    )


def _arithmetic_config() -> FunctionConfig:
    """Return the config shared by the variadic arithmetic operators."""
    # NOTE(review): the output is fixed to INTEGER even for FLOAT/NUMBER
    # inputs; confirm whether these should follow their argument types.
    return FunctionConfig(
        valid_inputs=_numeric_inputs(),
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.INTEGER,
        arg_count=InfiniteFunctionArgs,
    )


# Declarative description of every supported function, keyed by operator.
# FunctionFactory.create_function reads these entries: output_type_function
# takes precedence over output_type, and when neither is set the output type is
# merged from the argument datatypes.
# NOTE(review): GROUP, COALESCE and UNION use a literal -1 for arg_count while
# other variadic entries use InfiniteFunctionArgs; presumably the same
# sentinel -- confirm.
FUNCTION_REGISTRY: dict[FunctionType, FunctionConfig] = {
    FunctionType.UNNEST: FunctionConfig(
        valid_inputs={DataType.ARRAY, DataType.LIST},
        output_purpose=Purpose.KEY,
        output_type_function=get_unnest_output_type,
        arg_count=1,
    ),
    FunctionType.GROUP: FunctionConfig(
        arg_count=-1,
    ),
    FunctionType.COUNT: FunctionConfig(
        output_purpose=Purpose.METRIC,
        output_type=DataType.INTEGER,
        arg_count=1,
    ),
    FunctionType.COUNT_DISTINCT: FunctionConfig(
        output_purpose=Purpose.METRIC,
        output_type=DataType.INTEGER,
        arg_count=1,
    ),
    # MAX/MIN return the datatype of their argument (e.g. the max of a DATE is
    # a DATE), matching the constructors this registry replaced.
    FunctionType.MAX: FunctionConfig(
        valid_inputs={
            DataType.INTEGER,
            DataType.FLOAT,
            DataType.NUMBER,
            DataType.DATE,
            DataType.DATETIME,
            DataType.TIMESTAMP,
            DataType.BOOL,
        },
        output_purpose=Purpose.METRIC,
        output_type_function=_first_arg_output_type,
        arg_count=1,
    ),
    FunctionType.MIN: FunctionConfig(
        valid_inputs={
            DataType.INTEGER,
            DataType.FLOAT,
            DataType.NUMBER,
            DataType.DATE,
            DataType.DATETIME,
            DataType.TIMESTAMP,
        },
        output_purpose=Purpose.METRIC,
        output_type_function=_first_arg_output_type,
        arg_count=1,
    ),
    FunctionType.SPLIT: FunctionConfig(
        valid_inputs={DataType.STRING},
        output_purpose=Purpose.PROPERTY,
        output_type=ListType(type=DataType.STRING),
        arg_count=2,
    ),
    FunctionType.INDEX_ACCESS: FunctionConfig(
        valid_inputs=[
            {DataType.LIST, DataType.ARRAY},
            {DataType.INTEGER},
        ],
        output_purpose=Purpose.PROPERTY,
        output_type_function=get_index_output_type,
        arg_count=2,
    ),
    FunctionType.MAP_ACCESS: FunctionConfig(
        valid_inputs=[
            {DataType.MAP},
            {DataType.INTEGER, DataType.STRING},
        ],
        output_purpose=Purpose.PROPERTY,
        output_type_function=get_index_output_type,
        arg_count=2,
    ),
    FunctionType.ATTR_ACCESS: FunctionConfig(
        valid_inputs=[
            {DataType.STRUCT},
            {DataType.STRING},
        ],
        output_purpose=Purpose.PROPERTY,
        output_type_function=get_attr_datatype,
        arg_count=2,
    ),
    # ABS preserves its argument's datatype rather than forcing INTEGER.
    FunctionType.ABS: FunctionConfig(
        valid_inputs=_numeric_inputs(),
        output_purpose=Purpose.PROPERTY,
        output_type_function=_first_arg_output_type,
        arg_count=1,
    ),
    FunctionType.COALESCE: FunctionConfig(
        valid_inputs={*DataType},
        output_purpose=Purpose.PROPERTY,
        # Ignored by FunctionFactory because output_type_function is set; kept
        # so the stored config is unchanged for anything that inspects it.
        output_type=DataType.INTEGER,
        arg_count=-1,
        output_type_function=get_coalesce_output_type,
    ),
    FunctionType.CURRENT_DATE: FunctionConfig(
        output_purpose=Purpose.CONSTANT,
        output_type=DataType.DATE,
        arg_count=0,
    ),
    # A current datetime is a DATETIME, not a DATE.
    FunctionType.CURRENT_DATETIME: FunctionConfig(
        output_purpose=Purpose.CONSTANT,
        output_type=DataType.DATETIME,
        arg_count=0,
    ),
    FunctionType.BOOL: FunctionConfig(
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.BOOL,
        arg_count=1,
    ),
    FunctionType.STRPOS: FunctionConfig(
        valid_inputs=[
            {DataType.STRING},
            {DataType.STRING},
        ],
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.INTEGER,
        arg_count=2,
    ),
    FunctionType.SUBSTRING: FunctionConfig(
        valid_inputs=[{DataType.STRING}, {DataType.INTEGER}, {DataType.INTEGER}],
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.STRING,
        arg_count=3,
    ),
    FunctionType.UNION: FunctionConfig(
        valid_inputs={*DataType},
        output_purpose=Purpose.KEY,
        arg_count=-1,
    ),
    FunctionType.LIKE: FunctionConfig(
        valid_inputs={DataType.STRING},
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.BOOL,
        arg_count=2,
    ),
    FunctionType.ILIKE: FunctionConfig(
        valid_inputs={DataType.STRING},
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.BOOL,
        arg_count=2,
    ),
    FunctionType.UPPER: FunctionConfig(
        valid_inputs={DataType.STRING},
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.STRING,
        arg_count=1,
    ),
    FunctionType.LOWER: FunctionConfig(
        valid_inputs={DataType.STRING},
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.STRING,
        arg_count=1,
    ),
    FunctionType.DATE: FunctionConfig(
        valid_inputs=_temporal_inputs(),
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.DATE,
        arg_count=1,
    ),
    FunctionType.DATE_TRUNCATE: FunctionConfig(
        valid_inputs=[
            _temporal_inputs(),
            {DataType.DATE_PART},
        ],
        output_purpose=Purpose.PROPERTY,
        # The output type depends on the requested date part.
        output_type_function=get_date_trunc_output,
        arg_count=2,
    ),
    FunctionType.DATE_PART: FunctionConfig(
        valid_inputs=[
            _temporal_inputs(),
            {DataType.DATE_PART},
        ],
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.INTEGER,
        arg_count=2,
    ),
    FunctionType.DATE_ADD: FunctionConfig(
        valid_inputs=[
            _temporal_inputs(),
            {DataType.DATE_PART},
            {DataType.INTEGER},
        ],
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.DATE,
        arg_count=3,
    ),
    FunctionType.DATE_DIFF: FunctionConfig(
        valid_inputs=[
            _temporal_inputs(),
            _temporal_inputs(),
            {DataType.DATE_PART},
        ],
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.INTEGER,
        arg_count=3,
    ),
    FunctionType.DATETIME: FunctionConfig(
        valid_inputs=_temporal_inputs(),
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.DATETIME,
        arg_count=1,
    ),
    FunctionType.TIMESTAMP: FunctionConfig(
        valid_inputs=_temporal_inputs(),
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.TIMESTAMP,
        arg_count=1,
    ),
    # Single-argument extractors that all return one integer date component.
    **{
        component: _date_component_config()
        for component in (
            FunctionType.SECOND,
            FunctionType.MINUTE,
            FunctionType.HOUR,
            FunctionType.DAY,
            FunctionType.WEEK,
            FunctionType.MONTH,
            FunctionType.QUARTER,
            FunctionType.YEAR,
            FunctionType.DAY_OF_WEEK,
        )
    },
    # Variadic arithmetic operators share one configuration.
    **{
        arithmetic_op: _arithmetic_config()
        for arithmetic_op in (
            FunctionType.ADD,
            FunctionType.SUBTRACT,
            FunctionType.MULTIPLY,
            FunctionType.DIVIDE,
        )
    },
    # NOTE(review): MOD declares two positional input sets but an unbounded
    # argument count -- confirm whether arg_count should be 2.
    FunctionType.MOD: FunctionConfig(
        valid_inputs=[
            _numeric_inputs(),
            {DataType.INTEGER},
        ],
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.INTEGER,
        arg_count=InfiniteFunctionArgs,
    ),
    FunctionType.ROUND: FunctionConfig(
        valid_inputs=[
            _numeric_inputs(),
            {DataType.INTEGER},
        ],
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.INTEGER,
        arg_count=2,
    ),
    FunctionType.CUSTOM: FunctionConfig(
        output_purpose=Purpose.PROPERTY,
        arg_count=InfiniteFunctionArgs,
    ),
    FunctionType.CASE: FunctionConfig(
        output_purpose=Purpose.PROPERTY,
        output_type_function=validate_case_output,
        arg_count=InfiniteFunctionArgs,
    ),
    FunctionType.CAST: FunctionConfig(
        output_purpose=Purpose.PROPERTY,
        arg_count=2,
        output_type_function=get_cast_output_type,
    ),
    FunctionType.CONCAT: FunctionConfig(
        valid_inputs={DataType.STRING},
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.STRING,
        arg_count=InfiniteFunctionArgs,
    ),
    FunctionType.CONSTANT: FunctionConfig(
        output_purpose=Purpose.CONSTANT,
        arg_count=1,
    ),
    FunctionType.IS_NULL: FunctionConfig(
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.BOOL,
        arg_count=1,
    ),
    FunctionType.STRUCT: FunctionConfig(
        output_purpose=Purpose.PROPERTY,
        arg_count=InfiniteFunctionArgs,
        output_type_function=create_struct_output,
    ),
    FunctionType.ARRAY: FunctionConfig(
        output_purpose=Purpose.PROPERTY,
        arg_count=InfiniteFunctionArgs,
        output_type=ListType(type=DataType.STRING),
    ),
    FunctionType.LENGTH: FunctionConfig(
        valid_inputs={DataType.STRING, DataType.ARRAY, DataType.MAP},
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.INTEGER,
        arg_count=1,
    ),
    # NOTE(review): SUM and AVG are fixed to INTEGER output even for
    # FLOAT/NUMBER inputs; confirm whether they should follow the argument type.
    FunctionType.SUM: FunctionConfig(
        valid_inputs=_numeric_inputs(),
        output_purpose=Purpose.METRIC,
        output_type=DataType.INTEGER,
        arg_count=1,
    ),
    FunctionType.AVG: FunctionConfig(
        valid_inputs=_numeric_inputs(),
        output_purpose=Purpose.METRIC,
        output_type=DataType.INTEGER,
        arg_count=1,
    ),
    FunctionType.UNIX_TO_TIMESTAMP: FunctionConfig(
        valid_inputs={DataType.INTEGER},
        output_purpose=Purpose.PROPERTY,
        output_type=DataType.TIMESTAMP,
        arg_count=1,
    ),
}
611
+
612
# Function types that are allowed to have no FUNCTION_REGISTRY entry.
EXCLUDED_FUNCTIONS = {
    FunctionType.CUSTOM,
    FunctionType.ALIAS,
    FunctionType.PARENTHETICAL,
    # Temporary
    FunctionType.DATE_LITERAL,
    FunctionType.DATETIME_LITERAL,
    FunctionType.ARRAY,
}

# Import-time completeness check: every FunctionType member must either be
# registered or explicitly excluded; the first offender aborts the import.
for k in FunctionType.__members__.values():
    if k in FUNCTION_REGISTRY or k in EXCLUDED_FUNCTIONS:
        continue
    raise InvalidSyntaxException(f"Function {k} not in registry")
625
+
626
+
627
class FunctionFactory:
    """Create ``Function`` objects for an operator from its ``FUNCTION_REGISTRY`` entry."""

    def __init__(self, environment: Environment | None = None):
        # The environment is only required when a function is created with
        # arguments (see create_function).
        self.environment = environment

    def create_function(
        self,
        args: list[Any],
        operator: FunctionType,
        meta: Meta | None = None,
    ) -> Function:
        """Build the ``Function`` for ``operator`` applied to ``args``.

        Args:
            args: raw arguments; when non-empty they are passed through
                ``process_function_args`` before use.
            operator: the function type to look up in ``FUNCTION_REGISTRY``.
            meta: optional parse metadata forwarded to ``process_function_args``.

        Returns:
            A ``Function`` whose output datatype comes from, in order of
            precedence: the config's ``output_type_function``, its fixed
            ``output_type``, or the merged datatypes of the arguments.

        Raises:
            ValueError: if ``operator`` is not registered, or if ``args`` is
                non-empty and the factory has no environment.
        """
        if operator not in FUNCTION_REGISTRY:
            raise ValueError(f"Function {operator} not in registry")
        config = FUNCTION_REGISTRY[operator]
        # A config without explicit valid inputs accepts every datatype.
        valid_inputs: set[DataType] | list[set[DataType]] = config.valid_inputs or set(
            DataType
        )

        if args:
            if not self.environment:
                raise ValueError("Environment required for function creation with args")
            # TODO: remove this dependency
            from trilogy.parsing.common import process_function_args

            full_args = process_function_args(
                args, environment=self.environment, meta=meta
            )
        else:
            full_args = []

        # The three branches are exhaustive, so no "could not determine"
        # fallback is needed.
        final_output_type: CONCRETE_TYPES
        if config.output_type_function:
            final_output_type = config.output_type_function(full_args)
        elif config.output_type:
            final_output_type = config.output_type
        else:
            final_output_type = merge_datatypes([arg_to_datatype(x) for x in full_args])

        output_purpose = config.output_purpose
        if not output_purpose:
            # Unspecified purpose: aggregates are metrics, the rest properties.
            if operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
                output_purpose = Purpose.METRIC
            else:
                output_purpose = Purpose.PROPERTY

        return Function(
            operator=operator,
            arguments=full_args,
            output_datatype=final_output_type,
            output_purpose=output_purpose,
            valid_inputs=valid_inputs,
            arg_count=config.arg_count,
        )
681
+
682
+
24
683
  def create_function_derived_concept(
25
684
  name: str,
26
685
  namespace: str,
@@ -53,6 +712,8 @@ def create_function_derived_concept(
53
712
 
54
713
 
55
714
  def argument_to_purpose(arg) -> Purpose:
715
+ if isinstance(arg, UndefinedConcept):
716
+ return Purpose.UNKNOWN
56
717
  if isinstance(arg, Function):
57
718
  return arg.output_purpose
58
719
  elif isinstance(arg, AggregateWrapper):
@@ -104,280 +765,53 @@ def function_args_to_output_purpose(args) -> Purpose:
104
765
  return Purpose.PROPERTY
105
766
 
106
767
 
107
- def Unnest(args: list[Concept]) -> Function:
108
- output = arg_to_datatype(args[0])
109
- if isinstance(output, (ListType)):
110
- output = output.value_data_type
111
- else:
112
- output = DataType.STRING
113
- return Function(
114
- operator=FunctionType.UNNEST,
115
- arguments=args,
116
- output_datatype=output,
117
- output_purpose=Purpose.KEY,
118
- arg_count=1,
119
- valid_inputs={
120
- DataType.ARRAY,
121
- DataType.LIST,
122
- ListType(type=DataType.STRING),
123
- ListType(type=DataType.INTEGER),
124
- },
125
- )
126
-
127
-
128
- def Group(args: list[Concept]) -> Function:
129
- output = args[0]
130
- datatype = arg_to_datatype(output)
131
- return Function(
132
- operator=FunctionType.GROUP,
133
- arguments=args,
134
- output_datatype=datatype,
135
- output_purpose=Purpose.PROPERTY,
136
- arg_count=-1,
137
- )
138
-
139
-
140
- def Count(args: list[Concept]) -> Function:
141
- return Function(
142
- operator=FunctionType.COUNT,
143
- arguments=args,
144
- output_datatype=DataType.INTEGER,
145
- output_purpose=Purpose.METRIC,
146
- arg_count=1,
147
- )
148
-
149
-
150
- def CountDistinct(args: list[Concept]) -> Function:
151
- return Function(
152
- operator=FunctionType.COUNT_DISTINCT,
153
- arguments=args,
154
- output_datatype=DataType.INTEGER,
155
- output_purpose=Purpose.METRIC,
156
- arg_count=1,
157
- )
158
-
159
-
160
- def Max(args: list[Concept]) -> Function:
161
- return Function(
162
- operator=FunctionType.MAX,
163
- arguments=args,
164
- output_datatype=args[0].datatype,
165
- output_purpose=Purpose.METRIC,
166
- valid_inputs={
167
- DataType.INTEGER,
168
- DataType.FLOAT,
169
- DataType.NUMBER,
170
- DataType.DATE,
171
- DataType.DATETIME,
172
- DataType.TIMESTAMP,
173
- DataType.BOOL,
174
- },
175
- arg_count=1,
176
- # output_grain=Grain(components=arguments),
177
- )
178
-
179
-
180
- def Min(args: list[Concept]) -> Function:
181
- return Function(
182
- operator=FunctionType.MIN,
183
- arguments=args,
184
- output_datatype=args[0].datatype,
185
- output_purpose=Purpose.METRIC,
186
- valid_inputs={
187
- DataType.INTEGER,
188
- DataType.FLOAT,
189
- DataType.NUMBER,
190
- DataType.DATE,
191
- DataType.DATETIME,
192
- DataType.TIMESTAMP,
193
- },
194
- arg_count=1,
195
- # output_grain=Grain(components=arguments),
196
- )
197
-
198
-
199
- def Split(args: list[Concept]) -> Function:
200
- # TODO: overload this for non-string types?
201
- return Function(
202
- operator=FunctionType.SPLIT,
203
- arguments=args,
204
- # first arg sets properties
205
- output_datatype=ListType(type=DataType.STRING),
206
- output_purpose=function_args_to_output_purpose(args),
207
- valid_inputs={DataType.STRING},
208
- arg_count=2,
209
- )
210
-
211
-
212
- def get_index_output_type(
213
- arg: Concept,
214
- ) -> DataType | StructType | MapType | ListType | NumericType:
215
- if isinstance(arg.datatype, ListType):
216
- return arg.datatype.value_data_type
217
- elif isinstance(arg.datatype, MapType):
218
- return arg.datatype.value_data_type
219
- return arg.datatype
220
-
221
-
222
- def IndexAccess(args: list[Concept]):
223
- return Function(
224
- operator=FunctionType.INDEX_ACCESS,
225
- arguments=args,
226
- output_datatype=get_index_output_type(args[0]),
227
- output_purpose=Purpose.PROPERTY,
228
- valid_inputs=[
229
- {
230
- DataType.LIST,
231
- ListType(type=DataType.STRING),
232
- ListType(type=DataType.INTEGER),
233
- },
234
- {
235
- DataType.INTEGER,
236
- },
237
- ],
238
- arg_count=2,
239
- )
240
-
241
-
242
- def MapAccess(args: list[Concept]):
243
- return Function(
244
- operator=FunctionType.MAP_ACCESS,
245
- arguments=args,
246
- output_datatype=get_index_output_type(args[0]),
247
- output_purpose=Purpose.PROPERTY,
248
- valid_inputs=[
249
- {
250
- DataType.MAP,
251
- },
252
- {
253
- DataType.INTEGER,
254
- DataType.STRING,
255
- },
256
- ],
257
- arg_count=2,
258
- )
259
-
260
-
261
- def get_attr_datatype(
262
- arg: Concept, lookup
263
- ) -> DataType | ListType | StructType | MapType | NumericType:
264
- if isinstance(arg.datatype, StructType):
265
- return arg_to_datatype(arg.datatype.fields_map[lookup])
266
- return arg.datatype
267
-
268
-
269
- def AttrAccess(args: list[GENERIC_ARGS]):
270
- return Function(
271
- operator=FunctionType.ATTR_ACCESS,
272
- arguments=args,
273
- output_datatype=get_attr_datatype(args[0], args[1]), # type: ignore
274
- output_purpose=Purpose.PROPERTY,
275
- valid_inputs=[
276
- {DataType.STRUCT},
277
- {
278
- DataType.STRING,
279
- },
280
- ],
281
- arg_count=2,
768
def Count(args: list[Concept], environment: Environment) -> Function:
    """Build a COUNT function over ``args`` via ``FunctionFactory``."""
    factory = FunctionFactory(environment)
    return factory.create_function(args=args, operator=FunctionType.COUNT)
283
772
 
284
773
 
285
- def Abs(args: list[Concept]) -> Function:
286
- return Function(
287
- operator=FunctionType.ABS,
288
- arguments=args,
289
- output_datatype=args[0].datatype,
290
- output_purpose=function_args_to_output_purpose(args),
291
- valid_inputs={
292
- DataType.INTEGER,
293
- DataType.FLOAT,
294
- DataType.NUMBER,
295
- },
296
- arg_count=1,
297
- # output_grain=Grain(components=arguments),
774
def CountDistinct(args: list[Concept], environment: Environment) -> Function:
    """Build a COUNT_DISTINCT function over ``args`` via ``FunctionFactory``."""
    # Must use COUNT_DISTINCT: passing COUNT here silently produced a plain
    # count instead of a distinct count.
    return FunctionFactory(environment).create_function(
        args=args, operator=FunctionType.COUNT_DISTINCT
    )
299
778
 
300
779
 
301
- def Coalesce(args: list[Concept]) -> Function:
302
- non_null = [x for x in args if not x == MagicConstants.NULL]
303
- if not len(set(arg_to_datatype(x) for x in non_null if x)) == 1:
304
- raise InvalidSyntaxException(
305
- f"All arguments to coalesce must be of the same type, have {set(arg_to_datatype(x) for x in args)}"
306
- )
307
- return Function(
308
- operator=FunctionType.COALESCE,
309
- arguments=args,
310
- output_datatype=arg_to_datatype(non_null[0]),
311
- output_purpose=function_args_to_output_purpose(non_null),
312
- arg_count=-1,
313
- # output_grain=Grain(components=arguments),
780
def Max(args: list[Concept], environment: Environment) -> Function:
    """Build a MAX function over ``args`` via ``FunctionFactory``."""
    # Must use MAX: passing COUNT here produced a count instead of a maximum.
    return FunctionFactory(environment).create_function(
        args=args, operator=FunctionType.MAX
    )
315
784
 
316
785
 
317
- def CurrentDate(args: list[Concept]) -> Function:
318
- return Function(
319
- operator=FunctionType.CURRENT_DATE,
320
- arguments=args,
321
- output_datatype=DataType.DATE,
322
- output_purpose=Purpose.CONSTANT,
323
- arg_count=0,
324
- # output_grain=Grain(components=arguments),
786
def Min(args: list[Concept], environment: Environment) -> Function:
    """Build a MIN function over ``args`` via ``FunctionFactory``."""
    # Must use MIN: passing COUNT here produced a count instead of a minimum.
    return FunctionFactory(environment).create_function(
        args=args, operator=FunctionType.MIN
    )
326
790
 
327
791
 
328
- def CurrentDatetime(args: list[Concept]) -> Function:
329
- return Function(
330
- operator=FunctionType.CURRENT_DATETIME,
331
- arguments=args,
332
- output_datatype=DataType.DATE,
333
- output_purpose=Purpose.CONSTANT,
334
- arg_count=0,
335
- # output_grain=Grain(components=arguments),
792
def Split(args: list[Concept], environment: Environment) -> Function:
    """Build a SPLIT function over ``args`` via ``FunctionFactory``."""
    factory = FunctionFactory(environment)
    return factory.create_function(args=args, operator=FunctionType.SPLIT)
337
796
 
338
797
 
339
- def IsNull(args: list[Concept]) -> Function:
340
- return Function(
341
- operator=FunctionType.IS_NULL,
342
- arguments=args,
343
- output_datatype=DataType.BOOL,
344
- output_purpose=function_args_to_output_purpose(args),
345
- arg_count=1,
346
- # output_grain=Grain(components=arguments),
347
- )
348
-
349
-
350
- def Bool(args: list[Concept]) -> Function:
351
- return Function(
352
- operator=FunctionType.BOOL,
353
- arguments=args,
354
- output_datatype=DataType.BOOL,
355
- output_purpose=function_args_to_output_purpose(args),
356
- arg_count=1,
357
- # output_grain=Grain(components=arguments),
798
def AttrAccess(args: list[GENERIC_ARGS], environment: Environment):
    """Build an ATTR_ACCESS function over ``args`` via ``FunctionFactory``."""
    factory = FunctionFactory(environment)
    return factory.create_function(args=args, operator=FunctionType.ATTR_ACCESS)
359
802
 
360
803
 
361
- def StrPos(args: list[Concept]) -> Function:
362
- return Function(
363
- operator=FunctionType.STRPOS,
364
- arguments=args,
365
- output_datatype=DataType.INTEGER,
366
- output_purpose=function_args_to_output_purpose(args),
367
- arg_count=2,
368
- valid_inputs=[
369
- {DataType.STRING},
370
- {DataType.STRING},
371
- ],
804
def CurrentDate(
    args: list[Concept], environment: Environment | None = None
) -> Function:
    """Build a CURRENT_DATE function via ``FunctionFactory``.

    The environment is optional; the factory only requires one when ``args``
    is non-empty.
    """
    factory = FunctionFactory(environment)
    return factory.create_function(args=args, operator=FunctionType.CURRENT_DATE)
373
810
 
374
811
 
375
- def SubString(args: list[Concept]) -> Function:
376
- return Function(
377
- operator=FunctionType.SUBSTRING,
378
- arguments=args,
379
- output_datatype=DataType.STRING,
380
- output_purpose=function_args_to_output_purpose(args),
381
- arg_count=3,
382
- valid_inputs=[{DataType.STRING}, {DataType.INTEGER}, {DataType.INTEGER}],
812
def CurrentDatetime(
    args: list[Concept], environment: Environment | None = None
) -> Function:
    """Build a CURRENT_DATETIME function via ``FunctionFactory``.

    The environment is optional; the factory only requires one when ``args``
    is non-empty.
    """
    factory = FunctionFactory(environment)
    return factory.create_function(
        args=args, operator=FunctionType.CURRENT_DATETIME
    )