pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (76)
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1859 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +701 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +178 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +55 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +18 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +70 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +148 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/__init__.py +4 -0
  65. trilogy/hooks/base_hook.py +6 -4
  66. trilogy/hooks/query_debugger.py +113 -97
  67. trilogy/parser.py +1 -1
  68. trilogy/parsing/common.py +307 -64
  69. trilogy/parsing/parse_engine.py +277 -618
  70. trilogy/parsing/render.py +50 -26
  71. trilogy/scripts/trilogy.py +2 -1
  72. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  73. trilogy/core/models.py +0 -4960
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
  76. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  from dataclasses import dataclass
2
2
  from datetime import date, datetime
3
+ from enum import Enum
3
4
  from os.path import dirname, join
4
5
  from pathlib import Path
5
6
  from re import IGNORECASE
@@ -26,8 +27,9 @@ from trilogy.core.enums import (
26
27
  ComparisonOperator,
27
28
  ConceptSource,
28
29
  DatePart,
30
+ Derivation,
29
31
  FunctionType,
30
- InfiniteFunctionArgs,
32
+ Granularity,
31
33
  IOType,
32
34
  Modifier,
33
35
  Ordering,
@@ -38,97 +40,91 @@ from trilogy.core.enums import (
38
40
  )
39
41
  from trilogy.core.exceptions import InvalidSyntaxException, UndefinedConceptException
40
42
  from trilogy.core.functions import (
41
- Abs,
42
- AttrAccess,
43
- Bool,
44
- Coalesce,
45
- Count,
46
- CountDistinct,
47
43
  CurrentDate,
48
44
  CurrentDatetime,
49
- Group,
50
- IndexAccess,
51
- IsNull,
52
- MapAccess,
53
- Max,
54
- Min,
55
- Split,
56
- StrPos,
57
- SubString,
58
- Unnest,
59
- function_args_to_output_purpose,
45
+ FunctionFactory,
60
46
  )
61
47
  from trilogy.core.internal import ALL_ROWS_CONCEPT, INTERNAL_NAMESPACE
62
- from trilogy.core.models import (
63
- Address,
48
+ from trilogy.core.models.author import (
64
49
  AggregateWrapper,
65
50
  AlignClause,
66
51
  AlignItem,
67
52
  CaseElse,
68
53
  CaseWhen,
69
- ColumnAssignment,
70
54
  Comment,
71
55
  Comparison,
72
56
  Concept,
73
- ConceptDeclarationStatement,
74
- ConceptDerivation,
75
- ConceptTransform,
57
+ ConceptRef,
76
58
  Conditional,
77
- CopyStatement,
78
- Datasource,
79
- DataType,
80
- Environment,
81
- EnvironmentConceptDict,
59
+ Expr,
82
60
  FilterItem,
83
61
  Function,
84
62
  Grain,
85
63
  HavingClause,
86
- ImportStatement,
87
- Limit,
88
- ListType,
89
- ListWrapper,
90
- MapType,
91
- MapWrapper,
92
- MergeStatementV2,
93
64
  Metadata,
94
- MultiSelectStatement,
95
- NumericType,
96
65
  OrderBy,
97
66
  OrderItem,
98
67
  Parenthetical,
99
- PersistStatement,
100
- Query,
101
- RawColumnExpr,
102
- RawSQLStatement,
103
- RowsetDerivationStatement,
104
- SelectItem,
105
- SelectStatement,
106
- ShowStatement,
107
- StructType,
108
68
  SubselectComparison,
109
- TupleWrapper,
110
69
  WhereClause,
111
70
  Window,
112
71
  WindowItem,
113
72
  WindowItemOrder,
114
73
  WindowItemOver,
74
+ )
75
+ from trilogy.core.models.core import (
76
+ DataType,
77
+ ListType,
78
+ ListWrapper,
79
+ MapType,
80
+ MapWrapper,
81
+ NumericType,
82
+ StructType,
83
+ TupleWrapper,
115
84
  arg_to_datatype,
116
85
  dict_to_map_wrapper,
117
86
  list_to_wrapper,
118
- merge_datatypes,
119
87
  tuple_to_wrapper,
120
88
  )
89
+ from trilogy.core.models.datasource import (
90
+ Address,
91
+ ColumnAssignment,
92
+ Datasource,
93
+ Query,
94
+ RawColumnExpr,
95
+ )
96
+ from trilogy.core.models.environment import Environment, Import
97
+ from trilogy.core.statements.author import (
98
+ ConceptDeclarationStatement,
99
+ ConceptDerivationStatement,
100
+ ConceptTransform,
101
+ CopyStatement,
102
+ ImportStatement,
103
+ Limit,
104
+ MergeStatementV2,
105
+ MultiSelectStatement,
106
+ PersistStatement,
107
+ RawSQLStatement,
108
+ RowsetDerivationStatement,
109
+ SelectItem,
110
+ SelectStatement,
111
+ ShowStatement,
112
+ )
121
113
  from trilogy.parsing.common import (
122
- agg_wrapper_to_concept,
114
+ align_item_to_concept,
123
115
  arbitrary_to_concept,
124
116
  constant_to_concept,
125
- filter_item_to_concept,
126
- function_to_concept,
127
117
  process_function_args,
128
- window_item_to_concept,
118
+ rowset_to_concepts,
129
119
  )
130
120
  from trilogy.parsing.exceptions import ParseError
131
121
 
122
+
123
+ class ParsePass(Enum):
124
+ INITIAL = 1
125
+ VALIDATION = 2
126
+
127
+
132
128
  CONSTANT_TYPES = (int, float, str, bool, list, ListWrapper, MapWrapper)
133
129
 
134
130
  SELF_LABEL = "root"
@@ -188,27 +184,19 @@ def expr_to_boolean(
188
184
 
189
185
 
190
186
  def unwrap_transformation(
191
- input: Union[
192
- FilterItem,
193
- WindowItem,
194
- Concept,
195
- Function,
196
- AggregateWrapper,
197
- int,
198
- str,
199
- float,
200
- bool,
201
- ],
187
+ input: Expr,
188
+ environment: Environment,
202
189
  ) -> Function | FilterItem | WindowItem | AggregateWrapper:
203
190
  if isinstance(input, Function):
204
191
  return input
205
192
  elif isinstance(input, AggregateWrapper):
206
193
  return input
207
- elif isinstance(input, Concept):
194
+ elif isinstance(input, ConceptRef):
195
+ concept = environment.concepts[input.address]
208
196
  return Function(
209
197
  operator=FunctionType.ALIAS,
210
- output_datatype=input.datatype,
211
- output_purpose=input.purpose,
198
+ output_datatype=concept.datatype,
199
+ output_purpose=concept.purpose,
212
200
  arguments=[input],
213
201
  )
214
202
  elif isinstance(input, FilterItem):
@@ -216,7 +204,7 @@ def unwrap_transformation(
216
204
  elif isinstance(input, WindowItem):
217
205
  return input
218
206
  elif isinstance(input, Parenthetical):
219
- return unwrap_transformation(input.content)
207
+ return unwrap_transformation(input.content, environment)
220
208
  else:
221
209
  return Function(
222
210
  operator=FunctionType.CONSTANT,
@@ -247,7 +235,8 @@ class ParseToObjects(Transformer):
247
235
  )
248
236
  # we do a second pass to pick up circular dependencies
249
237
  # after initial parsing
250
- self.pass_count = 1
238
+ self.parse_pass = ParsePass.INITIAL
239
+ self.function_factory = FunctionFactory(self.environment)
251
240
 
252
241
  def set_text(self, text: str):
253
242
  self.text_lookup[self.token_address] = text
@@ -258,15 +247,15 @@ class ParseToObjects(Transformer):
258
247
  return results
259
248
 
260
249
  def prepare_parse(self):
261
- self.pass_count = 1
250
+ self.parse_pass = ParsePass.INITIAL
262
251
  self.environment.concepts.fail_on_missing = False
263
252
  for _, v in self.parsed.items():
264
253
  v.prepare_parse()
265
254
 
266
255
  def hydrate_missing(self):
267
- self.pass_count = 2
256
+ self.parse_pass = ParsePass.VALIDATION
268
257
  for k, v in self.parsed.items():
269
- if v.pass_count == 2:
258
+ if v.parse_pass == ParsePass.VALIDATION:
270
259
  continue
271
260
  v.hydrate_missing()
272
261
  reparsed = self.transform(self.tokens[self.token_address])
@@ -312,10 +301,23 @@ class ParseToObjects(Transformer):
312
301
  def QUOTED_IDENTIFIER(self, args) -> str:
313
302
  return args.value[1:-1]
314
303
 
304
+ # @v_args(meta=True)
305
+ # def concept_lit(self, meta: Meta, args) -> ConceptRef:
306
+ # address = args[0]
307
+ # return self.environment.concepts.__getitem__(address, meta.line)
308
+ # return ConceptRef(address=address, line_no=meta.line)
315
309
  @v_args(meta=True)
316
- def concept_lit(self, meta: Meta, args) -> Concept:
310
+ def concept_lit(self, meta: Meta, args) -> ConceptRef:
317
311
  address = args[0]
318
- return self.environment.concepts.__getitem__(address, meta.line)
312
+ if "." not in address and self.environment.namespace == DEFAULT_NAMESPACE:
313
+ address = f"{DEFAULT_NAMESPACE}.{address}"
314
+ mapping = self.environment.concepts[address]
315
+ datatype = mapping.output_datatype
316
+ return ConceptRef(
317
+ address=mapping.address,
318
+ metadata=Metadata(line_number=meta.line),
319
+ datatype=datatype,
320
+ )
319
321
 
320
322
  def ADDRESS(self, args) -> Address:
321
323
  return Address(location=args.value, quoted=False)
@@ -399,7 +401,9 @@ class ParseToObjects(Transformer):
399
401
  resolved = self.environment.concepts.__getitem__( # type: ignore
400
402
  key=concept, line_no=meta.line, file=self.token_address
401
403
  )
402
- return ColumnAssignment(alias=alias, modifiers=modifiers, concept=resolved)
404
+ return ColumnAssignment(
405
+ alias=alias, modifiers=modifiers, concept=resolved.reference
406
+ )
403
407
 
404
408
  def _TERMINATOR(self, args):
405
409
  return None
@@ -489,14 +493,16 @@ class ParseToObjects(Transformer):
489
493
  metadata=metadata,
490
494
  namespace=namespace,
491
495
  modifiers=modifiers,
496
+ derivation=Derivation.ROOT,
497
+ granularity=Granularity.MULTI_ROW,
492
498
  )
493
499
  if concept.metadata:
494
500
  concept.metadata.line_number = meta.line
495
- self.environment.add_concept(concept, meta=meta, force=True)
501
+ self.environment.add_concept(concept, meta=meta)
496
502
  return ConceptDeclarationStatement(concept=concept)
497
503
 
498
504
  @v_args(meta=True)
499
- def concept_derivation(self, meta: Meta, args) -> ConceptDerivation:
505
+ def concept_derivation(self, meta: Meta, args) -> ConceptDerivationStatement:
500
506
  if len(args) > 3:
501
507
  metadata = args[3]
502
508
  else:
@@ -539,7 +545,7 @@ class ParseToObjects(Transformer):
539
545
  if concept.metadata:
540
546
  concept.metadata.line_number = meta.line
541
547
  self.environment.add_concept(concept, meta=meta)
542
- return ConceptDerivation(concept=concept)
548
+ return ConceptDerivationStatement(concept=concept)
543
549
 
544
550
  elif isinstance(source_value, CONSTANT_TYPES):
545
551
  concept = constant_to_concept(
@@ -552,7 +558,7 @@ class ParseToObjects(Transformer):
552
558
  if concept.metadata:
553
559
  concept.metadata.line_number = meta.line
554
560
  self.environment.add_concept(concept, meta=meta)
555
- return ConceptDerivation(concept=concept)
561
+ return ConceptDerivationStatement(concept=concept)
556
562
 
557
563
  raise SyntaxError(
558
564
  f"Received invalid type {type(args[2])} {args[2]} as input to select"
@@ -570,12 +576,24 @@ class ParseToObjects(Transformer):
570
576
  select=select,
571
577
  namespace=self.environment.namespace or DEFAULT_NAMESPACE,
572
578
  )
573
- for new_concept in output.derived_concepts:
579
+
580
+ # clean up current definitions
581
+ # to_delete = set()
582
+ # if output.name in self.environment.named_statements:
583
+ # for k, v in self.environment.concepts.items():
584
+ # if v.derivation == Derivation.ROWSET and v.lineage.rowset.name == name:
585
+ # to_delete.add(k)
586
+ # for k in to_delete:
587
+ # self.environment.concepts.pop(k)
588
+
589
+ for new_concept in rowset_to_concepts(output, self.environment):
574
590
  if new_concept.metadata:
575
591
  new_concept.metadata.line_number = meta.line
576
- # output.select.local_concepts[new_concept.address] = new_concept
577
- self.environment.add_concept(new_concept)
592
+ self.environment.add_concept(new_concept, force=True)
578
593
 
594
+ self.environment.add_rowset(
595
+ output.name, output.select.as_lineage(self.environment)
596
+ )
579
597
  return output
580
598
 
581
599
  @v_args(meta=True)
@@ -672,15 +690,14 @@ class ParseToObjects(Transformer):
672
690
  where=where,
673
691
  non_partial_for=non_partial_for,
674
692
  )
675
- for column in columns:
676
- column.concept = column.concept.with_grain(datasource.grain)
677
693
  if datasource.where:
678
694
  for x in datasource.where.concept_arguments:
679
695
  if x.address not in datasource.output_concepts:
680
696
  raise ValueError(
681
697
  f"Datasource {name} where condition depends on concept {x.address} that does not exist on the datasource, line {meta.line}."
682
698
  )
683
- self.environment.add_datasource(datasource, meta=meta)
699
+ if self.parse_pass == ParsePass.VALIDATION:
700
+ self.environment.add_datasource(datasource, meta=meta)
684
701
  return datasource
685
702
 
686
703
  @v_args(meta=True)
@@ -694,39 +711,20 @@ class ParseToObjects(Transformer):
694
711
  @v_args(meta=True)
695
712
  def select_transform(self, meta: Meta, args) -> ConceptTransform:
696
713
  output: str = args[1]
697
- transformation = unwrap_transformation(args[0])
714
+ transformation = unwrap_transformation(args[0], self.environment)
698
715
  lookup, namespace, output, parent = parse_concept_reference(
699
716
  output, self.environment
700
717
  )
701
718
 
702
719
  metadata = Metadata(line_number=meta.line, concept_source=ConceptSource.SELECT)
703
720
 
704
- if isinstance(transformation, AggregateWrapper):
705
- concept = agg_wrapper_to_concept(
706
- transformation, namespace=namespace, name=output, metadata=metadata
707
- )
708
- elif isinstance(transformation, WindowItem):
709
- concept = window_item_to_concept(
710
- transformation, namespace=namespace, name=output, metadata=metadata
711
- )
712
- elif isinstance(transformation, FilterItem):
713
- concept = filter_item_to_concept(
714
- transformation, namespace=namespace, name=output, metadata=metadata
715
- )
716
- elif isinstance(transformation, CONSTANT_TYPES):
717
- concept = constant_to_concept(
718
- transformation, namespace=namespace, name=output, metadata=metadata
719
- )
720
- elif isinstance(transformation, Function):
721
- concept = function_to_concept(
722
- transformation,
723
- namespace=namespace,
724
- name=output,
725
- metadata=metadata,
726
- environment=self.environment,
727
- )
728
- else:
729
- raise SyntaxError("Invalid transformation")
721
+ concept = arbitrary_to_concept(
722
+ transformation,
723
+ environment=self.environment,
724
+ namespace=namespace,
725
+ name=output,
726
+ metadata=metadata,
727
+ )
730
728
 
731
729
  return ConceptTransform(function=transformation, output=concept)
732
730
 
@@ -775,20 +773,10 @@ class ParseToObjects(Transformer):
775
773
  return Ordering(" ".join([base, "nulls", null_sort.lower()]))
776
774
  return Ordering(base)
777
775
 
778
- def order_list(self, args):
779
- def handle_order_item(x, namespace: str):
780
- if not isinstance(x, Concept):
781
- x = arbitrary_to_concept(
782
- x, namespace=namespace, environment=self.environment
783
- )
784
- return x
785
-
776
+ def order_list(self, args) -> List[OrderItem]:
786
777
  return [
787
778
  OrderItem(
788
- expr=handle_order_item(
789
- x,
790
- self.environment.namespace,
791
- ),
779
+ expr=x,
792
780
  order=y,
793
781
  )
794
782
  for x, y in zip(args[::2], args[1::2])
@@ -801,7 +789,7 @@ class ParseToObjects(Transformer):
801
789
  return [x for x in args]
802
790
 
803
791
  @v_args(meta=True)
804
- def merge_statement(self, meta: Meta, args) -> MergeStatementV2:
792
+ def merge_statement(self, meta: Meta, args) -> MergeStatementV2 | None:
805
793
  modifiers = []
806
794
  cargs: list[str] = []
807
795
  source_wildcard = None
@@ -817,12 +805,12 @@ class ParseToObjects(Transformer):
817
805
  raise ValueError("Invalid merge, source is wildcard, target is not")
818
806
  source_wildcard = source[:-2]
819
807
  target_wildcard = target[:-2]
820
- sources = [
808
+ sources: list[Concept] = [
821
809
  v
822
810
  for k, v in self.environment.concepts.items()
823
811
  if v.namespace == source_wildcard
824
812
  ]
825
- targets = {}
813
+ targets: dict[str, Concept] = {}
826
814
  for x in sources:
827
815
  target = target_wildcard + "." + x.name
828
816
  if target in self.environment.concepts:
@@ -831,19 +819,22 @@ class ParseToObjects(Transformer):
831
819
  else:
832
820
  sources = [self.environment.concepts[source]]
833
821
  targets = {sources[0].address: self.environment.concepts[target]}
834
- new = MergeStatementV2(
835
- sources=sources,
836
- targets=targets,
837
- modifiers=modifiers,
838
- source_wildcard=source_wildcard,
839
- target_wildcard=target_wildcard,
840
- )
841
- for source_c in new.sources:
842
- self.environment.merge_concept(
843
- source_c, targets[source_c.address], modifiers
822
+
823
+ if self.parse_pass == ParsePass.VALIDATION:
824
+ new = MergeStatementV2(
825
+ sources=sources,
826
+ targets=targets,
827
+ modifiers=modifiers,
828
+ source_wildcard=source_wildcard,
829
+ target_wildcard=target_wildcard,
844
830
  )
831
+ for source_c in new.sources:
832
+ self.environment.merge_concept(
833
+ source_c, targets[source_c.address], modifiers
834
+ )
845
835
 
846
- return new
836
+ return new
837
+ return None
847
838
 
848
839
  @v_args(meta=True)
849
840
  def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
@@ -919,7 +910,9 @@ class ParseToObjects(Transformer):
919
910
  ) from e
920
911
 
921
912
  imps = ImportStatement(alias=alias, path=Path(args[0]))
922
- self.environment.add_import(alias, new_env, imps)
913
+ self.environment.add_import(
914
+ alias, new_env, Import(alias=alias, path=Path(args[0]))
915
+ )
923
916
  return imps
924
917
 
925
918
  @v_args(meta=True)
@@ -931,7 +924,7 @@ class ParseToObjects(Transformer):
931
924
  return ShowStatement(content=args[0])
932
925
 
933
926
  @v_args(meta=True)
934
- def persist_statement(self, meta: Meta, args) -> PersistStatement:
927
+ def persist_statement(self, meta: Meta, args) -> PersistStatement | None:
935
928
  identifier: str = args[0]
936
929
  address: str = args[1]
937
930
  select: SelectStatement = args[2]
@@ -939,29 +932,31 @@ class ParseToObjects(Transformer):
939
932
  grain: Grain | None = args[3]
940
933
  else:
941
934
  grain = None
942
-
943
- new_datasource = select.to_datasource(
944
- namespace=(
945
- self.environment.namespace
946
- if self.environment.namespace
947
- else DEFAULT_NAMESPACE
948
- ),
949
- name=identifier,
950
- address=Address(location=address),
951
- grain=grain,
952
- )
953
- return PersistStatement(
954
- select=select,
955
- datasource=new_datasource,
956
- meta=Metadata(line_number=meta.line),
957
- )
935
+ if self.parse_pass == ParsePass.VALIDATION:
936
+ new_datasource = select.to_datasource(
937
+ namespace=(
938
+ self.environment.namespace
939
+ if self.environment.namespace
940
+ else DEFAULT_NAMESPACE
941
+ ),
942
+ name=identifier,
943
+ address=Address(location=address),
944
+ grain=grain,
945
+ environment=self.environment,
946
+ )
947
+ return PersistStatement(
948
+ select=select,
949
+ datasource=new_datasource,
950
+ meta=Metadata(line_number=meta.line),
951
+ )
952
+ return None
958
953
 
959
954
  @v_args(meta=True)
960
955
  def align_item(self, meta: Meta, args) -> AlignItem:
961
956
  return AlignItem(
962
957
  alias=args[0],
963
958
  namespace=self.environment.namespace,
964
- concepts=[self.environment.concepts[arg] for arg in args[1:]],
959
+ concepts=[self.environment.concepts[arg].reference for arg in args[1:]],
965
960
  )
966
961
 
967
962
  @v_args(meta=True)
@@ -970,11 +965,13 @@ class ParseToObjects(Transformer):
970
965
 
971
966
  @v_args(meta=True)
972
967
  def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
968
+
973
969
  selects: list[SelectStatement] = []
974
970
  align: AlignClause | None = None
975
971
  limit: int | None = None
976
972
  order_by: OrderBy | None = None
977
973
  where: WhereClause | None = None
974
+ having: HavingClause | None = None
978
975
  for arg in args:
979
976
  if isinstance(arg, SelectStatement):
980
977
  selects.append(arg)
@@ -984,15 +981,27 @@ class ParseToObjects(Transformer):
984
981
  order_by = arg
985
982
  elif isinstance(arg, WhereClause):
986
983
  where = arg
984
+ elif isinstance(arg, HavingClause):
985
+ having = arg
987
986
  elif isinstance(arg, AlignClause):
988
987
  align = arg
989
988
 
990
989
  assert align
991
990
  assert align is not None
992
- base_local: EnvironmentConceptDict = selects[0].local_concepts
993
- for select in selects[1:]:
994
- for k, v in select.local_concepts.items():
995
- base_local[k] = v
991
+
992
+ derived_concepts = []
993
+ for x in align.items:
994
+ concept = align_item_to_concept(
995
+ x,
996
+ align,
997
+ selects,
998
+ where=where,
999
+ having=having,
1000
+ limit=limit,
1001
+ environment=self.environment,
1002
+ )
1003
+ derived_concepts.append(concept)
1004
+ self.environment.add_concept(concept, meta=meta)
996
1005
  multi = MultiSelectStatement(
997
1006
  selects=selects,
998
1007
  align=align,
@@ -1001,10 +1010,8 @@ class ParseToObjects(Transformer):
1001
1010
  order_by=order_by,
1002
1011
  limit=limit,
1003
1012
  meta=Metadata(line_number=meta.line),
1004
- local_concepts=base_local,
1013
+ derived_concepts=derived_concepts,
1005
1014
  )
1006
- for concept in multi.derived_concepts:
1007
- self.environment.add_concept(concept, meta=meta)
1008
1015
  return multi
1009
1016
 
1010
1017
  @v_args(meta=True)
@@ -1114,15 +1121,10 @@ class ParseToObjects(Transformer):
1114
1121
  return ""
1115
1122
  return args[0]
1116
1123
 
1117
- def struct_lit(self, args):
1118
- zipped = dict(zip(args[::2], args[1::2]))
1119
- types = [arg_to_datatype(x) for x in args[1::2]]
1120
- return Function(
1121
- operator=FunctionType.STRUCT,
1122
- output_datatype=StructType(fields=types, fields_map=zipped),
1123
- output_purpose=function_args_to_output_purpose(args),
1124
- arguments=args,
1125
- arg_count=-1,
1124
+ @v_args(meta=True)
1125
+ def struct_lit(self, meta, args):
1126
+ return self.function_factory.create_function(
1127
+ args, operator=FunctionType.STRUCT, meta=meta
1126
1128
  )
1127
1129
 
1128
1130
  def map_lit(self, args):
@@ -1137,19 +1139,21 @@ class ParseToObjects(Transformer):
1137
1139
  if args[1] == ComparisonOperator.IN:
1138
1140
  raise SyntaxError
1139
1141
  if isinstance(args[0], AggregateWrapper):
1140
- left = arbitrary_to_concept(
1142
+ left_c = arbitrary_to_concept(
1141
1143
  args[0],
1142
1144
  environment=self.environment,
1143
1145
  )
1144
- self.environment.add_concept(left)
1146
+ self.environment.add_concept(left_c)
1147
+ left = left_c.reference
1145
1148
  else:
1146
1149
  left = args[0]
1147
1150
  if isinstance(args[2], AggregateWrapper):
1148
- right = arbitrary_to_concept(
1151
+ right_c = arbitrary_to_concept(
1149
1152
  args[2],
1150
1153
  environment=self.environment,
1151
1154
  )
1152
- self.environment.add_concept(right)
1155
+ self.environment.add_concept(right_c)
1156
+ right = right_c.reference
1153
1157
  else:
1154
1158
  right = args[2]
1155
1159
  return Comparison(left=left, right=right, operator=args[1])
@@ -1185,8 +1189,9 @@ class ParseToObjects(Transformer):
1185
1189
  ):
1186
1190
  right = right.content
1187
1191
  if isinstance(right, (Function, FilterItem, WindowItem, AggregateWrapper)):
1188
- right = arbitrary_to_concept(right, environment=self.environment)
1189
- self.environment.add_concept(right, meta=meta)
1192
+ right_concept = arbitrary_to_concept(right, environment=self.environment)
1193
+ self.environment.add_concept(right_concept, meta=meta)
1194
+ right = right_concept.reference
1190
1195
  return SubselectComparison(
1191
1196
  left=args[0],
1192
1197
  right=right,
@@ -1233,6 +1238,12 @@ class ParseToObjects(Transformer):
1233
1238
  def window_item_order(self, args):
1234
1239
  return WindowItemOrder(contents=args[0])
1235
1240
 
1241
+ def logical_operator(self, args):
1242
+ return BooleanOperator(args[0].value.lower())
1243
+
1244
+ def DATE_PART(self, args):
1245
+ return DatePart(args.value)
1246
+
1236
1247
  @v_args(meta=True)
1237
1248
  def window_item(self, meta, args) -> WindowItem:
1238
1249
  type: WindowType = args[0]
@@ -1249,8 +1260,8 @@ class ParseToObjects(Transformer):
1249
1260
  over = item.contents
1250
1261
  elif isinstance(item, str):
1251
1262
  concept = self.environment.concepts[item]
1252
- elif isinstance(item, Concept):
1253
- concept = item
1263
+ elif isinstance(item, ConceptRef):
1264
+ concept = self.environment.concepts[item.address]
1254
1265
  elif isinstance(item, WindowType):
1255
1266
  type = item
1256
1267
  else:
@@ -1258,7 +1269,11 @@ class ParseToObjects(Transformer):
1258
1269
  self.environment.add_concept(concept, meta=meta)
1259
1270
  assert concept
1260
1271
  return WindowItem(
1261
- type=type, content=concept, over=over, order_by=order_by, index=index
1272
+ type=type,
1273
+ content=concept.reference,
1274
+ over=over,
1275
+ order_by=order_by,
1276
+ index=index,
1262
1277
  )
1263
1278
 
1264
1279
  def filter_item(self, args) -> FilterItem:
@@ -1268,7 +1283,7 @@ class ParseToObjects(Transformer):
1268
1283
  where = raw
1269
1284
  else:
1270
1285
  where = WhereClause(conditional=raw)
1271
- concept = self.environment.concepts[string_concept]
1286
+ concept = self.environment.concepts[string_concept].reference
1272
1287
  return FilterItem(content=concept, where=where)
1273
1288
 
1274
1289
  # BEGIN FUNCTIONS
@@ -1285,7 +1300,12 @@ class ParseToObjects(Transformer):
1285
1300
  return args[0]
1286
1301
 
1287
1302
  def aggregate_all(self, args):
1288
- return [self.environment.concepts[f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}"]]
1303
+ return [
1304
+ ConceptRef(
1305
+ address=f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}",
1306
+ datatype=DataType.INTEGER,
1307
+ )
1308
+ ]
1289
1309
 
1290
1310
  def aggregate_functions(self, args):
1291
1311
  if len(args) == 2:
@@ -1295,452 +1315,190 @@ class ParseToObjects(Transformer):
1295
1315
  @v_args(meta=True)
1296
1316
  def index_access(self, meta, args):
1297
1317
  args = process_function_args(args, meta=meta, environment=self.environment)
1298
- if args[0].datatype == DataType.MAP or isinstance(args[0].datatype, MapType):
1299
- return MapAccess(args)
1300
- return IndexAccess(args)
1318
+ base = args[0]
1319
+ if base.datatype == DataType.MAP or isinstance(base.datatype, MapType):
1320
+ return self.function_factory.create_function(
1321
+ args, FunctionType.MAP_ACCESS, meta
1322
+ )
1323
+ return self.function_factory.create_function(
1324
+ args, FunctionType.INDEX_ACCESS, meta
1325
+ )
1301
1326
 
1302
1327
  @v_args(meta=True)
1303
1328
  def map_key_access(self, meta, args):
1304
- args = process_function_args(args, meta=meta, environment=self.environment)
1305
- return MapAccess(args)
1329
+ return self.function_factory.create_function(
1330
+ args, FunctionType.MAP_ACCESS, meta
1331
+ )
1306
1332
 
1307
1333
  @v_args(meta=True)
1308
1334
  def attr_access(self, meta, args):
1309
- args = process_function_args(args, meta=meta, environment=self.environment)
1310
- return AttrAccess(args)
1335
+ return self.function_factory.create_function(
1336
+ args, FunctionType.ATTR_ACCESS, meta
1337
+ )
1311
1338
 
1312
1339
  @v_args(meta=True)
1313
1340
  def fcoalesce(self, meta, args):
1314
- args = process_function_args(args, meta=meta, environment=self.environment)
1315
- return Coalesce(args)
1341
+ return self.function_factory.create_function(args, FunctionType.COALESCE, meta)
1316
1342
 
1317
1343
  @v_args(meta=True)
1318
1344
  def unnest(self, meta, args):
1319
- args = process_function_args(args, meta=meta, environment=self.environment)
1320
- return Unnest(args)
1345
+ return self.function_factory.create_function(args, FunctionType.UNNEST, meta)
1321
1346
 
1322
1347
  @v_args(meta=True)
1323
1348
  def count(self, meta, args):
1324
- args = process_function_args(args, meta=meta, environment=self.environment)
1325
- return Count(args)
1349
+ return self.function_factory.create_function(args, FunctionType.COUNT, meta)
1326
1350
 
1327
1351
  @v_args(meta=True)
1328
1352
  def fgroup(self, meta, args):
1329
1353
  if len(args) == 2:
1330
- fargs = [args[0]] + args[1]
1354
+ fargs = [args[0]] + list(args[1])
1331
1355
  else:
1332
1356
  fargs = [args[0]]
1333
- args = process_function_args(fargs, meta=meta, environment=self.environment)
1334
- return Group(args)
1357
+ return self.function_factory.create_function(fargs, FunctionType.GROUP, meta)
1335
1358
 
1336
1359
  @v_args(meta=True)
1337
1360
  def fabs(self, meta, args):
1338
- args = process_function_args(args, meta=meta, environment=self.environment)
1339
- return Abs(args)
1361
+ return self.function_factory.create_function(args, FunctionType.ABS, meta)
1340
1362
 
1341
1363
  @v_args(meta=True)
1342
1364
  def count_distinct(self, meta, args):
1343
- args = process_function_args(args, meta=meta, environment=self.environment)
1344
- return CountDistinct(args)
1365
+ return self.function_factory.create_function(
1366
+ args, FunctionType.COUNT_DISTINCT, meta
1367
+ )
1345
1368
 
1346
1369
  @v_args(meta=True)
1347
1370
  def sum(self, meta, args):
1348
- args = process_function_args(args, meta=meta, environment=self.environment)
1349
- return Function(
1350
- operator=FunctionType.SUM,
1351
- arguments=args,
1352
- output_datatype=args[0].datatype,
1353
- output_purpose=Purpose.METRIC,
1354
- arg_count=1,
1355
- )
1371
+ return self.function_factory.create_function(args, FunctionType.SUM, meta)
1356
1372
 
1357
1373
  @v_args(meta=True)
1358
1374
  def avg(self, meta, args):
1359
- args = process_function_args(args, meta=meta, environment=self.environment)
1360
- arg = args[0]
1361
-
1362
- return Function(
1363
- operator=FunctionType.AVG,
1364
- arguments=args,
1365
- output_datatype=arg.datatype,
1366
- output_purpose=Purpose.METRIC,
1367
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1368
- arg_count=1,
1369
- )
1375
+ return self.function_factory.create_function(args, FunctionType.AVG, meta)
1370
1376
 
1371
1377
  @v_args(meta=True)
1372
1378
  def max(self, meta, args):
1373
- args = process_function_args(args, meta=meta, environment=self.environment)
1374
- return Max(args)
1379
+ return self.function_factory.create_function(args, FunctionType.MAX, meta)
1375
1380
 
1376
1381
  @v_args(meta=True)
1377
1382
  def min(self, meta, args):
1378
- args = process_function_args(args, meta=meta, environment=self.environment)
1379
- return Min(args)
1383
+ return self.function_factory.create_function(args, FunctionType.MIN, meta)
1380
1384
 
1381
1385
  @v_args(meta=True)
1382
1386
  def len(self, meta, args):
1383
- args = process_function_args(args, meta=meta, environment=self.environment)
1384
- return Function(
1385
- operator=FunctionType.LENGTH,
1386
- arguments=args,
1387
- output_datatype=DataType.INTEGER,
1388
- output_purpose=Purpose.PROPERTY,
1389
- valid_inputs={DataType.STRING, DataType.ARRAY, DataType.MAP},
1390
- # output_grain=args[0].grain,
1391
- )
1387
+ return self.function_factory.create_function(args, FunctionType.LENGTH, meta)
1392
1388
 
1393
1389
  @v_args(meta=True)
1394
1390
  def fsplit(self, meta, args):
1395
- args = process_function_args(args, meta=meta, environment=self.environment)
1396
- return Split(args)
1391
+ return self.function_factory.create_function(args, FunctionType.SPLIT, meta)
1397
1392
 
1398
1393
  @v_args(meta=True)
1399
1394
  def concat(self, meta, args):
1400
- args = process_function_args(args, meta=meta, environment=self.environment)
1401
- return Function(
1402
- operator=FunctionType.CONCAT,
1403
- arguments=args,
1404
- output_datatype=DataType.STRING,
1405
- output_purpose=Purpose.PROPERTY,
1406
- valid_inputs={DataType.STRING},
1407
- arg_count=-1,
1408
- # output_grain=args[0].grain,
1409
- )
1395
+ return self.function_factory.create_function(args, FunctionType.CONCAT, meta)
1410
1396
 
1411
1397
  @v_args(meta=True)
1412
1398
  def union(self, meta, args):
1413
- args = process_function_args(args, meta=meta, environment=self.environment)
1414
- output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1415
- return Function(
1416
- operator=FunctionType.UNION,
1417
- arguments=args,
1418
- output_datatype=output_datatype,
1419
- output_purpose=Purpose.KEY,
1420
- valid_inputs={*DataType},
1421
- arg_count=-1,
1422
- )
1399
+ return self.function_factory.create_function(args, FunctionType.UNION, meta)
1423
1400
 
1424
1401
  @v_args(meta=True)
1425
1402
  def like(self, meta, args):
1426
- args = process_function_args(args, meta=meta, environment=self.environment)
1427
- return Function(
1428
- operator=FunctionType.LIKE,
1429
- arguments=args,
1430
- output_datatype=DataType.BOOL,
1431
- output_purpose=Purpose.PROPERTY,
1432
- valid_inputs={DataType.STRING},
1433
- arg_count=2,
1434
- )
1403
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
1435
1404
 
1436
1405
  @v_args(meta=True)
1437
1406
  def alt_like(self, meta, args):
1438
- args = process_function_args(args, meta=meta, environment=self.environment)
1439
- return Function(
1440
- operator=FunctionType.LIKE,
1441
- arguments=args,
1442
- output_datatype=DataType.BOOL,
1443
- output_purpose=Purpose.PROPERTY,
1444
- valid_inputs={DataType.STRING},
1445
- arg_count=2,
1446
- )
1407
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
1447
1408
 
1448
1409
  @v_args(meta=True)
1449
1410
  def ilike(self, meta, args):
1450
- args = process_function_args(args, meta=meta, environment=self.environment)
1451
- return Function(
1452
- operator=FunctionType.ILIKE,
1453
- arguments=args,
1454
- output_datatype=DataType.BOOL,
1455
- output_purpose=Purpose.PROPERTY,
1456
- valid_inputs={DataType.STRING},
1457
- arg_count=2,
1458
- )
1411
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
1459
1412
 
1460
1413
  @v_args(meta=True)
1461
1414
  def upper(self, meta, args):
1462
- args = process_function_args(args, meta=meta, environment=self.environment)
1463
- return Function(
1464
- operator=FunctionType.UPPER,
1465
- arguments=args,
1466
- output_datatype=DataType.STRING,
1467
- output_purpose=Purpose.PROPERTY,
1468
- valid_inputs={DataType.STRING},
1469
- arg_count=1,
1470
- )
1415
+ return self.function_factory.create_function(args, FunctionType.UPPER, meta)
1471
1416
 
1472
1417
  @v_args(meta=True)
1473
1418
  def fstrpos(self, meta, args):
1474
- args = process_function_args(args, meta=meta, environment=self.environment)
1475
- return StrPos(args)
1419
+ return self.function_factory.create_function(args, FunctionType.STRPOS, meta)
1476
1420
 
1477
1421
  @v_args(meta=True)
1478
1422
  def fsubstring(self, meta, args):
1479
- args = process_function_args(args, meta=meta, environment=self.environment)
1480
- return SubString(args)
1481
-
1482
- def logical_operator(self, args):
1483
- return BooleanOperator(args[0].value.lower())
1423
+ return self.function_factory.create_function(args, FunctionType.SUBSTRING, meta)
1484
1424
 
1485
1425
  @v_args(meta=True)
1486
1426
  def lower(self, meta, args):
1487
- args = process_function_args(args, meta=meta, environment=self.environment)
1488
- return Function(
1489
- operator=FunctionType.LOWER,
1490
- arguments=args,
1491
- output_datatype=DataType.STRING,
1492
- output_purpose=Purpose.PROPERTY,
1493
- valid_inputs={DataType.STRING},
1494
- arg_count=1,
1495
- )
1427
+ return self.function_factory.create_function(args, FunctionType.LOWER, meta)
1496
1428
 
1497
1429
  # date functions
1498
1430
  @v_args(meta=True)
1499
1431
  def fdate(self, meta, args):
1500
- args = process_function_args(args, meta=meta, environment=self.environment)
1501
- return Function(
1502
- operator=FunctionType.DATE,
1503
- arguments=args,
1504
- output_datatype=DataType.DATE,
1505
- output_purpose=Purpose.PROPERTY,
1506
- valid_inputs={
1507
- DataType.DATE,
1508
- DataType.TIMESTAMP,
1509
- DataType.DATETIME,
1510
- DataType.STRING,
1511
- },
1512
- arg_count=1,
1513
- )
1514
-
1515
- def DATE_PART(self, args):
1516
- return DatePart(args.value)
1432
+ return self.function_factory.create_function(args, FunctionType.DATE, meta)
1517
1433
 
1518
1434
  @v_args(meta=True)
1519
1435
  def fdate_trunc(self, meta, args):
1520
- args = process_function_args(args, meta=meta, environment=self.environment)
1521
- return Function(
1522
- operator=FunctionType.DATE_TRUNCATE,
1523
- arguments=args,
1524
- output_datatype=DataType.DATE,
1525
- output_purpose=Purpose.PROPERTY,
1526
- valid_inputs=[
1527
- {
1528
- DataType.DATE,
1529
- DataType.TIMESTAMP,
1530
- DataType.DATETIME,
1531
- DataType.STRING,
1532
- },
1533
- {DataType.DATE_PART},
1534
- ],
1535
- arg_count=2,
1436
+ return self.function_factory.create_function(
1437
+ args, FunctionType.DATE_TRUNCATE, meta
1536
1438
  )
1537
1439
 
1538
1440
  @v_args(meta=True)
1539
1441
  def fdate_part(self, meta, args):
1540
- args = process_function_args(args, meta=meta, environment=self.environment)
1541
- return Function(
1542
- operator=FunctionType.DATE_PART,
1543
- arguments=args,
1544
- output_datatype=DataType.DATE,
1545
- output_purpose=Purpose.PROPERTY,
1546
- valid_inputs=[
1547
- {
1548
- DataType.DATE,
1549
- DataType.TIMESTAMP,
1550
- DataType.DATETIME,
1551
- DataType.STRING,
1552
- },
1553
- {DataType.DATE_PART},
1554
- ],
1555
- arg_count=2,
1556
- )
1442
+ return self.function_factory.create_function(args, FunctionType.DATE_PART, meta)
1557
1443
 
1558
1444
  @v_args(meta=True)
1559
1445
  def fdate_add(self, meta, args):
1560
- args = process_function_args(args, meta=meta, environment=self.environment)
1561
- return Function(
1562
- operator=FunctionType.DATE_ADD,
1563
- arguments=args,
1564
- output_datatype=DataType.DATE,
1565
- output_purpose=Purpose.PROPERTY,
1566
- valid_inputs=[
1567
- {
1568
- DataType.DATE,
1569
- DataType.TIMESTAMP,
1570
- DataType.DATETIME,
1571
- DataType.STRING,
1572
- },
1573
- {DataType.DATE_PART},
1574
- {DataType.INTEGER},
1575
- ],
1576
- arg_count=3,
1577
- )
1446
+ return self.function_factory.create_function(args, FunctionType.DATE_ADD, meta)
1578
1447
 
1579
1448
  @v_args(meta=True)
1580
1449
  def fdate_diff(self, meta, args):
1581
- args = process_function_args(args, meta=meta, environment=self.environment)
1582
- purpose = function_args_to_output_purpose(args)
1583
- return Function(
1584
- operator=FunctionType.DATE_DIFF,
1585
- arguments=args,
1586
- output_datatype=DataType.INTEGER,
1587
- output_purpose=purpose,
1588
- valid_inputs=[
1589
- {
1590
- DataType.DATE,
1591
- DataType.TIMESTAMP,
1592
- DataType.DATETIME,
1593
- },
1594
- {
1595
- DataType.DATE,
1596
- DataType.TIMESTAMP,
1597
- DataType.DATETIME,
1598
- },
1599
- {DataType.DATE_PART},
1600
- ],
1601
- arg_count=3,
1602
- )
1450
+ return self.function_factory.create_function(args, FunctionType.DATE_DIFF, meta)
1603
1451
 
1604
1452
  @v_args(meta=True)
1605
1453
  def fdatetime(self, meta, args):
1606
- args = process_function_args(args, meta=meta, environment=self.environment)
1607
- return Function(
1608
- operator=FunctionType.DATETIME,
1609
- arguments=args,
1610
- output_datatype=DataType.DATETIME,
1611
- output_purpose=Purpose.PROPERTY,
1612
- valid_inputs={
1613
- DataType.DATE,
1614
- DataType.TIMESTAMP,
1615
- DataType.DATETIME,
1616
- DataType.STRING,
1617
- },
1618
- arg_count=1,
1619
- )
1454
+ return self.function_factory.create_function(args, FunctionType.DATETIME, meta)
1620
1455
 
1621
1456
  @v_args(meta=True)
1622
1457
  def ftimestamp(self, meta, args):
1623
- args = process_function_args(args, meta=meta, environment=self.environment)
1624
- return Function(
1625
- operator=FunctionType.TIMESTAMP,
1626
- arguments=args,
1627
- output_datatype=DataType.TIMESTAMP,
1628
- output_purpose=Purpose.PROPERTY,
1629
- valid_inputs=[{DataType.TIMESTAMP, DataType.STRING}],
1630
- arg_count=1,
1631
- )
1458
+ return self.function_factory.create_function(args, FunctionType.TIMESTAMP, meta)
1632
1459
 
1633
1460
  @v_args(meta=True)
1634
1461
  def fsecond(self, meta, args):
1635
- args = process_function_args(args, meta=meta, environment=self.environment)
1636
- return Function(
1637
- operator=FunctionType.SECOND,
1638
- arguments=args,
1639
- output_datatype=DataType.INTEGER,
1640
- output_purpose=Purpose.PROPERTY,
1641
- valid_inputs={DataType.TIMESTAMP, DataType.DATETIME},
1642
- arg_count=1,
1643
- )
1462
+ return self.function_factory.create_function(args, FunctionType.SECOND, meta)
1644
1463
 
1645
1464
  @v_args(meta=True)
1646
1465
  def fminute(self, meta, args):
1647
- args = process_function_args(args, meta=meta, environment=self.environment)
1648
- return Function(
1649
- operator=FunctionType.MINUTE,
1650
- arguments=args,
1651
- output_datatype=DataType.INTEGER,
1652
- output_purpose=Purpose.PROPERTY,
1653
- valid_inputs={DataType.TIMESTAMP, DataType.DATETIME},
1654
- arg_count=1,
1655
- )
1466
+ return self.function_factory.create_function(args, FunctionType.MINUTE, meta)
1656
1467
 
1657
1468
  @v_args(meta=True)
1658
1469
  def fhour(self, meta, args):
1659
- args = process_function_args(args, meta=meta, environment=self.environment)
1660
- return Function(
1661
- operator=FunctionType.HOUR,
1662
- arguments=args,
1663
- output_datatype=DataType.INTEGER,
1664
- output_purpose=Purpose.PROPERTY,
1665
- valid_inputs={DataType.TIMESTAMP, DataType.DATETIME},
1666
- arg_count=1,
1667
- )
1470
+ return self.function_factory.create_function(args, FunctionType.HOUR, meta)
1668
1471
 
1669
1472
  @v_args(meta=True)
1670
1473
  def fday(self, meta, args):
1671
- args = process_function_args(args, meta=meta, environment=self.environment)
1672
- return Function(
1673
- operator=FunctionType.DAY,
1674
- arguments=args,
1675
- output_datatype=DataType.INTEGER,
1676
- output_purpose=Purpose.PROPERTY,
1677
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1678
- arg_count=1,
1679
- )
1474
+ return self.function_factory.create_function(args, FunctionType.DAY, meta)
1680
1475
 
1681
1476
  @v_args(meta=True)
1682
1477
  def fday_of_week(self, meta, args):
1683
- args = process_function_args(args, meta=meta, environment=self.environment)
1684
- return Function(
1685
- operator=FunctionType.DAY_OF_WEEK,
1686
- arguments=args,
1687
- output_datatype=DataType.INTEGER,
1688
- output_purpose=Purpose.PROPERTY,
1689
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1690
- arg_count=1,
1478
+ return self.function_factory.create_function(
1479
+ args, FunctionType.DAY_OF_WEEK, meta
1691
1480
  )
1692
1481
 
1693
1482
  @v_args(meta=True)
1694
1483
  def fweek(self, meta, args):
1695
- args = process_function_args(args, meta=meta, environment=self.environment)
1696
- return Function(
1697
- operator=FunctionType.WEEK,
1698
- arguments=args,
1699
- output_datatype=DataType.INTEGER,
1700
- output_purpose=Purpose.PROPERTY,
1701
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1702
- arg_count=1,
1703
- )
1484
+ return self.function_factory.create_function(args, FunctionType.WEEK, meta)
1704
1485
 
1705
1486
  @v_args(meta=True)
1706
1487
  def fmonth(self, meta, args):
1707
- args = process_function_args(args, meta=meta, environment=self.environment)
1708
- return Function(
1709
- operator=FunctionType.MONTH,
1710
- arguments=args,
1711
- output_datatype=DataType.INTEGER,
1712
- output_purpose=Purpose.PROPERTY,
1713
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1714
- arg_count=1,
1715
- )
1488
+ return self.function_factory.create_function(args, FunctionType.MONTH, meta)
1716
1489
 
1717
1490
  @v_args(meta=True)
1718
1491
  def fquarter(self, meta, args):
1719
- args = process_function_args(args, meta=meta, environment=self.environment)
1720
- return Function(
1721
- operator=FunctionType.QUARTER,
1722
- arguments=args,
1723
- output_datatype=DataType.INTEGER,
1724
- output_purpose=Purpose.PROPERTY,
1725
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1726
- arg_count=1,
1727
- )
1492
+ return self.function_factory.create_function(args, FunctionType.QUARTER, meta)
1728
1493
 
1729
1494
  @v_args(meta=True)
1730
1495
  def fyear(self, meta, args):
1731
- args = process_function_args(args, meta=meta, environment=self.environment)
1732
- return Function(
1733
- operator=FunctionType.YEAR,
1734
- arguments=args,
1735
- output_datatype=DataType.INTEGER,
1736
- output_purpose=Purpose.PROPERTY,
1737
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1738
- arg_count=1,
1739
- )
1496
+ return self.function_factory.create_function(args, FunctionType.YEAR, meta)
1740
1497
 
1741
1498
  # utility functions
1742
1499
  @v_args(meta=True)
1743
1500
  def fcast(self, meta, args) -> Function:
1501
+ # if it's casting a constant, we'll process that directly
1744
1502
  args = process_function_args(args, meta=meta, environment=self.environment)
1745
1503
  if isinstance(args[0], str):
1746
1504
  processed: date | datetime | int | float | bool | str
@@ -1760,134 +1518,39 @@ class ParseToObjects(Transformer):
1760
1518
  processed = args[0]
1761
1519
  else:
1762
1520
  raise SyntaxError(f"Invalid cast type {args[1]}")
1763
- return Function(
1764
- operator=FunctionType.CONSTANT,
1765
- output_datatype=args[1],
1766
- output_purpose=Purpose.CONSTANT,
1767
- arguments=[processed],
1521
+ return self.function_factory.create_function(
1522
+ [processed], FunctionType.CONSTANT, meta
1768
1523
  )
1769
- output_datatype = args[1]
1770
- return Function(
1771
- operator=FunctionType.CAST,
1772
- arguments=args,
1773
- output_datatype=output_datatype,
1774
- output_purpose=function_args_to_output_purpose(args),
1775
- valid_inputs={
1776
- DataType.INTEGER,
1777
- DataType.STRING,
1778
- DataType.FLOAT,
1779
- DataType.NUMBER,
1780
- DataType.NUMERIC,
1781
- DataType.BOOL,
1782
- },
1783
- arg_count=2,
1784
- )
1524
+ return self.function_factory.create_function(args, FunctionType.CAST, meta)
1785
1525
 
1786
1526
  # math functions
1787
1527
  @v_args(meta=True)
1788
1528
  def fadd(self, meta, args) -> Function:
1789
- args = process_function_args(args, meta=meta, environment=self.environment)
1790
- output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1791
- return Function(
1792
- operator=FunctionType.ADD,
1793
- arguments=args,
1794
- output_datatype=output_datatype,
1795
- output_purpose=function_args_to_output_purpose(args),
1796
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1797
- arg_count=-1,
1798
- )
1529
+ return self.function_factory.create_function(args, FunctionType.ADD, meta)
1799
1530
 
1800
1531
  @v_args(meta=True)
1801
1532
  def fsub(self, meta, args) -> Function:
1802
- args = process_function_args(args, meta=meta, environment=self.environment)
1803
- output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1804
- return Function(
1805
- operator=FunctionType.SUBTRACT,
1806
- arguments=args,
1807
- output_datatype=output_datatype,
1808
- output_purpose=function_args_to_output_purpose(args),
1809
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1810
- arg_count=-1,
1811
- )
1533
+ return self.function_factory.create_function(args, FunctionType.SUBTRACT, meta)
1812
1534
 
1813
1535
  @v_args(meta=True)
1814
1536
  def fmul(self, meta, args) -> Function:
1815
- args = process_function_args(args, meta=meta, environment=self.environment)
1816
- output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1817
- return Function(
1818
- operator=FunctionType.MULTIPLY,
1819
- arguments=args,
1820
- output_datatype=output_datatype,
1821
- output_purpose=function_args_to_output_purpose(args),
1822
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1823
- arg_count=-1,
1824
- )
1537
+ return self.function_factory.create_function(args, FunctionType.MULTIPLY, meta)
1825
1538
 
1826
1539
  @v_args(meta=True)
1827
- def fdiv(self, meta: Meta, args):
1828
- args = process_function_args(args, meta=meta, environment=self.environment)
1829
- # 2024-11-18 - this is a bit of a hack, but division always returns a float
1830
- # output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1831
- return Function(
1832
- operator=FunctionType.DIVIDE,
1833
- arguments=args,
1834
- output_datatype=DataType.FLOAT, # division always returns a float
1835
- output_purpose=function_args_to_output_purpose(args),
1836
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1837
- arg_count=-1,
1838
- )
1540
+ def fdiv(self, meta: Meta, args) -> Function:
1541
+ return self.function_factory.create_function(args, FunctionType.DIVIDE, meta)
1839
1542
 
1840
1543
  @v_args(meta=True)
1841
- def fmod(self, meta: Meta, args):
1842
- args = process_function_args(args, meta=meta, environment=self.environment)
1843
- return Function(
1844
- operator=FunctionType.MOD,
1845
- arguments=args,
1846
- output_datatype=DataType.INTEGER,
1847
- output_purpose=function_args_to_output_purpose(args),
1848
- valid_inputs=[
1849
- {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1850
- {DataType.INTEGER},
1851
- ],
1852
- arg_count=2,
1853
- )
1544
+ def fmod(self, meta: Meta, args) -> Function:
1545
+ return self.function_factory.create_function(args, FunctionType.MOD, meta)
1854
1546
 
1855
1547
  @v_args(meta=True)
1856
1548
  def fround(self, meta, args) -> Function:
1857
- args = process_function_args(args, meta=meta, environment=self.environment)
1858
- output_datatype = arg_to_datatype(args[0])
1859
- return Function(
1860
- operator=FunctionType.ROUND,
1861
- arguments=args,
1862
- output_datatype=output_datatype,
1863
- output_purpose=function_args_to_output_purpose(args),
1864
- valid_inputs=[
1865
- {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1866
- {DataType.INTEGER},
1867
- ],
1868
- arg_count=2,
1869
- )
1549
+ return self.function_factory.create_function(args, FunctionType.ROUND, meta)
1870
1550
 
1871
- def fcase(self, args: List[Union[CaseWhen, CaseElse]]):
1872
- datatypes = set()
1873
- mapz = dict()
1874
- for arg in args:
1875
- output_datatype = arg_to_datatype(arg.expr)
1876
- if output_datatype != DataType.NULL:
1877
- datatypes.add(output_datatype)
1878
- mapz[str(arg.expr)] = output_datatype
1879
- if not len(datatypes) == 1:
1880
- raise SyntaxError(
1881
- f"All case expressions must have the same output datatype, got {datatypes} from {mapz}"
1882
- )
1883
- return Function(
1884
- operator=FunctionType.CASE,
1885
- arguments=args,
1886
- output_datatype=datatypes.pop(),
1887
- output_purpose=Purpose.PROPERTY,
1888
- # valid_inputs=[{DataType.INTEGER, DataType.FLOAT, DataType.NUMBER}, {DataType.INTEGER}],
1889
- arg_count=InfiniteFunctionArgs,
1890
- )
1551
+ @v_args(meta=True)
1552
+ def fcase(self, meta, args: List[Union[CaseWhen, CaseElse]]) -> Function:
1553
+ return self.function_factory.create_function(args, FunctionType.CASE, meta)
1891
1554
 
1892
1555
  @v_args(meta=True)
1893
1556
  def fcase_when(self, meta, args) -> CaseWhen:
@@ -1902,23 +1565,19 @@ class ParseToObjects(Transformer):
1902
1565
 
1903
1566
  @v_args(meta=True)
1904
1567
  def fcurrent_date(self, meta, args):
1905
- args = process_function_args(args, meta=meta, environment=self.environment)
1906
1568
  return CurrentDate([])
1907
1569
 
1908
1570
  @v_args(meta=True)
1909
1571
  def fcurrent_datetime(self, meta, args):
1910
- args = process_function_args(args, meta=meta, environment=self.environment)
1911
1572
  return CurrentDatetime([])
1912
1573
 
1913
1574
  @v_args(meta=True)
1914
1575
  def fnot(self, meta, args):
1915
- args = process_function_args(args, meta=meta, environment=self.environment)
1916
- return IsNull(args)
1576
+ return self.function_factory.create_function(args, FunctionType.IS_NULL, meta)
1917
1577
 
1918
1578
  @v_args(meta=True)
1919
1579
  def fbool(self, meta, args):
1920
- args = process_function_args(args, meta=meta, environment=self.environment)
1921
- return Bool(args)
1580
+ return self.function_factory.create_function(args, FunctionType.BOOL, meta)
1922
1581
 
1923
1582
 
1924
1583
  def unpack_visit_error(e: VisitError):