pytrilogy 0.0.2.57__py3-none-any.whl → 0.0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1845 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +696 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +17 -22
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +181 -146
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +51 -45
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +13 -10
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +59 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +19 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +92 -77
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +55 -40
  45. trilogy/core/processing/nodes/merge_node.py +47 -38
  46. trilogy/core/processing/nodes/select_node_v2.py +54 -40
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +108 -80
  51. trilogy/core/query_processor.py +67 -49
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +152 -111
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/base_hook.py +6 -4
  65. trilogy/hooks/query_debugger.py +110 -93
  66. trilogy/parser.py +1 -1
  67. trilogy/parsing/common.py +303 -64
  68. trilogy/parsing/parse_engine.py +263 -617
  69. trilogy/parsing/render.py +50 -26
  70. trilogy/scripts/trilogy.py +2 -1
  71. pytrilogy-0.0.2.57.dist-info/RECORD +0 -87
  72. trilogy/core/models.py +0 -4960
  73. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
  74. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
  75. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  from dataclasses import dataclass
2
2
  from datetime import date, datetime
3
+ from enum import Enum
3
4
  from os.path import dirname, join
4
5
  from pathlib import Path
5
6
  from re import IGNORECASE
@@ -26,8 +27,9 @@ from trilogy.core.enums import (
26
27
  ComparisonOperator,
27
28
  ConceptSource,
28
29
  DatePart,
30
+ Derivation,
29
31
  FunctionType,
30
- InfiniteFunctionArgs,
32
+ Granularity,
31
33
  IOType,
32
34
  Modifier,
33
35
  Ordering,
@@ -38,97 +40,91 @@ from trilogy.core.enums import (
38
40
  )
39
41
  from trilogy.core.exceptions import InvalidSyntaxException, UndefinedConceptException
40
42
  from trilogy.core.functions import (
41
- Abs,
42
- AttrAccess,
43
- Bool,
44
- Coalesce,
45
- Count,
46
- CountDistinct,
47
43
  CurrentDate,
48
44
  CurrentDatetime,
49
- Group,
50
- IndexAccess,
51
- IsNull,
52
- MapAccess,
53
- Max,
54
- Min,
55
- Split,
56
- StrPos,
57
- SubString,
58
- Unnest,
59
- function_args_to_output_purpose,
45
+ FunctionFactory,
60
46
  )
61
47
  from trilogy.core.internal import ALL_ROWS_CONCEPT, INTERNAL_NAMESPACE
62
- from trilogy.core.models import (
63
- Address,
48
+ from trilogy.core.models.author import (
64
49
  AggregateWrapper,
65
50
  AlignClause,
66
51
  AlignItem,
67
52
  CaseElse,
68
53
  CaseWhen,
69
- ColumnAssignment,
70
54
  Comment,
71
55
  Comparison,
72
56
  Concept,
73
- ConceptDeclarationStatement,
74
- ConceptDerivation,
75
- ConceptTransform,
57
+ ConceptRef,
76
58
  Conditional,
77
- CopyStatement,
78
- Datasource,
79
- DataType,
80
- Environment,
81
- EnvironmentConceptDict,
59
+ Expr,
82
60
  FilterItem,
83
61
  Function,
84
62
  Grain,
85
63
  HavingClause,
86
- ImportStatement,
87
- Limit,
88
- ListType,
89
- ListWrapper,
90
- MapType,
91
- MapWrapper,
92
- MergeStatementV2,
93
64
  Metadata,
94
- MultiSelectStatement,
95
- NumericType,
96
65
  OrderBy,
97
66
  OrderItem,
98
67
  Parenthetical,
99
- PersistStatement,
100
- Query,
101
- RawColumnExpr,
102
- RawSQLStatement,
103
- RowsetDerivationStatement,
104
- SelectItem,
105
- SelectStatement,
106
- ShowStatement,
107
- StructType,
108
68
  SubselectComparison,
109
- TupleWrapper,
110
69
  WhereClause,
111
70
  Window,
112
71
  WindowItem,
113
72
  WindowItemOrder,
114
73
  WindowItemOver,
74
+ )
75
+ from trilogy.core.models.core import (
76
+ DataType,
77
+ ListType,
78
+ ListWrapper,
79
+ MapType,
80
+ MapWrapper,
81
+ NumericType,
82
+ StructType,
83
+ TupleWrapper,
115
84
  arg_to_datatype,
116
85
  dict_to_map_wrapper,
117
86
  list_to_wrapper,
118
- merge_datatypes,
119
87
  tuple_to_wrapper,
120
88
  )
89
+ from trilogy.core.models.datasource import (
90
+ Address,
91
+ ColumnAssignment,
92
+ Datasource,
93
+ Query,
94
+ RawColumnExpr,
95
+ )
96
+ from trilogy.core.models.environment import Environment, Import
97
+ from trilogy.core.statements.author import (
98
+ ConceptDeclarationStatement,
99
+ ConceptDerivationStatement,
100
+ ConceptTransform,
101
+ CopyStatement,
102
+ ImportStatement,
103
+ Limit,
104
+ MergeStatementV2,
105
+ MultiSelectStatement,
106
+ PersistStatement,
107
+ RawSQLStatement,
108
+ RowsetDerivationStatement,
109
+ SelectItem,
110
+ SelectStatement,
111
+ ShowStatement,
112
+ )
121
113
  from trilogy.parsing.common import (
122
- agg_wrapper_to_concept,
114
+ align_item_to_concept,
123
115
  arbitrary_to_concept,
124
116
  constant_to_concept,
125
- filter_item_to_concept,
126
- function_to_concept,
127
117
  process_function_args,
128
- window_item_to_concept,
118
+ rowset_to_concepts,
129
119
  )
130
120
  from trilogy.parsing.exceptions import ParseError
131
121
 
122
+
123
+ class ParsePass(Enum):
124
+ INITIAL = 1
125
+ VALIDATION = 2
126
+
127
+
132
128
  CONSTANT_TYPES = (int, float, str, bool, list, ListWrapper, MapWrapper)
133
129
 
134
130
  SELF_LABEL = "root"
@@ -188,27 +184,19 @@ def expr_to_boolean(
188
184
 
189
185
 
190
186
  def unwrap_transformation(
191
- input: Union[
192
- FilterItem,
193
- WindowItem,
194
- Concept,
195
- Function,
196
- AggregateWrapper,
197
- int,
198
- str,
199
- float,
200
- bool,
201
- ],
187
+ input: Expr,
188
+ environment: Environment,
202
189
  ) -> Function | FilterItem | WindowItem | AggregateWrapper:
203
190
  if isinstance(input, Function):
204
191
  return input
205
192
  elif isinstance(input, AggregateWrapper):
206
193
  return input
207
- elif isinstance(input, Concept):
194
+ elif isinstance(input, ConceptRef):
195
+ concept = environment.concepts[input.address]
208
196
  return Function(
209
197
  operator=FunctionType.ALIAS,
210
- output_datatype=input.datatype,
211
- output_purpose=input.purpose,
198
+ output_datatype=concept.datatype,
199
+ output_purpose=concept.purpose,
212
200
  arguments=[input],
213
201
  )
214
202
  elif isinstance(input, FilterItem):
@@ -216,7 +204,7 @@ def unwrap_transformation(
216
204
  elif isinstance(input, WindowItem):
217
205
  return input
218
206
  elif isinstance(input, Parenthetical):
219
- return unwrap_transformation(input.content)
207
+ return unwrap_transformation(input.content, environment)
220
208
  else:
221
209
  return Function(
222
210
  operator=FunctionType.CONSTANT,
@@ -247,7 +235,8 @@ class ParseToObjects(Transformer):
247
235
  )
248
236
  # we do a second pass to pick up circular dependencies
249
237
  # after initial parsing
250
- self.pass_count = 1
238
+ self.parse_pass = ParsePass.INITIAL
239
+ self.function_factory = FunctionFactory(self.environment)
251
240
 
252
241
  def set_text(self, text: str):
253
242
  self.text_lookup[self.token_address] = text
@@ -258,15 +247,15 @@ class ParseToObjects(Transformer):
258
247
  return results
259
248
 
260
249
  def prepare_parse(self):
261
- self.pass_count = 1
250
+ self.parse_pass = ParsePass.INITIAL
262
251
  self.environment.concepts.fail_on_missing = False
263
252
  for _, v in self.parsed.items():
264
253
  v.prepare_parse()
265
254
 
266
255
  def hydrate_missing(self):
267
- self.pass_count = 2
256
+ self.parse_pass = ParsePass.VALIDATION
268
257
  for k, v in self.parsed.items():
269
- if v.pass_count == 2:
258
+ if v.parse_pass == ParsePass.VALIDATION:
270
259
  continue
271
260
  v.hydrate_missing()
272
261
  reparsed = self.transform(self.tokens[self.token_address])
@@ -312,10 +301,23 @@ class ParseToObjects(Transformer):
312
301
  def QUOTED_IDENTIFIER(self, args) -> str:
313
302
  return args.value[1:-1]
314
303
 
304
+ # @v_args(meta=True)
305
+ # def concept_lit(self, meta: Meta, args) -> ConceptRef:
306
+ # address = args[0]
307
+ # return self.environment.concepts.__getitem__(address, meta.line)
308
+ # return ConceptRef(address=address, line_no=meta.line)
315
309
  @v_args(meta=True)
316
- def concept_lit(self, meta: Meta, args) -> Concept:
310
+ def concept_lit(self, meta: Meta, args) -> ConceptRef:
317
311
  address = args[0]
318
- return self.environment.concepts.__getitem__(address, meta.line)
312
+ if "." not in address and self.environment.namespace == DEFAULT_NAMESPACE:
313
+ address = f"{DEFAULT_NAMESPACE}.{address}"
314
+ mapping = self.environment.concepts[address]
315
+ datatype = mapping.output_datatype
316
+ return ConceptRef(
317
+ address=mapping.address,
318
+ metadata=Metadata(line_number=meta.line),
319
+ datatype=datatype,
320
+ )
319
321
 
320
322
  def ADDRESS(self, args) -> Address:
321
323
  return Address(location=args.value, quoted=False)
@@ -399,7 +401,9 @@ class ParseToObjects(Transformer):
399
401
  resolved = self.environment.concepts.__getitem__( # type: ignore
400
402
  key=concept, line_no=meta.line, file=self.token_address
401
403
  )
402
- return ColumnAssignment(alias=alias, modifiers=modifiers, concept=resolved)
404
+ return ColumnAssignment(
405
+ alias=alias, modifiers=modifiers, concept=resolved.reference
406
+ )
403
407
 
404
408
  def _TERMINATOR(self, args):
405
409
  return None
@@ -489,14 +493,16 @@ class ParseToObjects(Transformer):
489
493
  metadata=metadata,
490
494
  namespace=namespace,
491
495
  modifiers=modifiers,
496
+ derivation=Derivation.ROOT,
497
+ granularity=Granularity.MULTI_ROW,
492
498
  )
493
499
  if concept.metadata:
494
500
  concept.metadata.line_number = meta.line
495
- self.environment.add_concept(concept, meta=meta, force=True)
501
+ self.environment.add_concept(concept, meta=meta)
496
502
  return ConceptDeclarationStatement(concept=concept)
497
503
 
498
504
  @v_args(meta=True)
499
- def concept_derivation(self, meta: Meta, args) -> ConceptDerivation:
505
+ def concept_derivation(self, meta: Meta, args) -> ConceptDerivationStatement:
500
506
  if len(args) > 3:
501
507
  metadata = args[3]
502
508
  else:
@@ -539,7 +545,7 @@ class ParseToObjects(Transformer):
539
545
  if concept.metadata:
540
546
  concept.metadata.line_number = meta.line
541
547
  self.environment.add_concept(concept, meta=meta)
542
- return ConceptDerivation(concept=concept)
548
+ return ConceptDerivationStatement(concept=concept)
543
549
 
544
550
  elif isinstance(source_value, CONSTANT_TYPES):
545
551
  concept = constant_to_concept(
@@ -552,7 +558,7 @@ class ParseToObjects(Transformer):
552
558
  if concept.metadata:
553
559
  concept.metadata.line_number = meta.line
554
560
  self.environment.add_concept(concept, meta=meta)
555
- return ConceptDerivation(concept=concept)
561
+ return ConceptDerivationStatement(concept=concept)
556
562
 
557
563
  raise SyntaxError(
558
564
  f"Received invalid type {type(args[2])} {args[2]} as input to select"
@@ -570,12 +576,11 @@ class ParseToObjects(Transformer):
570
576
  select=select,
571
577
  namespace=self.environment.namespace or DEFAULT_NAMESPACE,
572
578
  )
573
- for new_concept in output.derived_concepts:
579
+ for new_concept in rowset_to_concepts(output, self.environment):
574
580
  if new_concept.metadata:
575
581
  new_concept.metadata.line_number = meta.line
576
582
  # output.select.local_concepts[new_concept.address] = new_concept
577
583
  self.environment.add_concept(new_concept)
578
-
579
584
  return output
580
585
 
581
586
  @v_args(meta=True)
@@ -672,15 +677,14 @@ class ParseToObjects(Transformer):
672
677
  where=where,
673
678
  non_partial_for=non_partial_for,
674
679
  )
675
- for column in columns:
676
- column.concept = column.concept.with_grain(datasource.grain)
677
680
  if datasource.where:
678
681
  for x in datasource.where.concept_arguments:
679
682
  if x.address not in datasource.output_concepts:
680
683
  raise ValueError(
681
684
  f"Datasource {name} where condition depends on concept {x.address} that does not exist on the datasource, line {meta.line}."
682
685
  )
683
- self.environment.add_datasource(datasource, meta=meta)
686
+ if self.parse_pass == ParsePass.VALIDATION:
687
+ self.environment.add_datasource(datasource, meta=meta)
684
688
  return datasource
685
689
 
686
690
  @v_args(meta=True)
@@ -694,39 +698,20 @@ class ParseToObjects(Transformer):
694
698
  @v_args(meta=True)
695
699
  def select_transform(self, meta: Meta, args) -> ConceptTransform:
696
700
  output: str = args[1]
697
- transformation = unwrap_transformation(args[0])
701
+ transformation = unwrap_transformation(args[0], self.environment)
698
702
  lookup, namespace, output, parent = parse_concept_reference(
699
703
  output, self.environment
700
704
  )
701
705
 
702
706
  metadata = Metadata(line_number=meta.line, concept_source=ConceptSource.SELECT)
703
707
 
704
- if isinstance(transformation, AggregateWrapper):
705
- concept = agg_wrapper_to_concept(
706
- transformation, namespace=namespace, name=output, metadata=metadata
707
- )
708
- elif isinstance(transformation, WindowItem):
709
- concept = window_item_to_concept(
710
- transformation, namespace=namespace, name=output, metadata=metadata
711
- )
712
- elif isinstance(transformation, FilterItem):
713
- concept = filter_item_to_concept(
714
- transformation, namespace=namespace, name=output, metadata=metadata
715
- )
716
- elif isinstance(transformation, CONSTANT_TYPES):
717
- concept = constant_to_concept(
718
- transformation, namespace=namespace, name=output, metadata=metadata
719
- )
720
- elif isinstance(transformation, Function):
721
- concept = function_to_concept(
722
- transformation,
723
- namespace=namespace,
724
- name=output,
725
- metadata=metadata,
726
- environment=self.environment,
727
- )
728
- else:
729
- raise SyntaxError("Invalid transformation")
708
+ concept = arbitrary_to_concept(
709
+ transformation,
710
+ environment=self.environment,
711
+ namespace=namespace,
712
+ name=output,
713
+ metadata=metadata,
714
+ )
730
715
 
731
716
  return ConceptTransform(function=transformation, output=concept)
732
717
 
@@ -775,20 +760,10 @@ class ParseToObjects(Transformer):
775
760
  return Ordering(" ".join([base, "nulls", null_sort.lower()]))
776
761
  return Ordering(base)
777
762
 
778
- def order_list(self, args):
779
- def handle_order_item(x, namespace: str):
780
- if not isinstance(x, Concept):
781
- x = arbitrary_to_concept(
782
- x, namespace=namespace, environment=self.environment
783
- )
784
- return x
785
-
763
+ def order_list(self, args) -> List[OrderItem]:
786
764
  return [
787
765
  OrderItem(
788
- expr=handle_order_item(
789
- x,
790
- self.environment.namespace,
791
- ),
766
+ expr=x,
792
767
  order=y,
793
768
  )
794
769
  for x, y in zip(args[::2], args[1::2])
@@ -801,7 +776,7 @@ class ParseToObjects(Transformer):
801
776
  return [x for x in args]
802
777
 
803
778
  @v_args(meta=True)
804
- def merge_statement(self, meta: Meta, args) -> MergeStatementV2:
779
+ def merge_statement(self, meta: Meta, args) -> MergeStatementV2 | None:
805
780
  modifiers = []
806
781
  cargs: list[str] = []
807
782
  source_wildcard = None
@@ -817,12 +792,12 @@ class ParseToObjects(Transformer):
817
792
  raise ValueError("Invalid merge, source is wildcard, target is not")
818
793
  source_wildcard = source[:-2]
819
794
  target_wildcard = target[:-2]
820
- sources = [
795
+ sources: list[Concept] = [
821
796
  v
822
797
  for k, v in self.environment.concepts.items()
823
798
  if v.namespace == source_wildcard
824
799
  ]
825
- targets = {}
800
+ targets: dict[str, Concept] = {}
826
801
  for x in sources:
827
802
  target = target_wildcard + "." + x.name
828
803
  if target in self.environment.concepts:
@@ -831,19 +806,22 @@ class ParseToObjects(Transformer):
831
806
  else:
832
807
  sources = [self.environment.concepts[source]]
833
808
  targets = {sources[0].address: self.environment.concepts[target]}
834
- new = MergeStatementV2(
835
- sources=sources,
836
- targets=targets,
837
- modifiers=modifiers,
838
- source_wildcard=source_wildcard,
839
- target_wildcard=target_wildcard,
840
- )
841
- for source_c in new.sources:
842
- self.environment.merge_concept(
843
- source_c, targets[source_c.address], modifiers
809
+
810
+ if self.parse_pass == ParsePass.VALIDATION:
811
+ new = MergeStatementV2(
812
+ sources=sources,
813
+ targets=targets,
814
+ modifiers=modifiers,
815
+ source_wildcard=source_wildcard,
816
+ target_wildcard=target_wildcard,
844
817
  )
818
+ for source_c in new.sources:
819
+ self.environment.merge_concept(
820
+ source_c, targets[source_c.address], modifiers
821
+ )
845
822
 
846
- return new
823
+ return new
824
+ return None
847
825
 
848
826
  @v_args(meta=True)
849
827
  def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
@@ -919,7 +897,9 @@ class ParseToObjects(Transformer):
919
897
  ) from e
920
898
 
921
899
  imps = ImportStatement(alias=alias, path=Path(args[0]))
922
- self.environment.add_import(alias, new_env, imps)
900
+ self.environment.add_import(
901
+ alias, new_env, Import(alias=alias, path=Path(args[0]))
902
+ )
923
903
  return imps
924
904
 
925
905
  @v_args(meta=True)
@@ -931,7 +911,7 @@ class ParseToObjects(Transformer):
931
911
  return ShowStatement(content=args[0])
932
912
 
933
913
  @v_args(meta=True)
934
- def persist_statement(self, meta: Meta, args) -> PersistStatement:
914
+ def persist_statement(self, meta: Meta, args) -> PersistStatement | None:
935
915
  identifier: str = args[0]
936
916
  address: str = args[1]
937
917
  select: SelectStatement = args[2]
@@ -939,29 +919,31 @@ class ParseToObjects(Transformer):
939
919
  grain: Grain | None = args[3]
940
920
  else:
941
921
  grain = None
942
-
943
- new_datasource = select.to_datasource(
944
- namespace=(
945
- self.environment.namespace
946
- if self.environment.namespace
947
- else DEFAULT_NAMESPACE
948
- ),
949
- name=identifier,
950
- address=Address(location=address),
951
- grain=grain,
952
- )
953
- return PersistStatement(
954
- select=select,
955
- datasource=new_datasource,
956
- meta=Metadata(line_number=meta.line),
957
- )
922
+ if self.parse_pass == ParsePass.VALIDATION:
923
+ new_datasource = select.to_datasource(
924
+ namespace=(
925
+ self.environment.namespace
926
+ if self.environment.namespace
927
+ else DEFAULT_NAMESPACE
928
+ ),
929
+ name=identifier,
930
+ address=Address(location=address),
931
+ grain=grain,
932
+ environment=self.environment,
933
+ )
934
+ return PersistStatement(
935
+ select=select,
936
+ datasource=new_datasource,
937
+ meta=Metadata(line_number=meta.line),
938
+ )
939
+ return None
958
940
 
959
941
  @v_args(meta=True)
960
942
  def align_item(self, meta: Meta, args) -> AlignItem:
961
943
  return AlignItem(
962
944
  alias=args[0],
963
945
  namespace=self.environment.namespace,
964
- concepts=[self.environment.concepts[arg] for arg in args[1:]],
946
+ concepts=[self.environment.concepts[arg].reference for arg in args[1:]],
965
947
  )
966
948
 
967
949
  @v_args(meta=True)
@@ -970,11 +952,13 @@ class ParseToObjects(Transformer):
970
952
 
971
953
  @v_args(meta=True)
972
954
  def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
955
+
973
956
  selects: list[SelectStatement] = []
974
957
  align: AlignClause | None = None
975
958
  limit: int | None = None
976
959
  order_by: OrderBy | None = None
977
960
  where: WhereClause | None = None
961
+ having: HavingClause | None = None
978
962
  for arg in args:
979
963
  if isinstance(arg, SelectStatement):
980
964
  selects.append(arg)
@@ -984,15 +968,27 @@ class ParseToObjects(Transformer):
984
968
  order_by = arg
985
969
  elif isinstance(arg, WhereClause):
986
970
  where = arg
971
+ elif isinstance(arg, HavingClause):
972
+ having = arg
987
973
  elif isinstance(arg, AlignClause):
988
974
  align = arg
989
975
 
990
976
  assert align
991
977
  assert align is not None
992
- base_local: EnvironmentConceptDict = selects[0].local_concepts
993
- for select in selects[1:]:
994
- for k, v in select.local_concepts.items():
995
- base_local[k] = v
978
+
979
+ derived_concepts = []
980
+ for x in align.items:
981
+ concept = align_item_to_concept(
982
+ x,
983
+ align,
984
+ selects,
985
+ where=where,
986
+ having=having,
987
+ limit=limit,
988
+ environment=self.environment,
989
+ )
990
+ derived_concepts.append(concept)
991
+ self.environment.add_concept(concept, meta=meta)
996
992
  multi = MultiSelectStatement(
997
993
  selects=selects,
998
994
  align=align,
@@ -1001,10 +997,8 @@ class ParseToObjects(Transformer):
1001
997
  order_by=order_by,
1002
998
  limit=limit,
1003
999
  meta=Metadata(line_number=meta.line),
1004
- local_concepts=base_local,
1000
+ derived_concepts=derived_concepts,
1005
1001
  )
1006
- for concept in multi.derived_concepts:
1007
- self.environment.add_concept(concept, meta=meta)
1008
1002
  return multi
1009
1003
 
1010
1004
  @v_args(meta=True)
@@ -1114,15 +1108,10 @@ class ParseToObjects(Transformer):
1114
1108
  return ""
1115
1109
  return args[0]
1116
1110
 
1117
- def struct_lit(self, args):
1118
- zipped = dict(zip(args[::2], args[1::2]))
1119
- types = [arg_to_datatype(x) for x in args[1::2]]
1120
- return Function(
1121
- operator=FunctionType.STRUCT,
1122
- output_datatype=StructType(fields=types, fields_map=zipped),
1123
- output_purpose=function_args_to_output_purpose(args),
1124
- arguments=args,
1125
- arg_count=-1,
1111
+ @v_args(meta=True)
1112
+ def struct_lit(self, meta, args):
1113
+ return self.function_factory.create_function(
1114
+ args, operator=FunctionType.STRUCT, meta=meta
1126
1115
  )
1127
1116
 
1128
1117
  def map_lit(self, args):
@@ -1137,19 +1126,21 @@ class ParseToObjects(Transformer):
1137
1126
  if args[1] == ComparisonOperator.IN:
1138
1127
  raise SyntaxError
1139
1128
  if isinstance(args[0], AggregateWrapper):
1140
- left = arbitrary_to_concept(
1129
+ left_c = arbitrary_to_concept(
1141
1130
  args[0],
1142
1131
  environment=self.environment,
1143
1132
  )
1144
- self.environment.add_concept(left)
1133
+ self.environment.add_concept(left_c)
1134
+ left = left_c.reference
1145
1135
  else:
1146
1136
  left = args[0]
1147
1137
  if isinstance(args[2], AggregateWrapper):
1148
- right = arbitrary_to_concept(
1138
+ right_c = arbitrary_to_concept(
1149
1139
  args[2],
1150
1140
  environment=self.environment,
1151
1141
  )
1152
- self.environment.add_concept(right)
1142
+ self.environment.add_concept(right_c)
1143
+ right = right_c.reference
1153
1144
  else:
1154
1145
  right = args[2]
1155
1146
  return Comparison(left=left, right=right, operator=args[1])
@@ -1185,8 +1176,9 @@ class ParseToObjects(Transformer):
1185
1176
  ):
1186
1177
  right = right.content
1187
1178
  if isinstance(right, (Function, FilterItem, WindowItem, AggregateWrapper)):
1188
- right = arbitrary_to_concept(right, environment=self.environment)
1189
- self.environment.add_concept(right, meta=meta)
1179
+ right_concept = arbitrary_to_concept(right, environment=self.environment)
1180
+ self.environment.add_concept(right_concept, meta=meta)
1181
+ right = right_concept.reference
1190
1182
  return SubselectComparison(
1191
1183
  left=args[0],
1192
1184
  right=right,
@@ -1233,6 +1225,12 @@ class ParseToObjects(Transformer):
1233
1225
  def window_item_order(self, args):
1234
1226
  return WindowItemOrder(contents=args[0])
1235
1227
 
1228
+ def logical_operator(self, args):
1229
+ return BooleanOperator(args[0].value.lower())
1230
+
1231
+ def DATE_PART(self, args):
1232
+ return DatePart(args.value)
1233
+
1236
1234
  @v_args(meta=True)
1237
1235
  def window_item(self, meta, args) -> WindowItem:
1238
1236
  type: WindowType = args[0]
@@ -1249,8 +1247,8 @@ class ParseToObjects(Transformer):
1249
1247
  over = item.contents
1250
1248
  elif isinstance(item, str):
1251
1249
  concept = self.environment.concepts[item]
1252
- elif isinstance(item, Concept):
1253
- concept = item
1250
+ elif isinstance(item, ConceptRef):
1251
+ concept = self.environment.concepts[item.address]
1254
1252
  elif isinstance(item, WindowType):
1255
1253
  type = item
1256
1254
  else:
@@ -1258,7 +1256,11 @@ class ParseToObjects(Transformer):
1258
1256
  self.environment.add_concept(concept, meta=meta)
1259
1257
  assert concept
1260
1258
  return WindowItem(
1261
- type=type, content=concept, over=over, order_by=order_by, index=index
1259
+ type=type,
1260
+ content=concept.reference,
1261
+ over=over,
1262
+ order_by=order_by,
1263
+ index=index,
1262
1264
  )
1263
1265
 
1264
1266
  def filter_item(self, args) -> FilterItem:
@@ -1268,7 +1270,7 @@ class ParseToObjects(Transformer):
1268
1270
  where = raw
1269
1271
  else:
1270
1272
  where = WhereClause(conditional=raw)
1271
- concept = self.environment.concepts[string_concept]
1273
+ concept = self.environment.concepts[string_concept].reference
1272
1274
  return FilterItem(content=concept, where=where)
1273
1275
 
1274
1276
  # BEGIN FUNCTIONS
@@ -1285,7 +1287,12 @@ class ParseToObjects(Transformer):
1285
1287
  return args[0]
1286
1288
 
1287
1289
  def aggregate_all(self, args):
1288
- return [self.environment.concepts[f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}"]]
1290
+ return [
1291
+ ConceptRef(
1292
+ address=f"{INTERNAL_NAMESPACE}.{ALL_ROWS_CONCEPT}",
1293
+ datatype=DataType.INTEGER,
1294
+ )
1295
+ ]
1289
1296
 
1290
1297
  def aggregate_functions(self, args):
1291
1298
  if len(args) == 2:
@@ -1295,452 +1302,190 @@ class ParseToObjects(Transformer):
1295
1302
  @v_args(meta=True)
1296
1303
  def index_access(self, meta, args):
1297
1304
  args = process_function_args(args, meta=meta, environment=self.environment)
1298
- if args[0].datatype == DataType.MAP or isinstance(args[0].datatype, MapType):
1299
- return MapAccess(args)
1300
- return IndexAccess(args)
1305
+ base = args[0]
1306
+ if base.datatype == DataType.MAP or isinstance(base.datatype, MapType):
1307
+ return self.function_factory.create_function(
1308
+ args, FunctionType.MAP_ACCESS, meta
1309
+ )
1310
+ return self.function_factory.create_function(
1311
+ args, FunctionType.INDEX_ACCESS, meta
1312
+ )
1301
1313
 
1302
1314
  @v_args(meta=True)
1303
1315
  def map_key_access(self, meta, args):
1304
- args = process_function_args(args, meta=meta, environment=self.environment)
1305
- return MapAccess(args)
1316
+ return self.function_factory.create_function(
1317
+ args, FunctionType.MAP_ACCESS, meta
1318
+ )
1306
1319
 
1307
1320
  @v_args(meta=True)
1308
1321
  def attr_access(self, meta, args):
1309
- args = process_function_args(args, meta=meta, environment=self.environment)
1310
- return AttrAccess(args)
1322
+ return self.function_factory.create_function(
1323
+ args, FunctionType.ATTR_ACCESS, meta
1324
+ )
1311
1325
 
1312
1326
  @v_args(meta=True)
1313
1327
  def fcoalesce(self, meta, args):
1314
- args = process_function_args(args, meta=meta, environment=self.environment)
1315
- return Coalesce(args)
1328
+ return self.function_factory.create_function(args, FunctionType.COALESCE, meta)
1316
1329
 
1317
1330
  @v_args(meta=True)
1318
1331
  def unnest(self, meta, args):
1319
- args = process_function_args(args, meta=meta, environment=self.environment)
1320
- return Unnest(args)
1332
+ return self.function_factory.create_function(args, FunctionType.UNNEST, meta)
1321
1333
 
1322
1334
  @v_args(meta=True)
1323
1335
  def count(self, meta, args):
1324
- args = process_function_args(args, meta=meta, environment=self.environment)
1325
- return Count(args)
1336
+ return self.function_factory.create_function(args, FunctionType.COUNT, meta)
1326
1337
 
1327
1338
  @v_args(meta=True)
1328
1339
  def fgroup(self, meta, args):
1329
1340
  if len(args) == 2:
1330
- fargs = [args[0]] + args[1]
1341
+ fargs = [args[0]] + list(args[1])
1331
1342
  else:
1332
1343
  fargs = [args[0]]
1333
- args = process_function_args(fargs, meta=meta, environment=self.environment)
1334
- return Group(args)
1344
+ return self.function_factory.create_function(fargs, FunctionType.GROUP, meta)
1335
1345
 
1336
1346
  @v_args(meta=True)
1337
1347
  def fabs(self, meta, args):
1338
- args = process_function_args(args, meta=meta, environment=self.environment)
1339
- return Abs(args)
1348
+ return self.function_factory.create_function(args, FunctionType.ABS, meta)
1340
1349
 
1341
1350
  @v_args(meta=True)
1342
1351
  def count_distinct(self, meta, args):
1343
- args = process_function_args(args, meta=meta, environment=self.environment)
1344
- return CountDistinct(args)
1352
+ return self.function_factory.create_function(
1353
+ args, FunctionType.COUNT_DISTINCT, meta
1354
+ )
1345
1355
 
1346
1356
  @v_args(meta=True)
1347
1357
  def sum(self, meta, args):
1348
- args = process_function_args(args, meta=meta, environment=self.environment)
1349
- return Function(
1350
- operator=FunctionType.SUM,
1351
- arguments=args,
1352
- output_datatype=args[0].datatype,
1353
- output_purpose=Purpose.METRIC,
1354
- arg_count=1,
1355
- )
1358
+ return self.function_factory.create_function(args, FunctionType.SUM, meta)
1356
1359
 
1357
1360
  @v_args(meta=True)
1358
1361
  def avg(self, meta, args):
1359
- args = process_function_args(args, meta=meta, environment=self.environment)
1360
- arg = args[0]
1361
-
1362
- return Function(
1363
- operator=FunctionType.AVG,
1364
- arguments=args,
1365
- output_datatype=arg.datatype,
1366
- output_purpose=Purpose.METRIC,
1367
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1368
- arg_count=1,
1369
- )
1362
+ return self.function_factory.create_function(args, FunctionType.AVG, meta)
1370
1363
 
1371
1364
  @v_args(meta=True)
1372
1365
  def max(self, meta, args):
1373
- args = process_function_args(args, meta=meta, environment=self.environment)
1374
- return Max(args)
1366
+ return self.function_factory.create_function(args, FunctionType.MAX, meta)
1375
1367
 
1376
1368
  @v_args(meta=True)
1377
1369
  def min(self, meta, args):
1378
- args = process_function_args(args, meta=meta, environment=self.environment)
1379
- return Min(args)
1370
+ return self.function_factory.create_function(args, FunctionType.MIN, meta)
1380
1371
 
1381
1372
  @v_args(meta=True)
1382
1373
  def len(self, meta, args):
1383
- args = process_function_args(args, meta=meta, environment=self.environment)
1384
- return Function(
1385
- operator=FunctionType.LENGTH,
1386
- arguments=args,
1387
- output_datatype=DataType.INTEGER,
1388
- output_purpose=Purpose.PROPERTY,
1389
- valid_inputs={DataType.STRING, DataType.ARRAY, DataType.MAP},
1390
- # output_grain=args[0].grain,
1391
- )
1374
+ return self.function_factory.create_function(args, FunctionType.LENGTH, meta)
1392
1375
 
1393
1376
  @v_args(meta=True)
1394
1377
  def fsplit(self, meta, args):
1395
- args = process_function_args(args, meta=meta, environment=self.environment)
1396
- return Split(args)
1378
+ return self.function_factory.create_function(args, FunctionType.SPLIT, meta)
1397
1379
 
1398
1380
  @v_args(meta=True)
1399
1381
  def concat(self, meta, args):
1400
- args = process_function_args(args, meta=meta, environment=self.environment)
1401
- return Function(
1402
- operator=FunctionType.CONCAT,
1403
- arguments=args,
1404
- output_datatype=DataType.STRING,
1405
- output_purpose=Purpose.PROPERTY,
1406
- valid_inputs={DataType.STRING},
1407
- arg_count=-1,
1408
- # output_grain=args[0].grain,
1409
- )
1382
+ return self.function_factory.create_function(args, FunctionType.CONCAT, meta)
1410
1383
 
1411
1384
  @v_args(meta=True)
1412
1385
  def union(self, meta, args):
1413
- args = process_function_args(args, meta=meta, environment=self.environment)
1414
- output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1415
- return Function(
1416
- operator=FunctionType.UNION,
1417
- arguments=args,
1418
- output_datatype=output_datatype,
1419
- output_purpose=Purpose.KEY,
1420
- valid_inputs={*DataType},
1421
- arg_count=-1,
1422
- )
1386
+ return self.function_factory.create_function(args, FunctionType.UNION, meta)
1423
1387
 
1424
1388
  @v_args(meta=True)
1425
1389
  def like(self, meta, args):
1426
- args = process_function_args(args, meta=meta, environment=self.environment)
1427
- return Function(
1428
- operator=FunctionType.LIKE,
1429
- arguments=args,
1430
- output_datatype=DataType.BOOL,
1431
- output_purpose=Purpose.PROPERTY,
1432
- valid_inputs={DataType.STRING},
1433
- arg_count=2,
1434
- )
1390
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
1435
1391
 
1436
1392
  @v_args(meta=True)
1437
1393
  def alt_like(self, meta, args):
1438
- args = process_function_args(args, meta=meta, environment=self.environment)
1439
- return Function(
1440
- operator=FunctionType.LIKE,
1441
- arguments=args,
1442
- output_datatype=DataType.BOOL,
1443
- output_purpose=Purpose.PROPERTY,
1444
- valid_inputs={DataType.STRING},
1445
- arg_count=2,
1446
- )
1394
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
1447
1395
 
1448
1396
  @v_args(meta=True)
1449
1397
  def ilike(self, meta, args):
1450
- args = process_function_args(args, meta=meta, environment=self.environment)
1451
- return Function(
1452
- operator=FunctionType.ILIKE,
1453
- arguments=args,
1454
- output_datatype=DataType.BOOL,
1455
- output_purpose=Purpose.PROPERTY,
1456
- valid_inputs={DataType.STRING},
1457
- arg_count=2,
1458
- )
1398
+ return self.function_factory.create_function(args, FunctionType.LIKE, meta)
1459
1399
 
1460
1400
  @v_args(meta=True)
1461
1401
  def upper(self, meta, args):
1462
- args = process_function_args(args, meta=meta, environment=self.environment)
1463
- return Function(
1464
- operator=FunctionType.UPPER,
1465
- arguments=args,
1466
- output_datatype=DataType.STRING,
1467
- output_purpose=Purpose.PROPERTY,
1468
- valid_inputs={DataType.STRING},
1469
- arg_count=1,
1470
- )
1402
+ return self.function_factory.create_function(args, FunctionType.UPPER, meta)
1471
1403
 
1472
1404
  @v_args(meta=True)
1473
1405
  def fstrpos(self, meta, args):
1474
- args = process_function_args(args, meta=meta, environment=self.environment)
1475
- return StrPos(args)
1406
+ return self.function_factory.create_function(args, FunctionType.STRPOS, meta)
1476
1407
 
1477
1408
  @v_args(meta=True)
1478
1409
  def fsubstring(self, meta, args):
1479
- args = process_function_args(args, meta=meta, environment=self.environment)
1480
- return SubString(args)
1481
-
1482
- def logical_operator(self, args):
1483
- return BooleanOperator(args[0].value.lower())
1410
+ return self.function_factory.create_function(args, FunctionType.SUBSTRING, meta)
1484
1411
 
1485
1412
  @v_args(meta=True)
1486
1413
  def lower(self, meta, args):
1487
- args = process_function_args(args, meta=meta, environment=self.environment)
1488
- return Function(
1489
- operator=FunctionType.LOWER,
1490
- arguments=args,
1491
- output_datatype=DataType.STRING,
1492
- output_purpose=Purpose.PROPERTY,
1493
- valid_inputs={DataType.STRING},
1494
- arg_count=1,
1495
- )
1414
+ return self.function_factory.create_function(args, FunctionType.LOWER, meta)
1496
1415
 
1497
1416
  # date functions
1498
1417
  @v_args(meta=True)
1499
1418
  def fdate(self, meta, args):
1500
- args = process_function_args(args, meta=meta, environment=self.environment)
1501
- return Function(
1502
- operator=FunctionType.DATE,
1503
- arguments=args,
1504
- output_datatype=DataType.DATE,
1505
- output_purpose=Purpose.PROPERTY,
1506
- valid_inputs={
1507
- DataType.DATE,
1508
- DataType.TIMESTAMP,
1509
- DataType.DATETIME,
1510
- DataType.STRING,
1511
- },
1512
- arg_count=1,
1513
- )
1514
-
1515
- def DATE_PART(self, args):
1516
- return DatePart(args.value)
1419
+ return self.function_factory.create_function(args, FunctionType.DATE, meta)
1517
1420
 
1518
1421
  @v_args(meta=True)
1519
1422
  def fdate_trunc(self, meta, args):
1520
- args = process_function_args(args, meta=meta, environment=self.environment)
1521
- return Function(
1522
- operator=FunctionType.DATE_TRUNCATE,
1523
- arguments=args,
1524
- output_datatype=DataType.DATE,
1525
- output_purpose=Purpose.PROPERTY,
1526
- valid_inputs=[
1527
- {
1528
- DataType.DATE,
1529
- DataType.TIMESTAMP,
1530
- DataType.DATETIME,
1531
- DataType.STRING,
1532
- },
1533
- {DataType.DATE_PART},
1534
- ],
1535
- arg_count=2,
1423
+ return self.function_factory.create_function(
1424
+ args, FunctionType.DATE_TRUNCATE, meta
1536
1425
  )
1537
1426
 
1538
1427
  @v_args(meta=True)
1539
1428
  def fdate_part(self, meta, args):
1540
- args = process_function_args(args, meta=meta, environment=self.environment)
1541
- return Function(
1542
- operator=FunctionType.DATE_PART,
1543
- arguments=args,
1544
- output_datatype=DataType.DATE,
1545
- output_purpose=Purpose.PROPERTY,
1546
- valid_inputs=[
1547
- {
1548
- DataType.DATE,
1549
- DataType.TIMESTAMP,
1550
- DataType.DATETIME,
1551
- DataType.STRING,
1552
- },
1553
- {DataType.DATE_PART},
1554
- ],
1555
- arg_count=2,
1556
- )
1429
+ return self.function_factory.create_function(args, FunctionType.DATE_PART, meta)
1557
1430
 
1558
1431
  @v_args(meta=True)
1559
1432
  def fdate_add(self, meta, args):
1560
- args = process_function_args(args, meta=meta, environment=self.environment)
1561
- return Function(
1562
- operator=FunctionType.DATE_ADD,
1563
- arguments=args,
1564
- output_datatype=DataType.DATE,
1565
- output_purpose=Purpose.PROPERTY,
1566
- valid_inputs=[
1567
- {
1568
- DataType.DATE,
1569
- DataType.TIMESTAMP,
1570
- DataType.DATETIME,
1571
- DataType.STRING,
1572
- },
1573
- {DataType.DATE_PART},
1574
- {DataType.INTEGER},
1575
- ],
1576
- arg_count=3,
1577
- )
1433
+ return self.function_factory.create_function(args, FunctionType.DATE_ADD, meta)
1578
1434
 
1579
1435
  @v_args(meta=True)
1580
1436
  def fdate_diff(self, meta, args):
1581
- args = process_function_args(args, meta=meta, environment=self.environment)
1582
- purpose = function_args_to_output_purpose(args)
1583
- return Function(
1584
- operator=FunctionType.DATE_DIFF,
1585
- arguments=args,
1586
- output_datatype=DataType.INTEGER,
1587
- output_purpose=purpose,
1588
- valid_inputs=[
1589
- {
1590
- DataType.DATE,
1591
- DataType.TIMESTAMP,
1592
- DataType.DATETIME,
1593
- },
1594
- {
1595
- DataType.DATE,
1596
- DataType.TIMESTAMP,
1597
- DataType.DATETIME,
1598
- },
1599
- {DataType.DATE_PART},
1600
- ],
1601
- arg_count=3,
1602
- )
1437
+ return self.function_factory.create_function(args, FunctionType.DATE_DIFF, meta)
1603
1438
 
1604
1439
  @v_args(meta=True)
1605
1440
  def fdatetime(self, meta, args):
1606
- args = process_function_args(args, meta=meta, environment=self.environment)
1607
- return Function(
1608
- operator=FunctionType.DATETIME,
1609
- arguments=args,
1610
- output_datatype=DataType.DATETIME,
1611
- output_purpose=Purpose.PROPERTY,
1612
- valid_inputs={
1613
- DataType.DATE,
1614
- DataType.TIMESTAMP,
1615
- DataType.DATETIME,
1616
- DataType.STRING,
1617
- },
1618
- arg_count=1,
1619
- )
1441
+ return self.function_factory.create_function(args, FunctionType.DATETIME, meta)
1620
1442
 
1621
1443
  @v_args(meta=True)
1622
1444
  def ftimestamp(self, meta, args):
1623
- args = process_function_args(args, meta=meta, environment=self.environment)
1624
- return Function(
1625
- operator=FunctionType.TIMESTAMP,
1626
- arguments=args,
1627
- output_datatype=DataType.TIMESTAMP,
1628
- output_purpose=Purpose.PROPERTY,
1629
- valid_inputs=[{DataType.TIMESTAMP, DataType.STRING}],
1630
- arg_count=1,
1631
- )
1445
+ return self.function_factory.create_function(args, FunctionType.TIMESTAMP, meta)
1632
1446
 
1633
1447
  @v_args(meta=True)
1634
1448
  def fsecond(self, meta, args):
1635
- args = process_function_args(args, meta=meta, environment=self.environment)
1636
- return Function(
1637
- operator=FunctionType.SECOND,
1638
- arguments=args,
1639
- output_datatype=DataType.INTEGER,
1640
- output_purpose=Purpose.PROPERTY,
1641
- valid_inputs={DataType.TIMESTAMP, DataType.DATETIME},
1642
- arg_count=1,
1643
- )
1449
+ return self.function_factory.create_function(args, FunctionType.SECOND, meta)
1644
1450
 
1645
1451
  @v_args(meta=True)
1646
1452
  def fminute(self, meta, args):
1647
- args = process_function_args(args, meta=meta, environment=self.environment)
1648
- return Function(
1649
- operator=FunctionType.MINUTE,
1650
- arguments=args,
1651
- output_datatype=DataType.INTEGER,
1652
- output_purpose=Purpose.PROPERTY,
1653
- valid_inputs={DataType.TIMESTAMP, DataType.DATETIME},
1654
- arg_count=1,
1655
- )
1453
+ return self.function_factory.create_function(args, FunctionType.MINUTE, meta)
1656
1454
 
1657
1455
  @v_args(meta=True)
1658
1456
  def fhour(self, meta, args):
1659
- args = process_function_args(args, meta=meta, environment=self.environment)
1660
- return Function(
1661
- operator=FunctionType.HOUR,
1662
- arguments=args,
1663
- output_datatype=DataType.INTEGER,
1664
- output_purpose=Purpose.PROPERTY,
1665
- valid_inputs={DataType.TIMESTAMP, DataType.DATETIME},
1666
- arg_count=1,
1667
- )
1457
+ return self.function_factory.create_function(args, FunctionType.HOUR, meta)
1668
1458
 
1669
1459
  @v_args(meta=True)
1670
1460
  def fday(self, meta, args):
1671
- args = process_function_args(args, meta=meta, environment=self.environment)
1672
- return Function(
1673
- operator=FunctionType.DAY,
1674
- arguments=args,
1675
- output_datatype=DataType.INTEGER,
1676
- output_purpose=Purpose.PROPERTY,
1677
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1678
- arg_count=1,
1679
- )
1461
+ return self.function_factory.create_function(args, FunctionType.DAY, meta)
1680
1462
 
1681
1463
  @v_args(meta=True)
1682
1464
  def fday_of_week(self, meta, args):
1683
- args = process_function_args(args, meta=meta, environment=self.environment)
1684
- return Function(
1685
- operator=FunctionType.DAY_OF_WEEK,
1686
- arguments=args,
1687
- output_datatype=DataType.INTEGER,
1688
- output_purpose=Purpose.PROPERTY,
1689
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1690
- arg_count=1,
1465
+ return self.function_factory.create_function(
1466
+ args, FunctionType.DAY_OF_WEEK, meta
1691
1467
  )
1692
1468
 
1693
1469
  @v_args(meta=True)
1694
1470
  def fweek(self, meta, args):
1695
- args = process_function_args(args, meta=meta, environment=self.environment)
1696
- return Function(
1697
- operator=FunctionType.WEEK,
1698
- arguments=args,
1699
- output_datatype=DataType.INTEGER,
1700
- output_purpose=Purpose.PROPERTY,
1701
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1702
- arg_count=1,
1703
- )
1471
+ return self.function_factory.create_function(args, FunctionType.WEEK, meta)
1704
1472
 
1705
1473
  @v_args(meta=True)
1706
1474
  def fmonth(self, meta, args):
1707
- args = process_function_args(args, meta=meta, environment=self.environment)
1708
- return Function(
1709
- operator=FunctionType.MONTH,
1710
- arguments=args,
1711
- output_datatype=DataType.INTEGER,
1712
- output_purpose=Purpose.PROPERTY,
1713
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1714
- arg_count=1,
1715
- )
1475
+ return self.function_factory.create_function(args, FunctionType.MONTH, meta)
1716
1476
 
1717
1477
  @v_args(meta=True)
1718
1478
  def fquarter(self, meta, args):
1719
- args = process_function_args(args, meta=meta, environment=self.environment)
1720
- return Function(
1721
- operator=FunctionType.QUARTER,
1722
- arguments=args,
1723
- output_datatype=DataType.INTEGER,
1724
- output_purpose=Purpose.PROPERTY,
1725
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1726
- arg_count=1,
1727
- )
1479
+ return self.function_factory.create_function(args, FunctionType.QUARTER, meta)
1728
1480
 
1729
1481
  @v_args(meta=True)
1730
1482
  def fyear(self, meta, args):
1731
- args = process_function_args(args, meta=meta, environment=self.environment)
1732
- return Function(
1733
- operator=FunctionType.YEAR,
1734
- arguments=args,
1735
- output_datatype=DataType.INTEGER,
1736
- output_purpose=Purpose.PROPERTY,
1737
- valid_inputs={DataType.DATE, DataType.TIMESTAMP, DataType.DATETIME},
1738
- arg_count=1,
1739
- )
1483
+ return self.function_factory.create_function(args, FunctionType.YEAR, meta)
1740
1484
 
1741
1485
  # utility functions
1742
1486
  @v_args(meta=True)
1743
1487
  def fcast(self, meta, args) -> Function:
1488
+ # if it's casting a constant, we'll process that directly
1744
1489
  args = process_function_args(args, meta=meta, environment=self.environment)
1745
1490
  if isinstance(args[0], str):
1746
1491
  processed: date | datetime | int | float | bool | str
@@ -1760,134 +1505,39 @@ class ParseToObjects(Transformer):
1760
1505
  processed = args[0]
1761
1506
  else:
1762
1507
  raise SyntaxError(f"Invalid cast type {args[1]}")
1763
- return Function(
1764
- operator=FunctionType.CONSTANT,
1765
- output_datatype=args[1],
1766
- output_purpose=Purpose.CONSTANT,
1767
- arguments=[processed],
1508
+ return self.function_factory.create_function(
1509
+ [processed], FunctionType.CONSTANT, meta
1768
1510
  )
1769
- output_datatype = args[1]
1770
- return Function(
1771
- operator=FunctionType.CAST,
1772
- arguments=args,
1773
- output_datatype=output_datatype,
1774
- output_purpose=function_args_to_output_purpose(args),
1775
- valid_inputs={
1776
- DataType.INTEGER,
1777
- DataType.STRING,
1778
- DataType.FLOAT,
1779
- DataType.NUMBER,
1780
- DataType.NUMERIC,
1781
- DataType.BOOL,
1782
- },
1783
- arg_count=2,
1784
- )
1511
+ return self.function_factory.create_function(args, FunctionType.CAST, meta)
1785
1512
 
1786
1513
  # math functions
1787
1514
  @v_args(meta=True)
1788
1515
  def fadd(self, meta, args) -> Function:
1789
- args = process_function_args(args, meta=meta, environment=self.environment)
1790
- output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1791
- return Function(
1792
- operator=FunctionType.ADD,
1793
- arguments=args,
1794
- output_datatype=output_datatype,
1795
- output_purpose=function_args_to_output_purpose(args),
1796
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1797
- arg_count=-1,
1798
- )
1516
+ return self.function_factory.create_function(args, FunctionType.ADD, meta)
1799
1517
 
1800
1518
  @v_args(meta=True)
1801
1519
  def fsub(self, meta, args) -> Function:
1802
- args = process_function_args(args, meta=meta, environment=self.environment)
1803
- output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1804
- return Function(
1805
- operator=FunctionType.SUBTRACT,
1806
- arguments=args,
1807
- output_datatype=output_datatype,
1808
- output_purpose=function_args_to_output_purpose(args),
1809
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1810
- arg_count=-1,
1811
- )
1520
+ return self.function_factory.create_function(args, FunctionType.SUBTRACT, meta)
1812
1521
 
1813
1522
  @v_args(meta=True)
1814
1523
  def fmul(self, meta, args) -> Function:
1815
- args = process_function_args(args, meta=meta, environment=self.environment)
1816
- output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1817
- return Function(
1818
- operator=FunctionType.MULTIPLY,
1819
- arguments=args,
1820
- output_datatype=output_datatype,
1821
- output_purpose=function_args_to_output_purpose(args),
1822
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1823
- arg_count=-1,
1824
- )
1524
+ return self.function_factory.create_function(args, FunctionType.MULTIPLY, meta)
1825
1525
 
1826
1526
  @v_args(meta=True)
1827
- def fdiv(self, meta: Meta, args):
1828
- args = process_function_args(args, meta=meta, environment=self.environment)
1829
- # 2024-11-18 - this is a bit of a hack, but division always returns a float
1830
- # output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
1831
- return Function(
1832
- operator=FunctionType.DIVIDE,
1833
- arguments=args,
1834
- output_datatype=DataType.FLOAT, # division always returns a float
1835
- output_purpose=function_args_to_output_purpose(args),
1836
- valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1837
- arg_count=-1,
1838
- )
1527
+ def fdiv(self, meta: Meta, args) -> Function:
1528
+ return self.function_factory.create_function(args, FunctionType.DIVIDE, meta)
1839
1529
 
1840
1530
  @v_args(meta=True)
1841
- def fmod(self, meta: Meta, args):
1842
- args = process_function_args(args, meta=meta, environment=self.environment)
1843
- return Function(
1844
- operator=FunctionType.MOD,
1845
- arguments=args,
1846
- output_datatype=DataType.INTEGER,
1847
- output_purpose=function_args_to_output_purpose(args),
1848
- valid_inputs=[
1849
- {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1850
- {DataType.INTEGER},
1851
- ],
1852
- arg_count=2,
1853
- )
1531
+ def fmod(self, meta: Meta, args) -> Function:
1532
+ return self.function_factory.create_function(args, FunctionType.MOD, meta)
1854
1533
 
1855
1534
  @v_args(meta=True)
1856
1535
  def fround(self, meta, args) -> Function:
1857
- args = process_function_args(args, meta=meta, environment=self.environment)
1858
- output_datatype = arg_to_datatype(args[0])
1859
- return Function(
1860
- operator=FunctionType.ROUND,
1861
- arguments=args,
1862
- output_datatype=output_datatype,
1863
- output_purpose=function_args_to_output_purpose(args),
1864
- valid_inputs=[
1865
- {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
1866
- {DataType.INTEGER},
1867
- ],
1868
- arg_count=2,
1869
- )
1536
+ return self.function_factory.create_function(args, FunctionType.ROUND, meta)
1870
1537
 
1871
- def fcase(self, args: List[Union[CaseWhen, CaseElse]]):
1872
- datatypes = set()
1873
- mapz = dict()
1874
- for arg in args:
1875
- output_datatype = arg_to_datatype(arg.expr)
1876
- if output_datatype != DataType.NULL:
1877
- datatypes.add(output_datatype)
1878
- mapz[str(arg.expr)] = output_datatype
1879
- if not len(datatypes) == 1:
1880
- raise SyntaxError(
1881
- f"All case expressions must have the same output datatype, got {datatypes} from {mapz}"
1882
- )
1883
- return Function(
1884
- operator=FunctionType.CASE,
1885
- arguments=args,
1886
- output_datatype=datatypes.pop(),
1887
- output_purpose=Purpose.PROPERTY,
1888
- # valid_inputs=[{DataType.INTEGER, DataType.FLOAT, DataType.NUMBER}, {DataType.INTEGER}],
1889
- arg_count=InfiniteFunctionArgs,
1890
- )
1538
+ @v_args(meta=True)
1539
+ def fcase(self, meta, args: List[Union[CaseWhen, CaseElse]]) -> Function:
1540
+ return self.function_factory.create_function(args, FunctionType.CASE, meta)
1891
1541
 
1892
1542
  @v_args(meta=True)
1893
1543
  def fcase_when(self, meta, args) -> CaseWhen:
@@ -1902,23 +1552,19 @@ class ParseToObjects(Transformer):
1902
1552
 
1903
1553
  @v_args(meta=True)
1904
1554
  def fcurrent_date(self, meta, args):
1905
- args = process_function_args(args, meta=meta, environment=self.environment)
1906
1555
  return CurrentDate([])
1907
1556
 
1908
1557
  @v_args(meta=True)
1909
1558
  def fcurrent_datetime(self, meta, args):
1910
- args = process_function_args(args, meta=meta, environment=self.environment)
1911
1559
  return CurrentDatetime([])
1912
1560
 
1913
1561
  @v_args(meta=True)
1914
1562
  def fnot(self, meta, args):
1915
- args = process_function_args(args, meta=meta, environment=self.environment)
1916
- return IsNull(args)
1563
+ return self.function_factory.create_function(args, FunctionType.IS_NULL, meta)
1917
1564
 
1918
1565
  @v_args(meta=True)
1919
1566
  def fbool(self, meta, args):
1920
- args = process_function_args(args, meta=meta, environment=self.environment)
1921
- return Bool(args)
1567
+ return self.function_factory.create_function(args, FunctionType.BOOL, meta)
1922
1568
 
1923
1569
 
1924
1570
  def unpack_visit_error(e: VisitError):