pytrilogy 0.0.3.113__py3-none-any.whl → 0.0.3.116__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (30)
  1. {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/RECORD +30 -30
  3. trilogy/__init__.py +1 -1
  4. trilogy/constants.py +29 -0
  5. trilogy/core/enums.py +6 -1
  6. trilogy/core/functions.py +33 -0
  7. trilogy/core/models/author.py +126 -2
  8. trilogy/core/models/build.py +70 -7
  9. trilogy/core/models/environment.py +2 -1
  10. trilogy/core/optimization.py +3 -2
  11. trilogy/core/optimizations/hide_unused_concept.py +1 -5
  12. trilogy/core/processing/concept_strategies_v3.py +26 -5
  13. trilogy/core/processing/discovery_node_factory.py +2 -2
  14. trilogy/core/processing/discovery_utility.py +11 -4
  15. trilogy/core/processing/node_generators/basic_node.py +26 -15
  16. trilogy/core/processing/node_generators/common.py +4 -1
  17. trilogy/core/processing/node_generators/filter_node.py +7 -0
  18. trilogy/core/processing/node_generators/multiselect_node.py +3 -3
  19. trilogy/core/processing/node_generators/unnest_node.py +77 -6
  20. trilogy/core/statements/author.py +4 -1
  21. trilogy/dialect/base.py +42 -2
  22. trilogy/executor.py +1 -1
  23. trilogy/parsing/common.py +117 -20
  24. trilogy/parsing/parse_engine.py +115 -5
  25. trilogy/parsing/render.py +2 -1
  26. trilogy/parsing/trilogy.lark +20 -7
  27. {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/WHEEL +0 -0
  28. {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/entry_points.txt +0 -0
  29. {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/licenses/LICENSE.md +0 -0
  30. {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/top_level.txt +0 -0
trilogy/dialect/base.py CHANGED
@@ -176,6 +176,20 @@ def struct_arg(args):
176
176
  return [f"{x[1]}: {x[0]}" for x in zip(args[::2], args[1::2])]
177
177
 
178
178
 
179
+ def hash_from_args(val, hash_type):
180
+ hash_type = hash_type[1:-1]
181
+ if hash_type.lower() == "md5":
182
+ return f"md5({val})"
183
+ elif hash_type.lower() == "sha1":
184
+ return f"sha1({val})"
185
+ elif hash_type.lower() == "sha256":
186
+ return f"sha256({val})"
187
+ elif hash_type.lower() == "sha512":
188
+ return f"sha512({val})"
189
+ else:
190
+ raise ValueError(f"Unsupported hash type: {hash_type}")
191
+
192
+
179
193
  FUNCTION_MAP = {
180
194
  # generic types
181
195
  FunctionType.ALIAS: lambda x: f"{x[0]}",
@@ -194,6 +208,13 @@ FUNCTION_MAP = {
194
208
  FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
195
209
  FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
196
210
  FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
211
+ FunctionType.DATE_SPINE: lambda x: f"""unnest(
212
+ generate_series(
213
+ {x[0]},
214
+ {x[1]},
215
+ INTERVAL '1 day'
216
+ )
217
+ )""",
197
218
  FunctionType.RECURSE_EDGE: lambda x: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END",
198
219
  FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
199
220
  FunctionType.STRUCT: lambda x: f"{{{', '.join(struct_arg(x))}}}",
@@ -213,6 +234,9 @@ FUNCTION_MAP = {
213
234
  FunctionType.ARRAY_TO_STRING: lambda args: (
214
235
  f"array_to_string({args[0]}, {args[1]})"
215
236
  ),
237
+ FunctionType.ARRAY_FILTER: lambda args: (
238
+ f"array_filter({args[0]}, {args[1]} -> {args[2]})"
239
+ ),
216
240
  # math
217
241
  FunctionType.ADD: lambda x: " + ".join(x),
218
242
  FunctionType.ABS: lambda x: f"abs({x[0]})",
@@ -237,6 +261,7 @@ FUNCTION_MAP = {
237
261
  FunctionType.AVG: lambda x: f"avg({x[0]})",
238
262
  FunctionType.MAX: lambda x: f"max({x[0]})",
239
263
  FunctionType.MIN: lambda x: f"min({x[0]})",
264
+ FunctionType.ANY: lambda x: f"any_value({x[0]})",
240
265
  # string types
241
266
  FunctionType.LIKE: lambda x: f" {x[0]} like {x[1]} ",
242
267
  FunctionType.UPPER: lambda x: f"UPPER({x[0]}) ",
@@ -249,6 +274,7 @@ FUNCTION_MAP = {
249
274
  FunctionType.REGEXP_REPLACE: lambda x: f"REGEXP_REPLACE({x[0]},{x[1]}, {x[2]})",
250
275
  FunctionType.TRIM: lambda x: f"TRIM({x[0]})",
251
276
  FunctionType.REPLACE: lambda x: f"REPLACE({x[0]},{x[1]},{x[2]})",
277
+ FunctionType.HASH: lambda x: hash_from_args(x[0], x[1]),
252
278
  # FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
253
279
  # date types
254
280
  FunctionType.DATE_TRUNCATE: lambda x: f"date_trunc({x[0]},{x[1]})",
@@ -285,6 +311,7 @@ FUNCTION_GRAIN_MATCH_MAP = {
285
311
  FunctionType.AVG: lambda args: f"{args[0]}",
286
312
  FunctionType.MAX: lambda args: f"{args[0]}",
287
313
  FunctionType.MIN: lambda args: f"{args[0]}",
314
+ FunctionType.ANY: lambda args: f"{args[0]}",
288
315
  }
289
316
 
290
317
 
@@ -472,7 +499,20 @@ class BaseDialect:
472
499
  elif isinstance(c.lineage, BuildRowsetItem):
473
500
  rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False, raise_invalid=raise_invalid)}"
474
501
  elif isinstance(c.lineage, BuildMultiSelectLineage):
475
- rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False, raise_invalid=raise_invalid)}"
502
+ if c.address in c.lineage.calculated_derivations:
503
+ assert c.lineage.derive is not None
504
+ for x in c.lineage.derive.items:
505
+ if x.address == c.address:
506
+ rval = self.render_expr(
507
+ x.expr,
508
+ cte=cte,
509
+ raise_invalid=raise_invalid,
510
+ )
511
+ break
512
+ else:
513
+ rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False, raise_invalid=raise_invalid)}"
514
+ elif isinstance(c.lineage, BuildComparison):
515
+ rval = f"{self.render_expr(c.lineage.left, cte=cte, raise_invalid=raise_invalid)} {c.lineage.operator.value} {self.render_expr(c.lineage.right, cte=cte, raise_invalid=raise_invalid)}"
476
516
  elif isinstance(c.lineage, AGGREGATE_ITEMS):
477
517
  args = [
478
518
  self.render_expr(v, cte) # , alias=False)
@@ -804,7 +844,7 @@ class BaseDialect:
804
844
  if self.rendering.parameters:
805
845
  if e.concept.namespace == DEFAULT_NAMESPACE:
806
846
  return f":{e.concept.name}"
807
- return f":{e.concept.address}"
847
+ return f":{e.concept.address.replace('.', '_')}"
808
848
  elif e.concept.lineage:
809
849
  return self.render_expr(e.concept.lineage, cte=cte, cte_map=cte_map)
810
850
  return f"{self.QUOTE_CHARACTER}{e.concept.address}{self.QUOTE_CHARACTER}"
trilogy/executor.py CHANGED
@@ -397,7 +397,7 @@ class Executor(object):
397
397
  if v.safe_address == param or v.address == param
398
398
  ]
399
399
  if not matched:
400
- raise SyntaxError(f"No concept found for parameter {param}")
400
+ raise SyntaxError(f"No concept found for parameter {param};")
401
401
 
402
402
  concept: Concept = matched.pop()
403
403
  return self._concept_to_value(concept, local_concepts=local_concepts)
trilogy/parsing/common.py CHANGED
@@ -3,9 +3,7 @@ from typing import Iterable, List, Sequence, Tuple
3
3
 
4
4
  from lark.tree import Meta
5
5
 
6
- from trilogy.constants import (
7
- VIRTUAL_CONCEPT_PREFIX,
8
- )
6
+ from trilogy.constants import DEFAULT_NAMESPACE, VIRTUAL_CONCEPT_PREFIX
9
7
  from trilogy.core.constants import ALL_ROWS_CONCEPT
10
8
  from trilogy.core.enums import (
11
9
  ConceptSource,
@@ -64,10 +62,12 @@ ARBITRARY_INPUTS = (
64
62
  | Parenthetical
65
63
  | ListWrapper
66
64
  | MapWrapper
65
+ | Comparison
67
66
  | int
68
67
  | float
69
68
  | str
70
69
  | date
70
+ | bool
71
71
  )
72
72
 
73
73
 
@@ -91,7 +91,7 @@ def process_function_arg(
91
91
  # to simplify anonymous function handling
92
92
  if (
93
93
  arg.operator not in FunctionClass.AGGREGATE_FUNCTIONS.value
94
- and arg.operator != FunctionType.UNNEST
94
+ and arg.operator not in FunctionClass.ONE_TO_MANY.value
95
95
  ):
96
96
  return arg
97
97
  id_hash = string_to_hash(str(arg))
@@ -311,13 +311,18 @@ def concept_is_relevant(
311
311
  if concept.purpose in (Purpose.METRIC,):
312
312
  if all([c in others for c in concept.grain.components]):
313
313
  return False
314
+ if (
315
+ concept.derivation in (Derivation.BASIC,)
316
+ and isinstance(concept.lineage, Function)
317
+ and concept.lineage.operator == FunctionType.DATE_SPINE
318
+ ):
319
+ return True
314
320
  if concept.derivation in (Derivation.BASIC,) and isinstance(
315
321
  concept.lineage, (Function, CaseWhen)
316
322
  ):
317
323
  relevant = False
318
324
  for arg in concept.lineage.arguments:
319
325
  relevant = atom_is_relevant(arg, others, environment) or relevant
320
-
321
326
  return relevant
322
327
  if concept.derivation in (Derivation.BASIC,) and isinstance(
323
328
  concept.lineage, Parenthetical
@@ -394,6 +399,10 @@ def _get_relevant_parent_concepts(arg) -> tuple[list[ConceptRef], bool]:
394
399
  return [x.reference for x in arg.by], True
395
400
  elif isinstance(arg, FunctionCallWrapper):
396
401
  return get_relevant_parent_concepts(arg.content)
402
+ elif isinstance(arg, Comparison):
403
+ left, lflag = get_relevant_parent_concepts(arg.left)
404
+ right, rflag = get_relevant_parent_concepts(arg.right)
405
+ return left + right, lflag or rflag
397
406
  return get_concept_arguments(arg), False
398
407
 
399
408
 
@@ -529,7 +538,7 @@ def function_to_concept(
529
538
  elif parent.operator == FunctionType.UNION:
530
539
  derivation = Derivation.UNION
531
540
  granularity = Granularity.MULTI_ROW
532
- elif parent.operator == FunctionType.UNNEST:
541
+ elif parent.operator in FunctionClass.ONE_TO_MANY.value:
533
542
  derivation = Derivation.UNNEST
534
543
  granularity = Granularity.MULTI_ROW
535
544
  elif parent.operator == FunctionType.RECURSE_EDGE:
@@ -770,6 +779,27 @@ def align_item_to_concept(
770
779
  return new
771
780
 
772
781
 
782
+ def derive_item_to_concept(
783
+ parent: ARBITRARY_INPUTS,
784
+ name: str,
785
+ lineage: MultiSelectLineage,
786
+ namespace: str | None = None,
787
+ ) -> Concept:
788
+ datatype = arg_to_datatype(parent)
789
+ grain = Grain()
790
+ new = Concept(
791
+ name=name,
792
+ datatype=datatype,
793
+ purpose=Purpose.PROPERTY,
794
+ lineage=lineage,
795
+ grain=grain,
796
+ namespace=namespace or DEFAULT_NAMESPACE,
797
+ granularity=Granularity.MULTI_ROW,
798
+ derivation=Derivation.MULTISELECT,
799
+ )
800
+ return new
801
+
802
+
773
803
  def rowset_concept(
774
804
  orig_address: ConceptRef,
775
805
  environment: Environment,
@@ -851,20 +881,7 @@ def rowset_to_concepts(rowset: RowsetDerivationStatement, environment: Environme
851
881
 
852
882
 
853
883
  def generate_concept_name(
854
- parent: (
855
- AggregateWrapper
856
- | FunctionCallWrapper
857
- | WindowItem
858
- | FilterItem
859
- | Function
860
- | ListWrapper
861
- | MapWrapper
862
- | Parenthetical
863
- | int
864
- | float
865
- | str
866
- | date
867
- ),
884
+ parent: ARBITRARY_INPUTS,
868
885
  ) -> str:
869
886
  """Generate a name for a concept based on its parent type and content."""
870
887
  if isinstance(parent, AggregateWrapper):
@@ -885,6 +902,8 @@ def generate_concept_name(
885
902
  return f"{VIRTUAL_CONCEPT_PREFIX}_paren_{string_to_hash(str(parent))}"
886
903
  elif isinstance(parent, FunctionCallWrapper):
887
904
  return f"{VIRTUAL_CONCEPT_PREFIX}_{parent.name}_{string_to_hash(str(parent))}"
905
+ elif isinstance(parent, Comparison):
906
+ return f"{VIRTUAL_CONCEPT_PREFIX}_comp_{string_to_hash(str(parent))}"
888
907
  else: # ListWrapper, MapWrapper, or primitive types
889
908
  return f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"
890
909
 
@@ -909,6 +928,82 @@ def parenthetical_to_concept(
909
928
  )
910
929
 
911
930
 
931
+ def comparison_to_concept(
932
+ parent: Comparison,
933
+ name: str,
934
+ namespace: str,
935
+ environment: Environment,
936
+ metadata: Metadata | None = None,
937
+ ):
938
+ fmetadata = metadata or Metadata()
939
+
940
+ pkeys: List[Concept] = []
941
+ namespace = namespace or environment.namespace
942
+ is_metric = False
943
+ ref_args, is_metric = get_relevant_parent_concepts(parent)
944
+ concrete_args = [environment.concepts[c.address] for c in ref_args]
945
+ pkeys += [
946
+ x
947
+ for x in concrete_args
948
+ if not x.derivation == Derivation.CONSTANT
949
+ and not (x.derivation == Derivation.AGGREGATE and not x.grain.components)
950
+ ]
951
+ grain: Grain | None = Grain()
952
+ for x in pkeys:
953
+ grain += x.grain
954
+ if parent.operator in FunctionClass.ONE_TO_MANY.value:
955
+ # if the function will create more rows, we don't know what grain this is at
956
+ grain = None
957
+ modifiers = get_upstream_modifiers(pkeys, environment)
958
+ key_grain: list[str] = []
959
+ for x in pkeys:
960
+ # metrics will group to keys, so do not do key traversal
961
+ if is_metric:
962
+ key_grain.append(x.address)
963
+ # otherwse, for row ops, assume keys are transitive
964
+ elif x.keys:
965
+ key_grain += [*x.keys]
966
+ else:
967
+ key_grain.append(x.address)
968
+ keys = set(key_grain)
969
+ if is_metric:
970
+ purpose = Purpose.METRIC
971
+ elif not pkeys:
972
+ purpose = Purpose.CONSTANT
973
+ else:
974
+ purpose = Purpose.PROPERTY
975
+ fmetadata = metadata or Metadata()
976
+
977
+ if grain is not None:
978
+ r = Concept(
979
+ name=name,
980
+ datatype=parent.output_datatype,
981
+ purpose=purpose,
982
+ lineage=parent,
983
+ namespace=namespace,
984
+ keys=keys,
985
+ modifiers=modifiers,
986
+ grain=grain,
987
+ metadata=fmetadata,
988
+ derivation=Derivation.BASIC,
989
+ granularity=Granularity.MULTI_ROW,
990
+ )
991
+ return r
992
+
993
+ return Concept(
994
+ name=name,
995
+ datatype=parent.output_datatype,
996
+ purpose=purpose,
997
+ lineage=parent,
998
+ namespace=namespace,
999
+ keys=keys,
1000
+ modifiers=modifiers,
1001
+ metadata=fmetadata,
1002
+ derivation=Derivation.BASIC,
1003
+ granularity=Granularity.MULTI_ROW,
1004
+ )
1005
+
1006
+
912
1007
  def arbitrary_to_concept(
913
1008
  parent: ARBITRARY_INPUTS,
914
1009
  environment: Environment,
@@ -968,5 +1063,7 @@ def arbitrary_to_concept(
968
1063
  return constant_to_concept(parent, name, namespace, metadata)
969
1064
  elif isinstance(parent, Parenthetical):
970
1065
  return parenthetical_to_concept(parent, name, namespace, environment, metadata)
1066
+ elif isinstance(parent, Comparison):
1067
+ return comparison_to_concept(parent, name, namespace, environment, metadata)
971
1068
  else:
972
1069
  return constant_to_concept(parent, name, namespace, metadata)
trilogy/parsing/parse_engine.py CHANGED
@@ -62,6 +62,8 @@ from trilogy.core.models.author import (
62
62
  Conditional,
63
63
  CustomFunctionFactory,
64
64
  CustomType,
65
+ DeriveClause,
66
+ DeriveItem,
65
67
  Expr,
66
68
  FilterItem,
67
69
  Function,
@@ -69,6 +71,7 @@ from trilogy.core.models.author import (
69
71
  Grain,
70
72
  HavingClause,
71
73
  Metadata,
74
+ MultiSelectLineage,
72
75
  OrderBy,
73
76
  OrderItem,
74
77
  Parenthetical,
@@ -135,6 +138,7 @@ from trilogy.parsing.common import (
135
138
  align_item_to_concept,
136
139
  arbitrary_to_concept,
137
140
  constant_to_concept,
141
+ derive_item_to_concept,
138
142
  process_function_args,
139
143
  rowset_to_concepts,
140
144
  )
@@ -603,6 +607,9 @@ class ParseToObjects(Transformer):
603
607
  def PROPERTY(self, args):
604
608
  return Purpose.PROPERTY
605
609
 
610
+ def HASH_TYPE(self, args):
611
+ return args.value
612
+
606
613
  @v_args(meta=True)
607
614
  def prop_ident(self, meta: Meta, args) -> Tuple[List[Concept], str]:
608
615
  return [self.environment.concepts[grain] for grain in args[:-1]], args[-1]
@@ -707,7 +714,14 @@ class ParseToObjects(Transformer):
707
714
 
708
715
  if isinstance(
709
716
  source_value,
710
- (FilterItem, WindowItem, AggregateWrapper, Function, FunctionCallWrapper),
717
+ (
718
+ FilterItem,
719
+ WindowItem,
720
+ AggregateWrapper,
721
+ Function,
722
+ FunctionCallWrapper,
723
+ Comparison,
724
+ ),
711
725
  ):
712
726
  concept = arbitrary_to_concept(
713
727
  source_value,
@@ -1275,6 +1289,17 @@ class ParseToObjects(Transformer):
1275
1289
  def align_clause(self, meta: Meta, args) -> AlignClause:
1276
1290
  return AlignClause(items=args)
1277
1291
 
1292
+ @v_args(meta=True)
1293
+ def derive_item(self, meta: Meta, args) -> DeriveItem:
1294
+ return DeriveItem(
1295
+ expr=args[0], name=args[1], namespace=self.environment.namespace
1296
+ )
1297
+
1298
+ @v_args(meta=True)
1299
+ def derive_clause(self, meta: Meta, args) -> DeriveClause:
1300
+
1301
+ return DeriveClause(items=args)
1302
+
1278
1303
  @v_args(meta=True)
1279
1304
  def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
1280
1305
 
@@ -1284,6 +1309,7 @@ class ParseToObjects(Transformer):
1284
1309
  order_by: OrderBy | None = None
1285
1310
  where: WhereClause | None = None
1286
1311
  having: HavingClause | None = None
1312
+ derive: DeriveClause | None = None
1287
1313
  for arg in args:
1288
1314
  if isinstance(arg, SelectStatement):
1289
1315
  selects.append(arg)
@@ -1297,11 +1323,24 @@ class ParseToObjects(Transformer):
1297
1323
  having = arg
1298
1324
  elif isinstance(arg, AlignClause):
1299
1325
  align = arg
1326
+ elif isinstance(arg, DeriveClause):
1327
+ derive = arg
1300
1328
 
1301
1329
  assert align
1302
1330
  assert align is not None
1303
1331
 
1304
1332
  derived_concepts = []
1333
+ new_selects = [x.as_lineage(self.environment) for x in selects]
1334
+ lineage = MultiSelectLineage(
1335
+ selects=new_selects,
1336
+ align=align,
1337
+ derive=derive,
1338
+ namespace=self.environment.namespace,
1339
+ where_clause=where,
1340
+ having_clause=having,
1341
+ limit=limit,
1342
+ hidden_components=set(y for x in new_selects for y in x.hidden_components),
1343
+ )
1305
1344
  for x in align.items:
1306
1345
  concept = align_item_to_concept(
1307
1346
  x,
@@ -1314,6 +1353,19 @@ class ParseToObjects(Transformer):
1314
1353
  )
1315
1354
  derived_concepts.append(concept)
1316
1355
  self.environment.add_concept(concept, meta=meta)
1356
+ if derive:
1357
+ for derived in derive.items:
1358
+ derivation = derived.expr
1359
+ name = derived.name
1360
+ if not isinstance(derivation, (Function, Comparison, WindowItem)):
1361
+ raise SyntaxError(
1362
+ f"Invalid derive expression {derivation} in {meta.line}, must be a function or conditional"
1363
+ )
1364
+ concept = derive_item_to_concept(
1365
+ derivation, name, lineage, self.environment.namespace
1366
+ )
1367
+ derived_concepts.append(concept)
1368
+ self.environment.add_concept(concept, meta=meta)
1317
1369
  multi = MultiSelectStatement(
1318
1370
  selects=selects,
1319
1371
  align=align,
@@ -1323,6 +1375,7 @@ class ParseToObjects(Transformer):
1323
1375
  limit=limit,
1324
1376
  meta=Metadata(line_number=meta.line),
1325
1377
  derived_concepts=derived_concepts,
1378
+ derive=derive,
1326
1379
  )
1327
1380
  return multi
1328
1381
 
@@ -1819,6 +1872,10 @@ class ParseToObjects(Transformer):
1819
1872
  def array_agg(self, meta, args):
1820
1873
  return self.function_factory.create_function(args, FunctionType.ARRAY_AGG, meta)
1821
1874
 
1875
+ @v_args(meta=True)
1876
+ def any(self, meta, args):
1877
+ return self.function_factory.create_function(args, FunctionType.ANY, meta)
1878
+
1822
1879
  @v_args(meta=True)
1823
1880
  def avg(self, meta, args):
1824
1881
  return self.function_factory.create_function(args, FunctionType.AVG, meta)
@@ -1879,6 +1936,10 @@ class ParseToObjects(Transformer):
1879
1936
  def ftrim(self, meta, args):
1880
1937
  return self.function_factory.create_function(args, FunctionType.TRIM, meta)
1881
1938
 
1939
+ @v_args(meta=True)
1940
+ def fhash(self, meta, args):
1941
+ return self.function_factory.create_function(args, FunctionType.HASH, meta)
1942
+
1882
1943
  @v_args(meta=True)
1883
1944
  def fsubstring(self, meta, args):
1884
1945
  return self.function_factory.create_function(args, FunctionType.SUBSTRING, meta)
@@ -2022,6 +2083,12 @@ class ParseToObjects(Transformer):
2022
2083
  )
2023
2084
  return self.function_factory.create_function(args, FunctionType.CAST, meta)
2024
2085
 
2086
+ @v_args(meta=True)
2087
+ def fdate_spine(self, meta, args) -> Function:
2088
+ return self.function_factory.create_function(
2089
+ args, FunctionType.DATE_SPINE, meta
2090
+ )
2091
+
2025
2092
  # utility functions
2026
2093
  @v_args(meta=True)
2027
2094
  def fcast(self, meta, args) -> Function:
@@ -2191,6 +2258,33 @@ class ParseToObjects(Transformer):
2191
2258
  meta,
2192
2259
  )
2193
2260
 
2261
+ @v_args(meta=True)
2262
+ def farray_filter(self, meta, args) -> Function:
2263
+ factory: CustomFunctionFactory = args[1]
2264
+ if not len(factory.function_arguments) == 1:
2265
+ raise InvalidSyntaxException(
2266
+ "Array filter function must have exactly one argument;"
2267
+ )
2268
+ array_type = arg_to_datatype(args[0])
2269
+ if not isinstance(array_type, ArrayType):
2270
+ raise InvalidSyntaxException(
2271
+ f"Array filter function must be applied to an array, not {array_type}"
2272
+ )
2273
+ return self.function_factory.create_function(
2274
+ [
2275
+ args[0],
2276
+ factory.function_arguments[0],
2277
+ factory(
2278
+ ArgBinding(
2279
+ name=factory.function_arguments[0].name,
2280
+ datatype=array_type.value_data_type,
2281
+ )
2282
+ ),
2283
+ ],
2284
+ FunctionType.ARRAY_FILTER,
2285
+ meta,
2286
+ )
2287
+
2194
2288
 
2195
2289
  def unpack_visit_error(e: VisitError, text: str | None = None):
2196
2290
  """This is required to get exceptions from imports, which would
@@ -2227,6 +2321,7 @@ ERROR_CODES: dict[int, str] = {
2227
2321
  101: "Using FROM keyword? Trilogy does not have a FROM clause (Datasource resolution is automatic).",
2228
2322
  # 200 codes relate to required explicit syntax (we could loosen these?)
2229
2323
  201: 'Missing alias? Alias must be specified with "AS" - e.g. `SELECT x+1 AS y`',
2324
+ 202: "Missing closing semicolon? Statements must be terminated with a semicolon `;`.",
2230
2325
  210: "Missing order direction? Order by must be explicit about direction - specify `asc` or `desc`.",
2231
2326
  }
2232
2327
 
@@ -2307,7 +2402,7 @@ def parse_text(
2307
2402
  )
2308
2403
 
2309
2404
  def _handle_unexpected_token(e: UnexpectedToken, text: str) -> None:
2310
- """Handle UnexpectedToken errors with specific logic."""
2405
+ """Handle UnexpectedToken errors to make friendlier error messages."""
2311
2406
  # Handle ordering direction error
2312
2407
  pos = e.pos_in_stream or 0
2313
2408
  if e.expected == {"ORDERING_DIRECTION"}:
@@ -2319,12 +2414,27 @@ def parse_text(
2319
2414
  )
2320
2415
  if parsed_tokens == ["FROM"]:
2321
2416
  raise _create_syntax_error(101, pos, text)
2322
-
2323
- # Attempt recovery for aliasing
2417
+ # check if they are missing a semicolon
2418
+ try:
2419
+ e.interactive_parser.feed_token(Token("_TERMINATOR", ";"))
2420
+ state = e.interactive_parser.lexer_thread.state
2421
+ if state and state.last_token:
2422
+ new_pos = state.last_token.end_pos or pos
2423
+ else:
2424
+ new_pos = pos
2425
+ raise _create_syntax_error(202, new_pos, text)
2426
+ except UnexpectedToken:
2427
+ pass
2428
+ # check if they forgot an as
2324
2429
  try:
2325
2430
  e.interactive_parser.feed_token(Token("AS", "AS"))
2431
+ state = e.interactive_parser.lexer_thread.state
2432
+ if state and state.last_token:
2433
+ new_pos = state.last_token.end_pos or pos
2434
+ else:
2435
+ new_pos = pos
2326
2436
  e.interactive_parser.feed_token(Token("IDENTIFIER", e.token.value))
2327
- raise _create_syntax_error(201, pos, text)
2437
+ raise _create_syntax_error(201, new_pos, text)
2328
2438
  except UnexpectedToken:
2329
2439
  pass
2330
2440
 
trilogy/parsing/render.py CHANGED
@@ -662,7 +662,8 @@ class Renderer:
662
662
  pair_strings.append(self.indent_lines(pair_line))
663
663
  inputs = ",\n".join(pair_strings)
664
664
  return f"struct(\n{inputs}\n{self.indent_context.current_indent})"
665
-
665
+ if arg.operator == FunctionType.ALIAS:
666
+ return f"{self.to_string(arg.arguments[0])}"
666
667
  inputs = ",".join(args)
667
668
  return f"{arg.operator.value}({inputs})"
668
669
 
trilogy/parsing/trilogy.lark CHANGED
@@ -74,12 +74,16 @@
74
74
  select_statement: where? "select"i select_list where? having? order_by? limit?
75
75
 
76
76
  // multiple_selects
77
- multi_select_statement: select_statement ("merge" select_statement)+ "align"i align_clause where? order_by? limit?
77
+ multi_select_statement: select_statement ("merge" select_statement)+ "align"i align_clause ("derive" derive_clause)? where? order_by? limit?
78
78
 
79
79
  align_item: IDENTIFIER ":" IDENTIFIER ("," IDENTIFIER)* ","?
80
80
 
81
81
  align_clause: align_item ("AND"i align_item)* "AND"i?
82
82
 
83
+ derive_item: expr "->" IDENTIFIER
84
+
85
+ derive_clause: derive_item ("," derive_item)* ","?
86
+
83
87
  merge_statement: "merge"i WILDCARD_IDENTIFIER "into"i SHORTHAND_MODIFIER? WILDCARD_IDENTIFIER
84
88
 
85
89
  // raw sql statement
@@ -113,7 +117,7 @@
113
117
  filter_item: _filter_base | _filter_alt
114
118
 
115
119
  // rank/lag/lead
116
- WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i | "avg"i | "max"i | "min"i ) /[\s]+/
120
+ WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i | "avg"i | "max"i | "min"i | "count"i ) /[\s]+/
117
121
 
118
122
  window_item_over: ("OVER"i over_list)
119
123
 
@@ -308,8 +312,11 @@
308
312
  fregexp_contains: _REGEXP_CONTAINS expr "," expr ")"
309
313
  _REGEXP_REPLACE.1: "regexp_replace("
310
314
  fregexp_replace: _REGEXP_REPLACE expr "," expr "," expr ")"
315
+ _HASH.1: "hash("
316
+ HASH_TYPE: "md5"i | "sha1"i | "sha256"i | "sha512"i
317
+ fhash: _HASH expr "," HASH_TYPE ")"
311
318
 
312
- _string_functions: like | ilike | upper | flower | fsplit | fstrpos | fsubstring | fcontains | ftrim | freplace | fregexp_extract | fregexp_contains | fregexp_replace
319
+ _string_functions: like | ilike | upper | flower | fsplit | fstrpos | fsubstring | fcontains | ftrim | freplace | fregexp_extract | fregexp_contains | fregexp_replace | fhash
313
320
 
314
321
  //array_functions
315
322
  _ARRAY_SUM.1: "array_sum("i
@@ -323,8 +330,10 @@
323
330
  _ARRAY_TRANSFORM.1: "array_transform("i
324
331
  transform_lambda: "@" IDENTIFIER
325
332
  farray_transform: _ARRAY_TRANSFORM expr "," transform_lambda ")"
333
+ _ARRAY_FILTER.1: "array_filter("i
334
+ farray_filter: _ARRAY_FILTER expr "," transform_lambda ")"
326
335
 
327
- _array_functions: farray_sum | farray_distinct | farray_sort | farray_transform | farray_to_string
336
+ _array_functions: farray_sum | farray_distinct | farray_sort | farray_transform | farray_to_string | farray_filter
328
337
 
329
338
  //map_functions
330
339
  _MAP_KEYS.1: "map_keys("i
@@ -358,11 +367,13 @@
358
367
  min: _MIN expr ")"
359
368
  _ARRAY_AGG.1: "array_agg("i
360
369
  array_agg: _ARRAY_AGG expr ")"
370
+ _ANY.1: "any("i
371
+ any: _ANY expr ")"
361
372
 
362
373
  //aggregates can force a grain
363
374
  aggregate_all: "*"
364
375
  aggregate_over: ("BY"i (aggregate_all | over_list))
365
- aggregate_functions: (count | count_distinct | sum | avg | max | min | array_agg) aggregate_over?
376
+ aggregate_functions: (count | count_distinct | sum | avg | max | min | array_agg | any) aggregate_over?
366
377
 
367
378
  // date functions
368
379
  _DATE.1: "date("i
@@ -405,8 +416,10 @@
405
416
  fdate_sub: _DATE_SUB expr "," DATE_PART "," expr ")"
406
417
  _DATE_DIFF.1: "date_diff("i
407
418
  fdate_diff: _DATE_DIFF expr "," expr "," DATE_PART ")"
419
+ _DATE_SPINE.1: "date_spine("i
420
+ fdate_spine: _DATE_SPINE expr "," expr ")"
408
421
 
409
- _date_functions: fdate | fdate_add | fdate_sub | fdate_diff | fdatetime | ftimestamp | fsecond | fminute | fhour | fday |fday_name | fday_of_week | fweek | fmonth | fmonth_name | fquarter | fyear | fdate_part | fdate_trunc
422
+ _date_functions: fdate | fdate_add | fdate_sub | fdate_diff | fdatetime | ftimestamp | fsecond | fminute | fhour | fday |fday_name | fday_of_week | fweek | fmonth | fmonth_name | fquarter | fyear | fdate_part | fdate_trunc | fdate_spine
410
423
 
411
424
  _static_functions: _string_functions | _math_functions | _array_functions | _map_functions
412
425
 
@@ -434,7 +447,7 @@
434
447
 
435
448
  float_lit: /\-?[0-9]*\.[0-9]+/
436
449
 
437
- array_lit: "[" (literal ",")* literal ","? "]"()
450
+ array_lit: "[" (expr ",")* expr ","? "]"()
438
451
 
439
452
  tuple_lit: "(" (literal ",")* literal ","? ")"
440
453