pytrilogy 0.0.3.37__py3-none-any.whl → 0.0.3.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic; consult the package registry's advisory page for details.

@@ -48,9 +48,10 @@ def gen_filter_node(
48
48
  if isinstance(x.lineage, FILTER_TYPES):
49
49
  if concept.lineage.where == where:
50
50
  logger.info(
51
- f"{padding(depth)}{LOGGER_PREFIX} fetching {x.lineage.content.address} as optional parent with same filter conditions "
51
+ f"{padding(depth)}{LOGGER_PREFIX} fetching {x.lineage.content.address} as optional parent from optional {x} with same filter conditions "
52
52
  )
53
- parent_row_concepts.append(x.lineage.content)
53
+ if x.lineage.content.address not in parent_row_concepts:
54
+ parent_row_concepts.append(x.lineage.content)
54
55
  optional_included.append(x)
55
56
  continue
56
57
  if conditions and conditions == where:
@@ -3,6 +3,8 @@ from typing import List
3
3
  from trilogy.constants import logger
4
4
  from trilogy.core.models.build import BuildConcept, BuildFunction, BuildWhereClause
5
5
  from trilogy.core.models.build_environment import BuildEnvironment
6
+
7
+ # C:\Users\ethan\coding_projects\pytrilogy\trilogy\core\processing\node_generators\group_to_node.py
6
8
  from trilogy.core.processing.nodes import (
7
9
  GroupNode,
8
10
  History,
@@ -30,6 +32,7 @@ def gen_group_to_node(
30
32
  f"Group to should have function lineage, is {type(concept.lineage)}"
31
33
  )
32
34
  group_arg = concept.lineage.arguments[0]
35
+
33
36
  parent_concepts: List[BuildConcept] = concept.lineage.concept_arguments
34
37
  logger.info(
35
38
  f"{padding(depth)}{LOGGER_PREFIX} group by node has required parents {[x.address for x in parent_concepts]}"
@@ -1,7 +1,7 @@
1
1
  from typing import List
2
2
 
3
3
  from trilogy.constants import logger
4
- from trilogy.core.enums import FunctionType, Purpose
4
+ from trilogy.core.enums import FunctionType
5
5
  from trilogy.core.models.build import BuildConcept, BuildFunction, BuildWhereClause
6
6
  from trilogy.core.processing.nodes import History, StrategyNode, UnionNode
7
7
  from trilogy.core.processing.utility import padding
@@ -16,6 +16,38 @@ def is_union(c: BuildConcept):
16
16
  )
17
17
 
18
18
 
19
def build_layers(
    concepts: list[BuildConcept],
) -> tuple[list[list[BuildConcept]], list[BuildConcept]]:
    """Partition union inputs into key-linked layers.

    Starting from the first concept's lineage arguments, each layer groups a
    parent concept with every other concept's parent that shares a key
    relationship with it (either direction). Returns the layers plus the
    subset of `concepts` whose parents were all covered ("complete").

    :param concepts: union concepts; the first is treated as the root.
    :return: (layers, complete) — layers of co-sourceable parents, and the
        input concepts fully resolvable from what was sourced.
    """
    sources = {
        c.address: c.lineage.concept_arguments if c.lineage else [] for c in concepts
    }
    root = concepts[0]

    built_layers: list[list[BuildConcept]] = []
    # copy so pop() below cannot mutate the lineage's own argument list
    pending = list(root.lineage.concept_arguments) if root.lineage else []
    sourced: set[str] = set()
    while pending:
        current = pending.pop()
        sourced.add(current.address)
        layer = [current]
        for address, parents in sources.items():
            if address == current.address:
                continue
            for parent in parents:
                # link any parent that shares a key relationship with current,
                # in either direction
                if parent.address in (current.keys or []) or current.address in (
                    parent.keys or []
                ):
                    layer.append(parent)
                    sourced.add(parent.address)
        built_layers.append(layer)
    # a concept is complete once every one of its own parents has been sourced
    # (note: the original shadowed the outer loop variable here)
    complete = [
        c
        for c in concepts
        if all(parent.address in sourced for parent in sources[c.address])
    ]
    return built_layers, complete
49
+
50
+
19
51
  def gen_union_node(
20
52
  concept: BuildConcept,
21
53
  local_optional: List[BuildConcept],
@@ -27,27 +59,15 @@ def gen_union_node(
27
59
  conditions: BuildWhereClause | None = None,
28
60
  ) -> StrategyNode | None:
29
61
  all_unions = [x for x in local_optional if is_union(x)] + [concept]
30
-
31
- parents = []
32
- keys = [x for x in all_unions if x.purpose == Purpose.KEY]
33
- base = keys.pop()
34
- remaining = [x for x in all_unions if x.address != base.address]
35
- arguments = []
36
- if isinstance(base.lineage, BuildFunction):
37
- arguments = base.lineage.concept_arguments
38
- for arg in arguments:
39
- relevant_parents: list[BuildConcept] = []
40
- for other_union in remaining:
41
- assert other_union.lineage
42
- potential_parents = [z for z in other_union.lineage.concept_arguments]
43
- relevant_parents += [
44
- x for x in potential_parents if x.keys and arg.address in x.keys
45
- ]
62
+ logger.info(f"{padding(depth)}{LOGGER_PREFIX} found unions {all_unions}")
63
+ parent_nodes = []
64
+ layers, resolved = build_layers(all_unions)
65
+ for layer in layers:
46
66
  logger.info(
47
- f"For parent arg {arg.address}, including additional union inputs {[c.address for c in relevant_parents]}"
67
+ f"{padding(depth)}{LOGGER_PREFIX} fetching layer {layer} with resolved {resolved}"
48
68
  )
49
69
  parent: StrategyNode = source_concepts(
50
- mandatory_list=[arg] + relevant_parents,
70
+ mandatory_list=layer,
51
71
  environment=environment,
52
72
  g=g,
53
73
  depth=depth + 1,
@@ -55,12 +75,8 @@ def gen_union_node(
55
75
  conditions=conditions,
56
76
  )
57
77
  parent.hide_output_concepts(parent.output_concepts)
58
- # parent.remove_output_concepts(parent.output_concepts)
59
- parent.add_output_concept(concept)
60
- for x in remaining:
61
- parent.add_output_concept(x)
62
-
63
- parents.append(parent)
78
+ parent.add_output_concepts(resolved)
79
+ parent_nodes.append(parent)
64
80
  if not parent:
65
81
  logger.info(
66
82
  f"{padding(depth)}{LOGGER_PREFIX} could not find union node parents"
@@ -68,8 +84,8 @@ def gen_union_node(
68
84
  return None
69
85
 
70
86
  return UnionNode(
71
- input_concepts=[concept] + local_optional,
72
- output_concepts=[concept] + local_optional,
87
+ input_concepts=resolved,
88
+ output_concepts=resolved,
73
89
  environment=environment,
74
- parents=parents,
90
+ parents=parent_nodes,
75
91
  )
@@ -45,6 +45,11 @@ class History(BaseModel):
45
45
  self.history[
46
46
  self._concepts_to_lookup(search, accept_partial, conditions=conditions)
47
47
  ] = output
48
+ self.log_end(
49
+ search,
50
+ accept_partial=accept_partial,
51
+ conditions=conditions,
52
+ )
48
53
 
49
54
  def get_history(
50
55
  self,
@@ -83,6 +88,20 @@ class History(BaseModel):
83
88
  )
84
89
  )
85
90
 
91
def log_end(
    self,
    search: list[BuildConcept],
    accept_partial: bool = False,
    conditions: BuildWhereClause | None = None,
):
    """Mark a search as no longer in flight.

    The lookup key is derived from the same (search, accept_partial,
    conditions) triple used by the other History bookkeeping methods;
    discard() makes this a safe no-op if the key was never recorded.
    """
    key = self._concepts_to_lookup(
        search,
        accept_partial=accept_partial,
        conditions=conditions,
    )
    self.started.discard(key)
104
+
86
105
  def check_started(
87
106
  self,
88
107
  search: list[BuildConcept],
@@ -272,6 +272,8 @@ class MergeNode(StrategyNode):
272
272
  f"{self.logging_prefix}{LOGGER_PREFIX} Merge node has only one parent with the same"
273
273
  " outputs as this merge node, dropping merge node "
274
274
  )
275
+ # push up any conditions we need
276
+ final.ordering = self.ordering
275
277
  return final
276
278
 
277
279
  # if we have multiple candidates, see if one is good enough
@@ -293,6 +295,7 @@ class MergeNode(StrategyNode):
293
295
  f" has all required output properties with partial {[c.address for c in dataset.partial_concepts]}"
294
296
  f" and self has no conditions ({self.conditions})"
295
297
  )
298
+ dataset.ordering = self.ordering
296
299
  return dataset
297
300
 
298
301
  pregrain = BuildGrain()
@@ -603,15 +603,29 @@ def find_nullable_concepts(
603
603
def sort_select_output_processed(
    cte: CTE | UnionCTE, query: ProcessedQuery
) -> CTE | UnionCTE:
    """Reorder cte.output_columns to match the query's declared output order.

    Visible columns come first, in query order; any columns the CTE produces
    that the query does not surface are appended afterwards and recorded in
    cte.hidden_concepts so rendering can keep them out of the final select.

    :param cte: the CTE whose output columns are reordered in place.
    :param query: the processed query supplying output order and hidden set.
    :return: the same cte, mutated.
    """
    output_addresses = [c.address for c in query.output_columns]
    mapping = {x.address: x for x in cte.output_columns}

    # visible outputs first, in the order the query declared them
    new_output: list[BuildConcept] = [mapping[address] for address in output_addresses]

    # then re-append anything the CTE produces that the query does not surface
    new_output.extend(
        oc for oc in cte.output_columns if oc.address not in output_addresses
    )

    # NOTE(review): compare addresses to addresses. The original tested
    # `c.address not in query.output_columns`, i.e. a string against concept
    # objects, which only behaves as intended if the concept type implements
    # string equality; membership in `output_addresses` is the intended check.
    cte.hidden_concepts = {
        c.address
        for c in cte.output_columns
        if c.address not in output_addresses or c.address in query.hidden_columns
    }
    cte.output_columns = new_output
    return cte
617
631
 
@@ -619,18 +633,28 @@ def sort_select_output_processed(
619
633
def sort_select_output(
    cte: CTE | UnionCTE, query: SelectStatement | MultiSelectStatement | ProcessedQuery
) -> CTE | UnionCTE:
    """Reorder cte.output_columns to match the query's output component order.

    Hidden components are deliberately kept in the output column list (they
    may still be needed downstream) and are instead recorded in
    cte.hidden_concepts so rendering can skip them. ProcessedQuery instances
    are delegated to sort_select_output_processed, which uses different
    attribute names.

    :param cte: the CTE whose output columns are reordered in place.
    :param query: the statement supplying output order and hidden components.
    :return: the same cte, mutated.
    """
    if isinstance(query, ProcessedQuery):
        return sort_select_output_processed(cte, query)

    # include hidden components in the ordering on purpose; hiding is
    # tracked separately below
    output_addresses = [c.address for c in query.output_components]

    mapping = {x.address: x for x in cte.output_columns}

    cte.output_columns = [mapping[address] for address in output_addresses]
    cte.hidden_concepts = {
        c.address
        for c in query.output_components
        if c.address in query.hidden_components
    }
    return cte
@@ -431,7 +431,9 @@ def get_query_datasources(
431
431
  hooks: Optional[List[BaseHook]] = None,
432
432
  ) -> QueryDatasource:
433
433
  ds = get_query_node(environment, statement.as_lineage(environment))
434
+
434
435
  final_qds = ds.resolve()
436
+
435
437
  if hooks:
436
438
  for hook in hooks:
437
439
  hook.process_root_strategy_node(ds)
@@ -510,6 +512,7 @@ def process_query(
510
512
  hook.process_root_datasource(root_datasource)
511
513
  # this should always return 1 - TODO, refactor
512
514
  root_cte = datasource_to_cte(root_datasource, environment.cte_name_map)
515
+
513
516
  for hook in hooks:
514
517
  hook.process_root_cte(root_cte)
515
518
  raw_ctes: List[CTE | UnionCTE] = list(reversed(flatten_ctes(root_cte)))
@@ -135,6 +135,7 @@ class SelectStatement(HasUUID, SelectTypeMixin, BaseModel):
135
135
  )
136
136
 
137
137
  output.grain = output.calculate_grain(environment)
138
+
138
139
  for x in selection:
139
140
  if x.is_undefined and environment.concepts.fail_on_missing:
140
141
  environment.concepts.raise_undefined(
@@ -166,6 +167,7 @@ class SelectStatement(HasUUID, SelectTypeMixin, BaseModel):
166
167
  targets = []
167
168
  for x in self.selection:
168
169
  targets.append(x.concept)
170
+
169
171
  result = Grain.from_concepts(
170
172
  targets, where_clause=self.where_clause, environment=environment
171
173
  )
trilogy/dialect/base.py CHANGED
@@ -171,6 +171,7 @@ FUNCTION_MAP = {
171
171
  FunctionType.DATETIME_LITERAL: lambda x: f"datetime '{x}'",
172
172
  # math
173
173
  FunctionType.ADD: lambda x: " + ".join(x),
174
+ FunctionType.ABS: lambda x: f"abs({x[0]})",
174
175
  FunctionType.SUBTRACT: lambda x: " - ".join(x),
175
176
  FunctionType.DIVIDE: lambda x: " / ".join(x),
176
177
  FunctionType.MULTIPLY: lambda x: " * ".join(x),
trilogy/parsing/common.py CHANGED
@@ -102,8 +102,10 @@ def process_function_arg(
102
102
  concept.metadata.line_number = meta.line
103
103
  environment.add_concept(concept, meta=meta)
104
104
  return concept
105
+ elif isinstance(arg, Concept):
106
+ return arg.reference
105
107
  elif isinstance(arg, ConceptRef):
106
- return environment.concepts[arg.address]
108
+ return environment.concepts[arg.address].reference
107
109
  return arg
108
110
 
109
111
 
def process_function_args(
    args,
    meta: Meta | None,
    environment: Environment,
) -> List[ConceptRef | Function | str | int | float | date | datetime]:
    """Normalize each raw function argument via process_function_arg.

    :param args: raw parsed arguments (concepts, refs, functions, literals).
    :param meta: parse metadata forwarded to per-argument processing.
    :param environment: environment used to resolve concept references.
    :return: the processed arguments, in input order.
    """
    # comprehension replaces the original append loop; same order, same calls
    return [process_function_arg(arg, meta, environment) for arg in args]
@@ -220,7 +222,6 @@ def concept_is_relevant(
220
222
  if all([c in others for c in concept.grain.components]):
221
223
  return False
222
224
  if concept.derivation in (Derivation.BASIC,):
223
-
224
225
  return any(
225
226
  concept_is_relevant(c, others, environment)
226
227
  for c in concept.concept_arguments
@@ -235,6 +236,7 @@ def concepts_to_grain_concepts(
235
236
  ) -> list[Concept]:
236
237
  pconcepts: list[Concept] = []
237
238
  for c in concepts:
239
+
238
240
  if isinstance(c, Concept):
239
241
  pconcepts.append(c)
240
242
  elif isinstance(c, ConceptRef) and environment:
@@ -281,6 +283,76 @@ def get_relevant_parent_concepts(arg):
281
283
  return results
282
284
 
283
285
 
286
def group_function_to_concept(
    parent: Function,
    name: str,
    environment: Environment,
    namespace: str | None = None,
    metadata: Metadata | None = None,
):
    """Build the output Concept for a GROUP function.

    The grain is derived from the concrete, non-constant parents of the
    function; dynamic metrics without keys are excluded so the result
    inherits the group target's grain instead.

    :param parent: the GROUP Function used as the concept's lineage.
    :param name: name for the new concept.
    :param environment: environment used to resolve parent references.
    :param namespace: optional namespace; defaults to the environment's.
    :param metadata: optional metadata; a fresh Metadata() when omitted.
    :return: the derived Concept.
    """
    namespace = namespace or environment.namespace
    ref_args, is_metric = get_relevant_parent_concepts(parent)
    concrete_args = [environment.concepts[c.address] for c in ref_args]
    pkeys: List[Concept] = [
        x for x in concrete_args if not x.derivation == Derivation.CONSTANT
    ]
    modifiers = get_upstream_modifiers(pkeys, environment)
    key_grain: list[str] = []
    for x in pkeys:
        # for a group-to, ignore a dynamic metric (no keys); it will end up
        # with the group target grain
        if x.purpose == Purpose.METRIC and not x.keys:
            continue
        # the original's metric / non-metric branches here were identical,
        # so they are collapsed into a single append
        key_grain.append(x.address)
    keys = set(key_grain)

    grain = Grain.from_concepts(keys, environment)
    if is_metric:
        purpose = Purpose.METRIC
    elif not pkeys:
        purpose = Purpose.CONSTANT
    else:
        purpose = parent.output_purpose
    fmetadata = metadata or Metadata()
    granularity = Granularity.MULTI_ROW

    if grain is not None:
        # rebuild from components to deduplicate
        grain = Grain.from_concepts(grain.components, environment)

    # NOTE: the original contained a second, unreachable `return Concept(...)`
    # (without grain=) after this return; it has been removed as dead code.
    return Concept(
        name=name,
        datatype=parent.output_datatype,
        purpose=purpose,
        lineage=parent,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        grain=grain,
        metadata=fmetadata,
        derivation=Derivation.BASIC,
        granularity=granularity,
    )
+
284
356
  def function_to_concept(
285
357
  parent: Function,
286
358
  name: str,
@@ -347,7 +419,6 @@ def function_to_concept(
347
419
  else:
348
420
  derivation = Derivation.BASIC
349
421
  granularity = Granularity.MULTI_ROW
350
- # granularity = Concept.calculate_granularity(derivation, grain, parent)
351
422
 
352
423
  if grain is not None:
353
424
  r = Concept(
@@ -695,6 +766,14 @@ def arbitrary_to_concept(
695
766
  elif isinstance(parent, Function):
696
767
  if not name:
697
768
  name = f"{VIRTUAL_CONCEPT_PREFIX}_func_{parent.operator.value}_{string_to_hash(str(parent))}"
769
+ if parent.operator == FunctionType.GROUP:
770
+ return group_function_to_concept(
771
+ parent,
772
+ name,
773
+ environment=environment,
774
+ namespace=namespace,
775
+ metadata=metadata,
776
+ )
698
777
  return function_to_concept(
699
778
  parent,
700
779
  name,
@@ -1,6 +1,7 @@
1
1
  from dataclasses import dataclass
2
2
  from datetime import date, datetime
3
3
  from enum import Enum
4
+ from logging import getLogger
4
5
  from os.path import dirname, join
5
6
  from pathlib import Path
6
7
  from re import IGNORECASE
@@ -137,6 +138,8 @@ from trilogy.parsing.common import (
137
138
  )
138
139
  from trilogy.parsing.exceptions import ParseError
139
140
 
141
+ perf_logger = getLogger("trilogy.parse.performance")
142
+
140
143
 
141
144
  class ParsePass(Enum):
142
145
  INITIAL = 1
@@ -407,6 +410,7 @@ class ParseToObjects(Transformer):
407
410
  mapping = self.environment.concepts[address]
408
411
  datatype = mapping.output_datatype
409
412
  return ConceptRef(
413
+ # this is load-bearing to handle pseudonyms
410
414
  address=mapping.address,
411
415
  metadata=Metadata(line_number=meta.line),
412
416
  datatype=datatype,
@@ -754,6 +758,11 @@ class ParseToObjects(Transformer):
754
758
  components=set([self.environment.concepts[a].address for a in args[0]])
755
759
  )
756
760
 
761
@v_args(meta=True)
def aggregate_by(self, meta: Meta, args):
    """Parse an aggregate-by clause into a GROUP function over its concepts."""
    # resolve each referenced name to its concept before building the function
    resolved = [self.environment.concepts[ref] for ref in args]
    return self.function_factory.create_function(resolved, FunctionType.GROUP, meta)
765
+
757
766
  def whole_grain_clause(self, args) -> WholeGrainWrapper:
758
767
  return WholeGrainWrapper(where=args[0])
759
768
 
@@ -1005,6 +1014,7 @@ class ParseToObjects(Transformer):
1005
1014
  return text
1006
1015
 
1007
1016
  def import_statement(self, args: list[str]) -> ImportStatement:
1017
+ start = datetime.now()
1008
1018
  if len(args) == 2:
1009
1019
  alias = args[-1]
1010
1020
  cache_key = args[-1]
@@ -1041,9 +1051,11 @@ class ParseToObjects(Transformer):
1041
1051
  )
1042
1052
 
1043
1053
  if token_lookup in self.tokens:
1054
+ perf_logger.debug(f"\tTokens cached for {token_lookup}")
1044
1055
  raw_tokens = self.tokens[token_lookup]
1045
1056
  text = self.text_lookup[token_lookup]
1046
1057
  else:
1058
+ perf_logger.debug(f"\tTokens not cached for {token_lookup}, resolving")
1047
1059
  text = self.resolve_import_address(target, is_stdlib)
1048
1060
  self.text_lookup[token_lookup] = text
1049
1061
 
@@ -1056,12 +1068,19 @@ class ParseToObjects(Transformer):
1056
1068
  self.tokens[token_lookup] = raw_tokens
1057
1069
 
1058
1070
  if cache_lookup in self.parsed:
1071
+ perf_logger.debug(f"\tEnvironment cached for {token_lookup}")
1059
1072
  nparser = self.parsed[cache_lookup]
1060
1073
  new_env = nparser.environment
1061
1074
  if nparser.parse_pass != ParsePass.VALIDATION:
1062
1075
  # nparser.transform(raw_tokens)
1076
+ second_pass_start = datetime.now()
1063
1077
  nparser.run_second_parse_pass()
1078
+ second_pass_end = datetime.now()
1079
+ perf_logger.debug(
1080
+ f"{second_pass_end - second_pass_start} seconds | Import {alias} key ({cache_key}) second pass took {second_pass_end - second_pass_start} to parse, {len(new_env.concepts)} concepts"
1081
+ )
1064
1082
  else:
1083
+ perf_logger.debug(f"\tParsing new for {token_lookup}")
1065
1084
  try:
1066
1085
  new_env = Environment(
1067
1086
  working_path=dirname(target),
@@ -1093,6 +1112,10 @@ class ParseToObjects(Transformer):
1093
1112
  self.environment.add_import(
1094
1113
  alias, new_env, Import(alias=alias, path=parsed_path)
1095
1114
  )
1115
+ end = datetime.now()
1116
+ perf_logger.debug(
1117
+ f"{end - start} seconds | Import {alias} key ({cache_key}) took to parse, {len(new_env.concepts)} concepts"
1118
+ )
1096
1119
  return imps
1097
1120
 
1098
1121
  @v_args(meta=True)
@@ -1268,8 +1291,8 @@ class ParseToObjects(Transformer):
1268
1291
  @v_args(meta=True)
1269
1292
  def function_binding_item(self, meta: Meta, args) -> ArgBinding:
1270
1293
  if len(args) == 2:
1271
- return ArgBinding(name=args[0], default=args[1])
1272
- return ArgBinding(name=args[0], default=None)
1294
+ return ArgBinding.model_construct(name=args[0], default=args[1])
1295
+ return ArgBinding.model_construct(name=args[0], default=None)
1273
1296
 
1274
1297
  @v_args(meta=True)
1275
1298
  def raw_function(self, meta: Meta, args) -> FunctionDeclaration:
@@ -1436,12 +1459,14 @@ class ParseToObjects(Transformer):
1436
1459
  return args[0]
1437
1460
 
1438
1461
  def window(self, args):
1462
+
1439
1463
  return Window(count=args[1].value, window_order=args[0])
1440
1464
 
1441
1465
  def WINDOW_TYPE(self, args):
1442
1466
  return WindowType(args.strip())
1443
1467
 
1444
1468
  def window_item_over(self, args):
1469
+
1445
1470
  return WindowItemOver(contents=args[0])
1446
1471
 
1447
1472
  def window_item_order(self, args):
@@ -1850,7 +1875,7 @@ def parse_text(
1850
1875
  parser = ParseToObjects(
1851
1876
  environment=environment, import_keys=["root"], parse_config=parse_config
1852
1877
  )
1853
-
1878
+ start = datetime.now()
1854
1879
  try:
1855
1880
  parser.set_text(text)
1856
1881
  # disable fail on missing to allow for circular dependencies
@@ -1860,6 +1885,10 @@ def parse_text(
1860
1885
  pass_two = parser.run_second_parse_pass()
1861
1886
  output = [v for v in pass_two if v]
1862
1887
  environment.concepts.fail_on_missing = True
1888
+ end = datetime.now()
1889
+ perf_logger.debug(
1890
+ f"Parse time: {end - start} for {len(text)} characters, {len(output)} objects"
1891
+ )
1863
1892
  except VisitError as e:
1864
1893
  unpack_visit_error(e)
1865
1894
  # this will never be reached