pytrilogy 0.0.3.90__py3-none-any.whl → 0.0.3.92__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pytrilogy
3
- Version: 0.0.3.90
3
+ Version: 0.0.3.92
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -52,12 +52,12 @@ The Trilogy language is an experiment in better SQL for analytics - a streamline
52
52
  - Testability
53
53
  - Easy to use for humans and LLMs
54
54
 
55
- Trilogy is epsecially targeted at data consumption, providing a rich metadata layer that makes visualizing Trilogy easy and expressive.
55
+ Trilogy is especially powerful for data consumption, providing a rich metadata layer that makes creating, interpreting, and visualizing queries easy and expressive.
56
56
 
57
57
  > [!TIP]
58
58
  > You can try Trilogy in an [open-source studio](https://trilogydata.dev/trilogy-studio-core/). More details on the language can be found in the [documentation](https://trilogydata.dev/).
59
59
 
60
- Start in the studio to explore Trilogy. For deeper work and integration, `pytrilogy` can be run locally to parse and execute trilogy model [.preql] files using the `trilogy` CLI tool, or can be run in python by importing the `trilogy` package.
60
+ We recommend starting with the studio to explore Trilogy. For integration, `pytrilogy` can be run locally to parse and execute trilogy model [.preql] files using the `trilogy` CLI tool, or can be run in python by importing the `trilogy` package.
61
61
 
62
62
  Installation: `pip install pytrilogy`
63
63
 
@@ -104,10 +104,13 @@ Save the following code in a file named `hello.preql`
104
104
 
105
105
  ```python
106
106
  # semantic model is abstract from data
107
+
108
+ type word string; # types can be used to provide expressive metadata tags that propagate through dataflow
109
+
107
110
  key sentence_id int;
108
- property sentence_id.word_one string; # comments after a definition
109
- property sentence_id.word_two string; # are syntactic sugar for adding
110
- property sentence_id.word_three string; # a description to it
111
+ property sentence_id.word_one string::word; # comments after a definition
112
+ property sentence_id.word_two string::word; # are syntactic sugar for adding
113
+ property sentence_id.word_three string::word; # a description to it
111
114
 
112
115
  # comments in other places are just comments
113
116
 
@@ -1,5 +1,5 @@
1
- pytrilogy-0.0.3.90.dist-info/licenses/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
2
- trilogy/__init__.py,sha256=xsnAVhMdPDgMBudr3tOEYEMfxl0t6RWAK_231sFSxAU,303
1
+ pytrilogy-0.0.3.92.dist-info/licenses/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
2
+ trilogy/__init__.py,sha256=f4OB7VJAEeTW-QiyNBxHqoUJrj_7Dk6fryVxppzYalc,303
3
3
  trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  trilogy/constants.py,sha256=eKb_EJvSqjN9tGbdVEViwdtwwh8fZ3-jpOEDqL71y70,1691
5
5
  trilogy/engine.py,sha256=OK2RuqCIUId6yZ5hfF8J1nxGP0AJqHRZiafcowmW0xc,1728
@@ -12,21 +12,21 @@ trilogy/authoring/__init__.py,sha256=e74k-Jep4DLYPQU_2m0aVsYlw5HKTOucAKtdTbd6f2g
12
12
  trilogy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  trilogy/core/constants.py,sha256=nizWYDCJQ1bigQMtkNIEMNTcN0NoEAXiIHLzpelxQ24,201
14
14
  trilogy/core/enums.py,sha256=RQRkpGHLtcBKAO6jZnmGVtSUnb00Q2rP56ltYGdfTok,8294
15
- trilogy/core/env_processor.py,sha256=pFsxnluKIusGKx1z7tTnfsd_xZcPy9pZDungkjkyvI0,3170
15
+ trilogy/core/env_processor.py,sha256=pD_YYuDG6CMybmwW9H2w958RloA7lEeVbzKXP6ltz2o,4078
16
16
  trilogy/core/environment_helpers.py,sha256=VvPIiFemqaLLpIpLIqprfu63K7muZ1YzNg7UZIUph8w,8267
17
17
  trilogy/core/ergonomics.py,sha256=e-7gE29vPLFdg0_A1smQ7eOrUwKl5VYdxRSTddHweRA,1631
18
18
  trilogy/core/exceptions.py,sha256=jYEduuMehcMkmCpf-OC_taELPZm7qNfeSNzIWkDYScs,707
19
19
  trilogy/core/functions.py,sha256=hnfcNjAD-XQ572vEwuUEAdBf8zKFWYwPeHIpESjUpZs,32928
20
- trilogy/core/graph_models.py,sha256=BYhJzHKSgnZHVLJs1CfsgrxTPHqKqPNeA64RlozGY0A,3498
20
+ trilogy/core/graph_models.py,sha256=zBzUwhYpnDJG91pWtk9ngw1WiTgHkMawyrqXptcGWGA,3847
21
21
  trilogy/core/internal.py,sha256=wFx4e1I0mtx159YFShSXeUBSQ82NINtAbOI-92RX4i8,2151
22
22
  trilogy/core/optimization.py,sha256=ojpn-p79lr03SSVQbbw74iPCyoYpDYBmj1dbZ3oXCjI,8860
23
23
  trilogy/core/query_processor.py,sha256=5aFgv-2LVM1Uku9cR_tFuTRDwyLnxc95bCMAHeFy2AY,20332
24
24
  trilogy/core/utility.py,sha256=3VC13uSQWcZNghgt7Ot0ZTeEmNqs__cx122abVq9qhM,410
25
25
  trilogy/core/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- trilogy/core/models/author.py,sha256=tcsr42hHiQ2PHh2_le9sr5IV5nv3twxUv2EXr5iDGxg,80201
27
- trilogy/core/models/build.py,sha256=CyrSo4xgU-uDKW3xUVYs5cTk3Z3Z2BMWdGQNHnHZOqU,66127
26
+ trilogy/core/models/author.py,sha256=iNnj2exehqj8gzoIwlfpTxhZpS-WqVJwc8OZbiDx1f4,81537
27
+ trilogy/core/models/build.py,sha256=vPMVF7yv_1-sVVpUMupMfWt4BNXFYNSB-WjAL7Eo6lk,71968
28
28
  trilogy/core/models/build_environment.py,sha256=s_C9xAHuD3yZ26T15pWVBvoqvlp2LdZ8yjsv2_HdXLk,5363
29
- trilogy/core/models/core.py,sha256=NOvonI4Ip4thpz5WoJZWbbBa44PFfpd2hXGx2Cbi4CE,12521
29
+ trilogy/core/models/core.py,sha256=nnz3ZROlVT18uygEWqqbfbHmcJkm2UC3VVCrsri_-K0,12836
30
30
  trilogy/core/models/datasource.py,sha256=wogTevZ-9CyUW2a8gjzqMCieircxi-J5lkI7EOAZnck,9596
31
31
  trilogy/core/models/environment.py,sha256=0IHSCFf5e5b4LPQN3vmjumtfM1iD1tN4WMoUr0UqxZI,27855
32
32
  trilogy/core/models/execute.py,sha256=sVWhjwWull-T6pUJizhrYVGCWHY3eZivVN6KNlhcHig,41839
@@ -50,7 +50,7 @@ trilogy/core/processing/node_generators/filter_node.py,sha256=ArBsQJl-4fWBJWCE28
50
50
  trilogy/core/processing/node_generators/group_node.py,sha256=1QJhRxsTklJ5xq8wHlAURZaN9gL9FPpeCa1OJ7IwXnY,6769
51
51
  trilogy/core/processing/node_generators/group_to_node.py,sha256=jKcNCDOY6fNblrdZwaRU0sbUSr9H0moQbAxrGgX6iGA,3832
52
52
  trilogy/core/processing/node_generators/multiselect_node.py,sha256=GWV5yLmKTe1yyPhN60RG1Rnrn4ktfn9lYYXi_FVU4UI,7061
53
- trilogy/core/processing/node_generators/node_merge_node.py,sha256=sUKS9bSTeYNCyF9jibrkac1_QkmxD-k4x35nQK1b9cM,18312
53
+ trilogy/core/processing/node_generators/node_merge_node.py,sha256=rb6ltJyhAUFairG6LZJ111Zm2uQhXWsSZfSERahUNGc,18258
54
54
  trilogy/core/processing/node_generators/recursive_node.py,sha256=l5zdh0dURKwmAy8kK4OpMtZfyUEQRk6N-PwSWIyBpSM,2468
55
55
  trilogy/core/processing/node_generators/rowset_node.py,sha256=5L5u6xz1In8EaHQdcYgR2si-tz9WB9YLXURo4AkUT9A,6630
56
56
  trilogy/core/processing/node_generators/select_merge_node.py,sha256=Cv2GwNiYSmwewjuK8T3JB3pbgrLZFPsB75DCP153BMA,22818
@@ -94,13 +94,13 @@ trilogy/hooks/graph_hook.py,sha256=5BfR7Dt0bgEsCLgwjowgCsVkboGYfVJGOz8g9mqpnos,4
94
94
  trilogy/hooks/query_debugger.py,sha256=1npRjww94sPV5RRBBlLqMJRaFkH9vhEY6o828MeoEcw,5583
95
95
  trilogy/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
96
  trilogy/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
- trilogy/parsing/common.py,sha256=yV1AckK0h8u1OFeGQBTMu-wuW5m63c5CcZuPicsTH_w,30660
97
+ trilogy/parsing/common.py,sha256=550-L0444GUuBFdiDWkOg_DxnMXtcJFUMES2R5zlwik,31026
98
98
  trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
99
99
  trilogy/parsing/exceptions.py,sha256=Xwwsv2C9kSNv2q-HrrKC1f60JNHShXcCMzstTSEbiCw,154
100
100
  trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
101
- trilogy/parsing/parse_engine.py,sha256=fgqCtV6sf9HrkViEjf6XXdRpPf4hJ1gSyzLXZ9sLBHs,80148
101
+ trilogy/parsing/parse_engine.py,sha256=minAI04kKs5uZqRumafMCvC9lRwlcXCLmDigcNOF_7w,80639
102
102
  trilogy/parsing/render.py,sha256=HSNntD82GiiwHT-TWPLXAaIMWLYVV5B5zQEsgwrHIBE,19605
103
- trilogy/parsing/trilogy.lark,sha256=ySzMMLxyPjn74MjFHZxXPTW-jHW68KLPJpiszPvZaO0,15780
103
+ trilogy/parsing/trilogy.lark,sha256=e2YVSxqzRov08AydtDSA8aqSJU2M1eJaidMEkHCdsYE,15896
104
104
  trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
105
  trilogy/scripts/trilogy.py,sha256=1L0XrH4mVHRt1C9T1HnaDv2_kYEfbWTb5_-cBBke79w,3774
106
106
  trilogy/std/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -111,8 +111,8 @@ trilogy/std/money.preql,sha256=XWwvAV3WxBsHX9zfptoYRnBigcfYwrYtBHXTME0xJuQ,2082
111
111
  trilogy/std/net.preql,sha256=WZCuvH87_rZntZiuGJMmBDMVKkdhTtxeHOkrXNwJ1EE,416
112
112
  trilogy/std/ranking.preql,sha256=LDoZrYyz4g3xsII9XwXfmstZD-_92i1Eox1UqkBIfi8,83
113
113
  trilogy/std/report.preql,sha256=LbV-XlHdfw0jgnQ8pV7acG95xrd1-p65fVpiIc-S7W4,202
114
- pytrilogy-0.0.3.90.dist-info/METADATA,sha256=X046-UiVgiZTCb6DQXtQuzAe9Qm4DNTefqbNXixNx5g,9589
115
- pytrilogy-0.0.3.90.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
116
- pytrilogy-0.0.3.90.dist-info/entry_points.txt,sha256=ewBPU2vLnVexZVnB-NrVj-p3E-4vukg83Zk8A55Wp2w,56
117
- pytrilogy-0.0.3.90.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
118
- pytrilogy-0.0.3.90.dist-info/RECORD,,
114
+ pytrilogy-0.0.3.92.dist-info/METADATA,sha256=6MSVrJ4ZFyG-BbBG4-pDrhVYne9GLVR0sc8Olis00_g,9746
115
+ pytrilogy-0.0.3.92.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
116
+ pytrilogy-0.0.3.92.dist-info/entry_points.txt,sha256=ewBPU2vLnVexZVnB-NrVj-p3E-4vukg83Zk8A55Wp2w,56
117
+ pytrilogy-0.0.3.92.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
118
+ pytrilogy-0.0.3.92.dist-info/RECORD,,
trilogy/__init__.py CHANGED
@@ -4,6 +4,6 @@ from trilogy.dialect.enums import Dialects
4
4
  from trilogy.executor import Executor
5
5
  from trilogy.parser import parse
6
6
 
7
- __version__ = "0.0.3.90"
7
+ __version__ = "0.0.3.92"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
@@ -8,26 +8,34 @@ from trilogy.core.models.build_environment import BuildEnvironment
8
8
 
9
9
 
10
10
  def add_concept(
11
- concept: BuildConcept, g: ReferenceGraph, concept_mapping: dict[str, BuildConcept]
11
+ concept: BuildConcept,
12
+ g: ReferenceGraph,
13
+ concept_mapping: dict[str, BuildConcept],
14
+ default_concept_graph: dict[str, BuildConcept],
15
+ seen: set[str],
12
16
  ):
13
- g.add_node(concept)
17
+
14
18
  # if we have sources, recursively add them
15
19
  node_name = concept_to_node(concept)
20
+ if node_name in seen:
21
+ return
22
+ seen.add(node_name)
23
+ g.add_node(concept)
16
24
  if concept.concept_arguments:
17
25
  for source in concept.concept_arguments:
18
26
  if not isinstance(source, BuildConcept):
19
27
  raise ValueError(
20
28
  f"Invalid non-build concept {source} passed into graph generation from {concept}"
21
29
  )
22
- generic = source.with_default_grain()
23
- add_concept(generic, g, concept_mapping)
30
+ generic = get_default_grain_concept(source, default_concept_graph)
31
+ add_concept(generic, g, concept_mapping, default_concept_graph, seen)
24
32
 
25
33
  g.add_edge(generic, node_name)
26
34
  for ps_address in concept.pseudonyms:
27
35
  if ps_address not in concept_mapping:
28
36
  raise SyntaxError(f"Concept {concept} has invalid pseudonym {ps_address}")
29
37
  pseudonym = concept_mapping[ps_address]
30
- pseudonym = pseudonym.with_default_grain()
38
+ pseudonym = get_default_grain_concept(pseudonym, default_concept_graph)
31
39
  pseudonym_node = concept_to_node(pseudonym)
32
40
  if (pseudonym_node, node_name) in g.edges and (
33
41
  node_name,
@@ -38,16 +46,29 @@ def add_concept(
38
46
  continue
39
47
  g.add_edge(pseudonym_node, node_name, pseudonym=True)
40
48
  g.add_edge(node_name, pseudonym_node, pseudonym=True)
41
- add_concept(pseudonym, g, concept_mapping)
49
+ add_concept(pseudonym, g, concept_mapping, default_concept_graph, seen)
50
+
51
+
52
+ def get_default_grain_concept(
53
+ concept: BuildConcept, default_concept_graph: dict[str, BuildConcept]
54
+ ) -> BuildConcept:
55
+ """Get the default grain concept from the graph."""
56
+ if concept.address in default_concept_graph:
57
+ return default_concept_graph[concept.address]
58
+ default = concept.with_default_grain()
59
+ default_concept_graph[concept.address] = default
60
+ return default
42
61
 
43
62
 
44
63
  def generate_adhoc_graph(
45
64
  concepts: list[BuildConcept],
46
65
  datasources: list[BuildDatasource],
66
+ default_concept_graph: dict[str, BuildConcept],
47
67
  restrict_to_listed: bool = False,
48
68
  ) -> ReferenceGraph:
49
69
  g = ReferenceGraph()
50
70
  concept_mapping = {x.address: x for x in concepts}
71
+ seen: set[str] = set()
51
72
  for concept in concepts:
52
73
  if not isinstance(concept, BuildConcept):
53
74
  raise ValueError(f"Invalid non-build concept {concept}")
@@ -55,7 +76,7 @@ def generate_adhoc_graph(
55
76
  # add all parsed concepts
56
77
  for concept in concepts:
57
78
 
58
- add_concept(concept, g, concept_mapping)
79
+ add_concept(concept, g, concept_mapping, default_concept_graph, seen)
59
80
 
60
81
  for dataset in datasources:
61
82
  node = datasource_to_node(dataset)
@@ -69,7 +90,7 @@ def generate_adhoc_graph(
69
90
  # if there is a key on a table at a different grain
70
91
  # add an FK edge to the canonical source, if it exists
71
92
  # for example, order ID on order product table
72
- default = concept.with_default_grain()
93
+ default = get_default_grain_concept(concept, default_concept_graph)
73
94
  if concept != default:
74
95
  g.add_edge(concept, default)
75
96
  g.add_edge(default, concept)
@@ -79,9 +100,10 @@ def generate_adhoc_graph(
79
100
  def generate_graph(
80
101
  environment: BuildEnvironment,
81
102
  ) -> ReferenceGraph:
82
-
103
+ default_concept_graph: dict[str, BuildConcept] = {}
83
104
  return generate_adhoc_graph(
84
105
  list(environment.concepts.values())
85
106
  + list(environment.alias_origin_lookup.values()),
86
107
  list(environment.datasources.values()),
108
+ default_concept_graph=default_concept_graph,
87
109
  )
@@ -54,7 +54,7 @@ def prune_sources_for_conditions(
54
54
  def concept_to_node(input: BuildConcept) -> str:
55
55
  # if input.purpose == Purpose.METRIC:
56
56
  # return f"c~{input.namespace}.{input.name}@{input.grain}"
57
- return f"c~{input.address}@{input.grain.without_condition()}"
57
+ return f"c~{input.address}@{input.grain.str_no_condition}"
58
58
 
59
59
 
60
60
  def datasource_to_node(input: BuildDatasource) -> str:
@@ -72,11 +72,15 @@ class ReferenceGraph(nx.DiGraph):
72
72
  def add_node(self, node_for_adding, **attr):
73
73
  if isinstance(node_for_adding, BuildConcept):
74
74
  node_name = concept_to_node(node_for_adding)
75
+ # if node_name in self.nodes:
76
+ # return
75
77
  attr["type"] = "concept"
76
78
  attr["concept"] = node_for_adding
77
79
  attr["grain"] = node_for_adding.grain
78
80
  elif isinstance(node_for_adding, BuildDatasource):
79
81
  node_name = datasource_to_node(node_for_adding)
82
+ # if node_name in self.nodes:
83
+ # return
80
84
  attr["type"] = "datasource"
81
85
  attr["ds"] = node_for_adding
82
86
  attr["grain"] = node_for_adding.grain
@@ -91,7 +95,10 @@ class ReferenceGraph(nx.DiGraph):
91
95
  if u_of_edge not in self.nodes:
92
96
  self.add_node(orig)
93
97
  elif isinstance(u_of_edge, BuildDatasource):
98
+ orig = u_of_edge
94
99
  u_of_edge = datasource_to_node(u_of_edge)
100
+ if u_of_edge not in self.nodes:
101
+ self.add_node(orig)
95
102
 
96
103
  if isinstance(v_of_edge, BuildConcept):
97
104
  orig = v_of_edge
@@ -99,5 +106,8 @@ class ReferenceGraph(nx.DiGraph):
99
106
  if v_of_edge not in self.nodes:
100
107
  self.add_node(orig)
101
108
  elif isinstance(v_of_edge, BuildDatasource):
109
+ orig = v_of_edge
102
110
  v_of_edge = datasource_to_node(v_of_edge)
111
+ if v_of_edge not in self.nodes:
112
+ self.add_node(orig)
103
113
  super().add_edge(u_of_edge, v_of_edge, **attr)
@@ -460,6 +460,8 @@ class HavingClause(WhereClause):
460
460
  class Grain(Namespaced, BaseModel):
461
461
  components: set[str] = Field(default_factory=set)
462
462
  where_clause: Optional["WhereClause"] = None
463
+ _str: str | None = None
464
+ _abstract: bool = False
463
465
 
464
466
  def without_condition(self):
465
467
  return Grain(components=self.components)
@@ -484,12 +486,9 @@ class Grain(Namespaced, BaseModel):
484
486
  from trilogy.parsing.common import concepts_to_grain_concepts
485
487
 
486
488
  x = Grain.model_construct(
487
- components={
488
- c.address
489
- for c in concepts_to_grain_concepts(
490
- concepts, environment=environment, local_concepts=local_concepts
491
- )
492
- },
489
+ components=concepts_to_grain_concepts(
490
+ concepts, environment=environment, local_concepts=local_concepts
491
+ ),
493
492
  where_clause=where_clause,
494
493
  )
495
494
 
@@ -550,17 +549,22 @@ class Grain(Namespaced, BaseModel):
550
549
  where_clause=self.where_clause,
551
550
  )
552
551
 
553
- @property
554
- def abstract(self):
552
+ def _gen_abstract(self) -> bool:
555
553
  return not self.components or all(
556
554
  [c.endswith(ALL_ROWS_CONCEPT) for c in self.components]
557
555
  )
558
556
 
557
+ @property
558
+ def abstract(self):
559
+ if not self._abstract:
560
+ self._abstract = self._gen_abstract()
561
+ return self._abstract
562
+
559
563
  def __eq__(self, other: object):
560
564
  if isinstance(other, list):
561
- if not all([isinstance(c, Concept) for c in other]):
562
- return False
563
- return self.components == set([c.address for c in other])
565
+ if all([isinstance(c, Concept) for c in other]):
566
+ return self.components == set([c.address for c in other])
567
+ return False
564
568
  if not isinstance(other, Grain):
565
569
  return False
566
570
  if self.components == other.components:
@@ -581,15 +585,20 @@ class Grain(Namespaced, BaseModel):
581
585
  intersection = self.components.intersection(other.components)
582
586
  return Grain(components=intersection)
583
587
 
584
- def __str__(self):
588
+ def _gen_str(self) -> str:
585
589
  if self.abstract:
586
590
  base = "Grain<Abstract>"
587
591
  else:
588
- base = "Grain<" + ",".join([c for c in sorted(list(self.components))]) + ">"
592
+ base = "Grain<" + ",".join(sorted(self.components)) + ">"
589
593
  if self.where_clause:
590
594
  base += f"|{str(self.where_clause)}"
591
595
  return base
592
596
 
597
+ def __str__(self):
598
+ if not self._str:
599
+ self._str = self._gen_str()
600
+ return self._str
601
+
593
602
  def __radd__(self, other) -> "Grain":
594
603
  if other == 0:
595
604
  return self
@@ -1666,7 +1675,6 @@ class Function(DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
1666
1675
  def parse_output_datatype(cls, v, info: ValidationInfo):
1667
1676
  values = info.data
1668
1677
  if values.get("operator") == FunctionType.ATTR_ACCESS:
1669
- print(v)
1670
1678
  if isinstance(v, StructType):
1671
1679
  raise SyntaxError
1672
1680
  return v
@@ -2335,11 +2343,16 @@ class AlignItem(Namespaced, BaseModel):
2335
2343
 
2336
2344
  class CustomFunctionFactory:
2337
2345
  def __init__(
2338
- self, function: Expr, namespace: str, function_arguments: list[ArgBinding]
2346
+ self,
2347
+ function: Expr,
2348
+ namespace: str,
2349
+ function_arguments: list[ArgBinding],
2350
+ name: str,
2339
2351
  ):
2340
2352
  self.namespace = namespace
2341
2353
  self.function = function
2342
2354
  self.function_arguments = function_arguments
2355
+ self.name = name
2343
2356
 
2344
2357
  def with_namespace(self, namespace: str):
2345
2358
  self.namespace = namespace
@@ -2364,7 +2377,27 @@ class CustomFunctionFactory:
2364
2377
  for binding in self.function_arguments[len(creation_arg_list) :]:
2365
2378
  if binding.default is None:
2366
2379
  raise ValueError(f"Missing argument {binding.name}")
2380
+
2367
2381
  creation_arg_list.append(binding.default)
2382
+ for arg_idx, arg in enumerate(self.function_arguments):
2383
+ if not arg.datatype or arg.datatype == DataType.UNKNOWN:
2384
+ continue
2385
+ if arg_idx > len(creation_arg_list):
2386
+ continue
2387
+ comparison = arg_to_datatype(creation_arg_list[arg_idx])
2388
+ if comparison != arg.datatype:
2389
+ raise TypeError(
2390
+ f"Invalid type passed into custom function @{self.name} in position {arg_idx+1} for argument {arg.name}, expected {arg.datatype}, got {comparison}"
2391
+ )
2392
+ if isinstance(arg.datatype, TraitDataType):
2393
+ if not (
2394
+ isinstance(comparison, TraitDataType)
2395
+ and all(x in comparison.traits for x in arg.datatype.traits)
2396
+ ):
2397
+ raise TypeError(
2398
+ f"Invalid argument type passed into custom function @{self.name} in position {arg_idx+1} for argument {arg.name}, expected traits {arg.datatype.traits}, got {comparison}"
2399
+ )
2400
+
2368
2401
  if isinstance(nout, Mergeable):
2369
2402
  for idx, x in enumerate(creation_arg_list):
2370
2403
  if self.namespace == DEFAULT_NAMESPACE:
@@ -93,7 +93,6 @@ from trilogy.core.models.datasource import (
93
93
  RawColumnExpr,
94
94
  )
95
95
  from trilogy.core.models.environment import Environment
96
- from trilogy.utility import unique
97
96
 
98
97
  # TODO: refactor to avoid these
99
98
  if TYPE_CHECKING:
@@ -149,8 +148,8 @@ def concept_is_relevant(
149
148
 
150
149
  def concepts_to_build_grain_concepts(
151
150
  concepts: Iterable[BuildConcept | str], environment: "BuildEnvironment" | None
152
- ) -> list[BuildConcept]:
153
- pconcepts = []
151
+ ) -> set[str]:
152
+ pconcepts: list[BuildConcept] = []
154
153
  for c in concepts:
155
154
  if isinstance(c, BuildConcept):
156
155
  pconcepts.append(c)
@@ -162,14 +161,13 @@ def concepts_to_build_grain_concepts(
162
161
  f"Unable to resolve input {c} without environment provided to concepts_to_grain call"
163
162
  )
164
163
 
165
- final: List[BuildConcept] = []
164
+ final: set[str] = set()
166
165
  for sub in pconcepts:
167
166
  if not concept_is_relevant(sub, pconcepts):
168
167
  continue
169
- final.append(sub)
170
- final = unique(final, "address")
171
- v2 = sorted(final, key=lambda x: x.name)
172
- return v2
168
+ final.add(sub.address)
169
+
170
+ return final
173
171
 
174
172
 
175
173
  class LooseBuildConceptList(BaseModel):
@@ -268,8 +266,12 @@ class BuildParamaterizedConceptReference(DataTyped, BaseModel):
268
266
  class BuildGrain(BaseModel):
269
267
  components: set[str] = Field(default_factory=set)
270
268
  where_clause: Optional[BuildWhereClause] = None
269
+ _str: str | None = None
270
+ _str_no_condition: str | None = None
271
271
 
272
272
  def without_condition(self):
273
+ if not self.where_clause:
274
+ return self
273
275
  return BuildGrain.model_construct(components=self.components)
274
276
 
275
277
  @classmethod
@@ -280,13 +282,10 @@ class BuildGrain(BaseModel):
280
282
  where_clause: BuildWhereClause | None = None,
281
283
  ) -> "BuildGrain":
282
284
 
283
- return BuildGrain(
284
- components={
285
- c.address
286
- for c in concepts_to_build_grain_concepts(
287
- concepts, environment=environment
288
- )
289
- },
285
+ return BuildGrain.model_construct(
286
+ components=concepts_to_build_grain_concepts(
287
+ concepts, environment=environment
288
+ ),
290
289
  where_clause=where_clause,
291
290
  )
292
291
 
@@ -366,15 +365,35 @@ class BuildGrain(BaseModel):
366
365
  intersection = self.components.intersection(other.components)
367
366
  return BuildGrain(components=intersection)
368
367
 
369
- def __str__(self):
368
+ def _calculate_string(self):
370
369
  if self.abstract:
371
370
  base = "Grain<Abstract>"
372
371
  else:
373
- base = "Grain<" + ",".join([c for c in sorted(list(self.components))]) + ">"
372
+ base = "Grain<" + ",".join(sorted(self.components)) + ">"
374
373
  if self.where_clause:
375
374
  base += f"|{str(self.where_clause)}"
376
375
  return base
377
376
 
377
+ def _calculate_string_no_condition(self):
378
+ if self.abstract:
379
+ base = "Grain<Abstract>"
380
+ else:
381
+ base = "Grain<" + ",".join(sorted(self.components)) + ">"
382
+ return base
383
+
384
+ @property
385
+ def str_no_condition(self):
386
+ if self._str_no_condition:
387
+ return self._str_no_condition
388
+ self._str_no_condition = self._calculate_string_no_condition()
389
+ return self._str_no_condition
390
+
391
+ def __str__(self):
392
+ if self._str:
393
+ return self._str
394
+ self._str = self._calculate_string()
395
+ return self._str
396
+
378
397
  def __radd__(self, other) -> "BuildGrain":
379
398
  if other == 0:
380
399
  return self
@@ -1141,13 +1160,13 @@ class BuildFunction(DataTyped, BuildConceptArgs, BaseModel):
1141
1160
  @property
1142
1161
  def output_grain(self):
1143
1162
  # aggregates have an abstract grain
1144
- base_grain = BuildGrain(components=[])
1145
1163
  if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
1146
- return base_grain
1164
+ return BuildGrain.model_construct(components=[])
1147
1165
  # scalars have implicit grain of all arguments
1166
+ args = set()
1148
1167
  for input in self.concept_arguments:
1149
- base_grain += input.grain
1150
- return base_grain
1168
+ args += input.grain.components
1169
+ return BuildGrain.model_construct(components=args)
1151
1170
 
1152
1171
 
1153
1172
  class BuildAggregateWrapper(BuildConceptArgs, DataTyped, BaseModel):
@@ -1507,13 +1526,15 @@ class Factory:
1507
1526
  environment: Environment,
1508
1527
  local_concepts: dict[str, BuildConcept] | None = None,
1509
1528
  grain: Grain | None = None,
1529
+ pseudonym_map: dict[str, set[str]] | None = None,
1510
1530
  ):
1511
1531
  self.grain = grain or Grain()
1512
1532
  self.environment = environment
1513
1533
  self.local_concepts: dict[str, BuildConcept] = (
1514
1534
  {} if local_concepts is None else local_concepts
1515
1535
  )
1516
- self.pseudonym_map = get_canonical_pseudonyms(environment)
1536
+ self.local_non_build_concepts: dict[str, Concept] = {}
1537
+ self.pseudonym_map = pseudonym_map or get_canonical_pseudonyms(environment)
1517
1538
 
1518
1539
  def instantiate_concept(
1519
1540
  self,
@@ -1531,14 +1552,19 @@ class Factory:
1531
1552
  | date
1532
1553
  ),
1533
1554
  ) -> tuple[Concept, BuildConcept]:
1534
- from trilogy.parsing.common import arbitrary_to_concept
1555
+ from trilogy.parsing.common import arbitrary_to_concept, generate_concept_name
1535
1556
 
1557
+ name = generate_concept_name(arg)
1558
+ if name in self.local_concepts and name in self.local_non_build_concepts:
1559
+ # if we already have this concept, return it
1560
+ return self.local_non_build_concepts[name], self.local_concepts[name]
1536
1561
  new = arbitrary_to_concept(
1537
1562
  arg,
1538
1563
  environment=self.environment,
1539
1564
  )
1540
- built = self.build(new)
1541
- self.local_concepts[new.address] = built
1565
+ built = self._build_concept(new)
1566
+ self.local_concepts[name] = built
1567
+ self.local_non_build_concepts[name] = new
1542
1568
  return new, built
1543
1569
 
1544
1570
  @singledispatchmethod
@@ -1576,15 +1602,23 @@ class Factory:
1576
1602
  | DatePart
1577
1603
  | NumericType
1578
1604
  ):
1605
+ return self._build_primitive(base)
1606
+
1607
+ def _build_primitive(self, base):
1579
1608
  return base
1580
1609
 
1581
1610
  @build.register
1582
1611
  def _(self, base: None) -> None:
1612
+ return self._build_none(base)
1613
+
1614
+ def _build_none(self, base):
1583
1615
  return base
1584
1616
 
1585
1617
  @build.register
1586
1618
  def _(self, base: Function) -> BuildFunction | BuildAggregateWrapper:
1619
+ return self._build_function(base)
1587
1620
 
1621
+ def _build_function(self, base: Function) -> BuildFunction | BuildAggregateWrapper:
1588
1622
  raw_args: list[Concept | FuncArgs] = []
1589
1623
  for arg in base.arguments:
1590
1624
  # to do proper discovery, we need to inject virtual intermediate concepts
@@ -1645,19 +1679,24 @@ class Factory:
1645
1679
 
1646
1680
  @build.register
1647
1681
  def _(self, base: ConceptRef) -> BuildConcept:
1682
+ return self._build_concept_ref(base)
1683
+
1684
+ def _build_concept_ref(self, base: ConceptRef) -> BuildConcept:
1648
1685
  if base.address in self.local_concepts:
1649
1686
  full = self.local_concepts[base.address]
1650
1687
  if isinstance(full, BuildConcept):
1651
1688
  return full
1652
1689
  if base.address in self.environment.concepts:
1653
1690
  raw = self.environment.concepts[base.address]
1654
- return self.build(raw)
1691
+ return self._build_concept(raw)
1655
1692
  # this will error by design - TODO - more helpful message?
1656
- return self.build(self.environment.concepts[base.address])
1693
+ return self._build_concept(self.environment.concepts[base.address])
1657
1694
 
1658
1695
  @build.register
1659
1696
  def _(self, base: CaseWhen) -> BuildCaseWhen:
1697
+ return self._build_case_when(base)
1660
1698
 
1699
+ def _build_case_when(self, base: CaseWhen) -> BuildCaseWhen:
1661
1700
  comparison = base.comparison
1662
1701
  expr: Concept | FuncArgs = base.expr
1663
1702
  validation = requires_concept_nesting(expr)
@@ -1670,6 +1709,9 @@ class Factory:
1670
1709
 
1671
1710
  @build.register
1672
1711
  def _(self, base: CaseElse) -> BuildCaseElse:
1712
+ return self._build_case_else(base)
1713
+
1714
+ def _build_case_else(self, base: CaseElse) -> BuildCaseElse:
1673
1715
  expr: Concept | FuncArgs = base.expr
1674
1716
  validation = requires_concept_nesting(expr)
1675
1717
  if validation:
@@ -1678,7 +1720,9 @@ class Factory:
1678
1720
 
1679
1721
  @build.register
1680
1722
  def _(self, base: Concept) -> BuildConcept:
1723
+ return self._build_concept(base)
1681
1724
 
1725
+ def _build_concept(self, base: Concept) -> BuildConcept:
1682
1726
  # TODO: if we are using parameters, wrap it in a new model and use that in rendering
1683
1727
  if base.address in self.local_concepts:
1684
1728
  return self.local_concepts[base.address]
@@ -1713,7 +1757,7 @@ class Factory:
1713
1757
  purpose=base.purpose,
1714
1758
  metadata=base.metadata,
1715
1759
  lineage=build_lineage,
1716
- grain=self.build(final_grain),
1760
+ grain=self._build_grain(final_grain),
1717
1761
  namespace=base.namespace,
1718
1762
  keys=base.keys,
1719
1763
  modifiers=base.modifiers,
@@ -1723,10 +1767,14 @@ class Factory:
1723
1767
  granularity=granularity,
1724
1768
  build_is_aggregate=is_aggregate,
1725
1769
  )
1770
+ self.local_concepts[base.address] = rval
1726
1771
  return rval
1727
1772
 
1728
1773
  @build.register
1729
1774
  def _(self, base: AggregateWrapper) -> BuildAggregateWrapper:
1775
+ return self._build_aggregate_wrapper(base)
1776
+
1777
+ def _build_aggregate_wrapper(self, base: AggregateWrapper) -> BuildAggregateWrapper:
1730
1778
  if not base.by:
1731
1779
  by = [
1732
1780
  self.build(self.environment.concepts[c]) for c in self.grain.components
@@ -1739,18 +1787,23 @@ class Factory:
1739
1787
 
1740
1788
  @build.register
1741
1789
  def _(self, base: ColumnAssignment) -> BuildColumnAssignment:
1790
+ return self._build_column_assignment(base)
1791
+
1792
+ def _build_column_assignment(self, base: ColumnAssignment) -> BuildColumnAssignment:
1742
1793
  address = base.concept.address
1743
1794
  fetched = (
1744
- self.build(
1795
+ self._build_concept(
1745
1796
  self.environment.alias_origin_lookup[address].with_grain(self.grain)
1746
1797
  )
1747
1798
  if address in self.environment.alias_origin_lookup
1748
- else self.build(self.environment.concepts[address].with_grain(self.grain))
1799
+ else self._build_concept(
1800
+ self.environment.concepts[address].with_grain(self.grain)
1801
+ )
1749
1802
  )
1750
1803
 
1751
1804
  return BuildColumnAssignment.model_construct(
1752
1805
  alias=(
1753
- self.build(base.alias)
1806
+ self._build_function(base.alias)
1754
1807
  if isinstance(base.alias, Function)
1755
1808
  else base.alias
1756
1809
  ),
@@ -1760,17 +1813,25 @@ class Factory:
1760
1813
 
1761
1814
  @build.register
1762
1815
  def _(self, base: OrderBy) -> BuildOrderBy:
1816
+ return self._build_order_by(base)
1817
+
1818
+ def _build_order_by(self, base: OrderBy) -> BuildOrderBy:
1763
1819
  return BuildOrderBy.model_construct(items=[self.build(x) for x in base.items])
1764
1820
 
1765
1821
  @build.register
1766
1822
  def _(self, base: FunctionCallWrapper) -> BuildExpr:
1823
+ return self._build_function_call_wrapper(base)
1824
+
1825
+ def _build_function_call_wrapper(self, base: FunctionCallWrapper) -> BuildExpr:
1767
1826
  # function calls are kept around purely for the parse tree
1768
1827
  # so discard at the build point
1769
1828
  return self.build(base.content)
1770
1829
 
1771
1830
  @build.register
1772
1831
  def _(self, base: OrderItem) -> BuildOrderItem:
1832
+ return self._build_order_item(base)
1773
1833
 
1834
+ def _build_order_item(self, base: OrderItem) -> BuildOrderItem:
1774
1835
  bexpr: Any
1775
1836
  validation = requires_concept_nesting(base.expr)
1776
1837
  if validation:
@@ -1784,20 +1845,27 @@ class Factory:
1784
1845
 
1785
1846
  @build.register
1786
1847
  def _(self, base: WhereClause) -> BuildWhereClause:
1848
+ return self._build_where_clause(base)
1787
1849
 
1850
+ def _build_where_clause(self, base: WhereClause) -> BuildWhereClause:
1788
1851
  return BuildWhereClause.model_construct(
1789
1852
  conditional=self.build(base.conditional)
1790
1853
  )
1791
1854
 
1792
1855
  @build.register
1793
1856
  def _(self, base: HavingClause) -> BuildHavingClause:
1857
+ return self._build_having_clause(base)
1858
+
1859
+ def _build_having_clause(self, base: HavingClause) -> BuildHavingClause:
1794
1860
  return BuildHavingClause.model_construct(
1795
1861
  conditional=self.build(base.conditional)
1796
1862
  )
1797
1863
 
1798
1864
  @build.register
1799
1865
  def _(self, base: WindowItem) -> BuildWindowItem:
1866
+ return self._build_window_item(base)
1800
1867
 
1868
+ def _build_window_item(self, base: WindowItem) -> BuildWindowItem:
1801
1869
  content: Concept | FuncArgs = base.content
1802
1870
  validation = requires_concept_nesting(base.content)
1803
1871
  if validation:
@@ -1821,6 +1889,9 @@ class Factory:
1821
1889
 
1822
1890
  @build.register
1823
1891
  def _(self, base: Conditional) -> BuildConditional:
1892
+ return self._build_conditional(base)
1893
+
1894
+ def _build_conditional(self, base: Conditional) -> BuildConditional:
1824
1895
  return BuildConditional.model_construct(
1825
1896
  left=self.handle_constant(self.build(base.left)),
1826
1897
  right=self.handle_constant(self.build(base.right)),
@@ -1829,6 +1900,11 @@ class Factory:
1829
1900
 
1830
1901
  @build.register
1831
1902
  def _(self, base: SubselectComparison) -> BuildSubselectComparison:
1903
+ return self._build_subselect_comparison(base)
1904
+
1905
+ def _build_subselect_comparison(
1906
+ self, base: SubselectComparison
1907
+ ) -> BuildSubselectComparison:
1832
1908
  right: Any = base.right
1833
1909
  # this has specialized logic - include all Functions
1834
1910
  if isinstance(base.right, (AggregateWrapper, WindowItem, FilterItem, Function)):
@@ -1842,7 +1918,9 @@ class Factory:
1842
1918
 
1843
1919
  @build.register
1844
1920
  def _(self, base: Comparison) -> BuildComparison:
1921
+ return self._build_comparison(base)
1845
1922
 
1923
+ def _build_comparison(self, base: Comparison) -> BuildComparison:
1846
1924
  left = base.left
1847
1925
  validation = requires_concept_nesting(base.left)
1848
1926
  if validation:
@@ -1861,6 +1939,9 @@ class Factory:
1861
1939
 
1862
1940
  @build.register
1863
1941
  def _(self, base: AlignItem) -> BuildAlignItem:
1942
+ return self._build_align_item(base)
1943
+
1944
+ def _build_align_item(self, base: AlignItem) -> BuildAlignItem:
1864
1945
  return BuildAlignItem.model_construct(
1865
1946
  alias=base.alias,
1866
1947
  concepts=[self.build(x) for x in base.concepts],
@@ -1869,24 +1950,34 @@ class Factory:
1869
1950
 
1870
1951
  @build.register
1871
1952
  def _(self, base: AlignClause) -> BuildAlignClause:
1953
+ return self._build_align_clause(base)
1954
+
1955
+ def _build_align_clause(self, base: AlignClause) -> BuildAlignClause:
1872
1956
  return BuildAlignClause.model_construct(
1873
1957
  items=[self.build(x) for x in base.items]
1874
1958
  )
1875
1959
 
1876
1960
  @build.register
1877
1961
  def _(self, base: RowsetItem) -> BuildRowsetItem:
1962
+ return self._build_rowset_item(base)
1878
1963
 
1964
+ def _build_rowset_item(self, base: RowsetItem) -> BuildRowsetItem:
1879
1965
  factory = Factory(
1880
1966
  environment=self.environment,
1881
1967
  local_concepts={},
1882
1968
  grain=base.rowset.select.grain,
1969
+ pseudonym_map=self.pseudonym_map,
1883
1970
  )
1884
1971
  return BuildRowsetItem(
1885
- content=factory.build(base.content), rowset=factory.build(base.rowset)
1972
+ content=factory._build_concept_ref(base.content),
1973
+ rowset=factory._build_rowset_lineage(base.rowset),
1886
1974
  )
1887
1975
 
1888
1976
  @build.register
1889
1977
  def _(self, base: RowsetLineage) -> BuildRowsetLineage:
1978
+ return self._build_rowset_lineage(base)
1979
+
1980
+ def _build_rowset_lineage(self, base: RowsetLineage) -> BuildRowsetLineage:
1890
1981
  out = BuildRowsetLineage(
1891
1982
  name=base.name,
1892
1983
  derived_concepts=[x.address for x in base.derived_concepts],
@@ -1896,8 +1987,13 @@ class Factory:
1896
1987
 
1897
1988
  @build.register
1898
1989
  def _(self, base: Grain) -> BuildGrain:
1990
+ return self._build_grain(base)
1991
+
1992
+ def _build_grain(self, base: Grain) -> BuildGrain:
1899
1993
  if base.where_clause:
1900
- factory = Factory(environment=self.environment)
1994
+ factory = Factory(
1995
+ environment=self.environment, pseudonym_map=self.pseudonym_map
1996
+ )
1901
1997
  where = factory.build(base.where_clause)
1902
1998
  else:
1903
1999
  where = None
@@ -1907,10 +2003,16 @@ class Factory:
1907
2003
 
1908
2004
  @build.register
1909
2005
  def _(self, base: TupleWrapper) -> TupleWrapper:
2006
+ return self._build_tuple_wrapper(base)
2007
+
2008
+ def _build_tuple_wrapper(self, base: TupleWrapper) -> TupleWrapper:
1910
2009
  return TupleWrapper(val=[self.build(x) for x in base.val], type=base.type)
1911
2010
 
1912
2011
  @build.register
1913
2012
  def _(self, base: FilterItem) -> BuildFilterItem:
2013
+ return self._build_filter_item(base)
2014
+
2015
+ def _build_filter_item(self, base: FilterItem) -> BuildFilterItem:
1914
2016
  if isinstance(
1915
2017
  base.content, (Function, AggregateWrapper, WindowItem, FilterItem)
1916
2018
  ):
@@ -1924,11 +2026,16 @@ class Factory:
1924
2026
 
1925
2027
  @build.register
1926
2028
  def _(self, base: Parenthetical) -> BuildParenthetical:
2029
+ return self._build_parenthetical(base)
2030
+
2031
+ def _build_parenthetical(self, base: Parenthetical) -> BuildParenthetical:
1927
2032
  return BuildParenthetical.model_construct(content=(self.build(base.content)))
1928
2033
 
1929
2034
  @build.register
1930
2035
  def _(self, base: SelectLineage) -> BuildSelectLineage:
2036
+ return self._build_select_lineage(base)
1931
2037
 
2038
+ def _build_select_lineage(self, base: SelectLineage) -> BuildSelectLineage:
1932
2039
  from trilogy.core.models.build import (
1933
2040
  BuildSelectLineage,
1934
2041
  Factory,
@@ -1936,12 +2043,18 @@ class Factory:
1936
2043
 
1937
2044
  materialized: dict[str, BuildConcept] = {}
1938
2045
  factory = Factory(
1939
- grain=base.grain, environment=self.environment, local_concepts=materialized
2046
+ grain=base.grain,
2047
+ environment=self.environment,
2048
+ local_concepts=materialized,
2049
+ pseudonym_map=self.pseudonym_map,
1940
2050
  )
1941
2051
  for k, v in base.local_concepts.items():
1942
2052
  materialized[k] = factory.build(v)
1943
2053
  where_factory = Factory(
1944
- grain=Grain(), environment=self.environment, local_concepts={}
2054
+ grain=Grain(),
2055
+ environment=self.environment,
2056
+ local_concepts={},
2057
+ pseudonym_map=self.pseudonym_map,
1945
2058
  )
1946
2059
  where_clause = (
1947
2060
  where_factory.build(base.where_clause) if base.where_clause else None
@@ -1985,7 +2098,11 @@ class Factory:
1985
2098
 
1986
2099
  @build.register
1987
2100
  def _(self, base: MultiSelectLineage) -> BuildMultiSelectLineage:
2101
+ return self._build_multi_select_lineage(base)
1988
2102
 
2103
+ def _build_multi_select_lineage(
2104
+ self, base: MultiSelectLineage
2105
+ ) -> BuildMultiSelectLineage:
1989
2106
  local_build_cache: dict[str, BuildConcept] = {}
1990
2107
 
1991
2108
  parents: list[BuildSelectLineage] = [self.build(x) for x in base.selects]
@@ -2023,8 +2140,11 @@ class Factory:
2023
2140
  grain=base.grain,
2024
2141
  environment=self.environment,
2025
2142
  local_concepts=local_build_cache,
2143
+ pseudonym_map=self.pseudonym_map,
2144
+ )
2145
+ where_factory = Factory(
2146
+ environment=self.environment, pseudonym_map=self.pseudonym_map
2026
2147
  )
2027
- where_factory = Factory(environment=self.environment)
2028
2148
  lineage = BuildMultiSelectLineage.model_construct(
2029
2149
  # we don't build selects here; they'll be built automatically in query discovery
2030
2150
  selects=base.selects,
@@ -2053,6 +2173,9 @@ class Factory:
2053
2173
 
2054
2174
  @build.register
2055
2175
  def _(self, base: Environment):
2176
+ return self._build_environment(base)
2177
+
2178
+ def _build_environment(self, base: Environment):
2056
2179
  from trilogy.core.models.build_environment import BuildEnvironment
2057
2180
 
2058
2181
  new = BuildEnvironment(
@@ -2061,14 +2184,14 @@ class Factory:
2061
2184
  )
2062
2185
 
2063
2186
  for k, v in base.concepts.items():
2064
- new.concepts[k] = self.build(v)
2187
+ new.concepts[k] = self._build_concept(v)
2065
2188
  for (
2066
2189
  k,
2067
2190
  d,
2068
2191
  ) in base.datasources.items():
2069
- new.datasources[k] = self.build(d)
2192
+ new.datasources[k] = self._build_datasource(d)
2070
2193
  for k, a in base.alias_origin_lookup.items():
2071
- new.alias_origin_lookup[k] = self.build(a)
2194
+ new.alias_origin_lookup[k] = self._build_concept(a)
2072
2195
  # add in anything that was built as a side-effect
2073
2196
  for bk, bv in self.local_concepts.items():
2074
2197
  if bk not in new.concepts:
@@ -2078,39 +2201,63 @@ class Factory:
2078
2201
 
2079
2202
  @build.register
  def _(self, base: TraitDataType) -> TraitDataType:
      # Registered overload for TraitDataType; forwards to the named method.
      return self._build_trait_data_type(base)

  def _build_trait_data_type(self, base: TraitDataType) -> TraitDataType:
      """Return ``base`` unchanged; no build-time conversion is applied."""
      return base
2082
2208
 
2083
2209
  @build.register
  def _(self, base: ArrayType) -> ArrayType:
      # Registered overload for ArrayType; forwards to the named method.
      return self._build_array_type(base)

  def _build_array_type(self, base: ArrayType) -> ArrayType:
      """Return ``base`` unchanged; no build-time conversion is applied."""
      return base
2086
2215
 
2087
2216
  @build.register
  def _(self, base: StructType) -> StructType:
      # Registered overload for StructType; forwards to the named method.
      return self._build_struct_type(base)

  def _build_struct_type(self, base: StructType) -> StructType:
      """Return ``base`` unchanged; no build-time conversion is applied."""
      return base
2090
2222
 
2091
2223
  @build.register
  def _(self, base: MapType) -> MapType:
      # Registered overload for MapType; forwards to the named method.
      return self._build_map_type(base)

  def _build_map_type(self, base: MapType) -> MapType:
      """Return ``base`` unchanged; no build-time conversion is applied."""
      return base
2094
2229
 
2095
2230
  @build.register
  def _(self, base: ArgBinding) -> ArgBinding:
      # Registered overload for ArgBinding; forwards to the named method.
      return self._build_arg_binding(base)

  def _build_arg_binding(self, base: ArgBinding) -> ArgBinding:
      """Return ``base`` unchanged; no build-time conversion is applied."""
      return base
2098
2236
 
2099
2237
  @build.register
  def _(self, base: Ordering) -> Ordering:
      # Registered overload for Ordering; forwards to the named method.
      return self._build_ordering(base)

  def _build_ordering(self, base: Ordering) -> Ordering:
      """Return ``base`` unchanged; no build-time conversion is applied."""
      return base
2102
2243
 
2103
2244
  @build.register
2104
2245
  def _(self, base: Datasource):
2246
+ return self._build_datasource(base)
2247
+
2248
+ def _build_datasource(self, base: Datasource):
2105
2249
  local_cache: dict[str, BuildConcept] = {}
2106
2250
  factory = Factory(
2107
- grain=base.grain, environment=self.environment, local_concepts=local_cache
2251
+ grain=base.grain,
2252
+ environment=self.environment,
2253
+ local_concepts=local_cache,
2254
+ pseudonym_map=self.pseudonym_map,
2108
2255
  )
2109
2256
  return BuildDatasource.model_construct(
2110
2257
  name=base.name,
2111
- columns=[factory.build(c) for c in base.columns],
2258
+ columns=[factory._build_column_assignment(c) for c in base.columns],
2112
2259
  address=base.address,
2113
- grain=factory.build(base.grain),
2260
+ grain=factory._build_grain(base.grain),
2114
2261
  namespace=base.namespace,
2115
2262
  metadata=base.metadata,
2116
2263
  where=(factory.build(base.where) if base.where else None),
@@ -225,6 +225,18 @@ class StructType(BaseModel):
225
225
  fields: Sequence[StructComponent | TYPEDEF_TYPES]
226
226
  fields_map: Dict[str, DataTyped | int | float | str | StructComponent]
227
227
 
228
def __repr__(self):
    """Render as ``struct<name:type, ...>``.

    Only fields that are ``StructComponent`` instances are included;
    any other entry in ``self.fields`` is left out of the rendering.
    """
    rendered = []
    for field in self.fields:
        if isinstance(field, StructComponent):
            rendered.append(f"{field.name}:{field.type.name}")
    return "struct<{}>".format(", ".join(rendered))
236
+
237
def __str__(self) -> str:
    """Use the same text as ``__repr__``."""
    return repr(self)
239
+
228
240
  @field_validator("fields", mode="plain")
229
241
  def validate_type(cls, v):
230
242
  final = []
@@ -453,6 +453,7 @@ def gen_merge_node(
453
453
  else:
454
454
  all_search_concepts = all_concepts
455
455
  all_search_concepts = sorted(all_search_concepts, key=lambda x: x.address)
456
+ break_set = set([x.address for x in all_search_concepts])
456
457
  for filter_downstream in [True, False]:
457
458
  weak_resolve = resolve_weak_components(
458
459
  all_search_concepts,
@@ -466,27 +467,28 @@ def gen_merge_node(
466
467
  logger.info(
467
468
  f"{padding(depth)}{LOGGER_PREFIX} wasn't able to resolve graph through intermediate concept injection with accept_partial {accept_partial}, filter_downstream {filter_downstream}"
468
469
  )
469
- else:
470
- log_graph = [[y.address for y in x] for x in weak_resolve]
471
- logger.info(
472
- f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
473
- )
474
- for flat in log_graph:
475
- if set(flat) == set([x.address for x in all_search_concepts]):
476
- logger.info(
477
- f"{padding(depth)}{LOGGER_PREFIX} expanded concept resolution was identical to search resolution; breaking to avoid recursion error."
478
- )
479
- return None
480
- return subgraphs_to_merge_node(
481
- weak_resolve,
482
- depth=depth,
483
- all_concepts=all_search_concepts,
484
- environment=environment,
485
- g=g,
486
- source_concepts=source_concepts,
487
- history=history,
488
- conditions=conditions,
489
- search_conditions=search_conditions,
490
- output_concepts=all_concepts,
491
- )
470
+ continue
471
+
472
+ log_graph = [[y.address for y in x] for x in weak_resolve]
473
+ logger.info(
474
+ f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
475
+ )
476
+ for flat in log_graph:
477
+ if set(flat) == break_set:
478
+ logger.info(
479
+ f"{padding(depth)}{LOGGER_PREFIX} expanded concept resolution was identical to search resolution; breaking to avoid recursion error."
480
+ )
481
+ return None
482
+ return subgraphs_to_merge_node(
483
+ weak_resolve,
484
+ depth=depth,
485
+ all_concepts=all_search_concepts,
486
+ environment=environment,
487
+ g=g,
488
+ source_concepts=source_concepts,
489
+ history=history,
490
+ conditions=conditions,
491
+ search_conditions=search_conditions,
492
+ output_concepts=all_concepts,
493
+ )
492
494
  return None
trilogy/parsing/common.py CHANGED
@@ -309,7 +309,7 @@ def concepts_to_grain_concepts(
309
309
  concepts: Iterable[Concept | ConceptRef | str],
310
310
  environment: Environment | None,
311
311
  local_concepts: dict[str, Concept] | None = None,
312
- ) -> list[Concept]:
312
+ ) -> set[str]:
313
313
  preconcepts: list[Concept] = []
314
314
  for c in concepts:
315
315
  if isinstance(c, Concept):
@@ -329,7 +329,7 @@ def concepts_to_grain_concepts(
329
329
  raise ValueError(
330
330
  f"Unable to resolve input {c} without environment provided to concepts_to_grain call"
331
331
  )
332
- pconcepts = []
332
+ pconcepts: list[Concept] = []
333
333
  for x in preconcepts:
334
334
  if (
335
335
  x.lineage
@@ -340,14 +340,16 @@ def concepts_to_grain_concepts(
340
340
  pconcepts.append(environment.concepts[x.lineage.arguments[0].address]) # type: ignore
341
341
  else:
342
342
  pconcepts.append(x)
343
- final: List[Concept] = []
343
+
344
+ seen = set()
344
345
  for sub in pconcepts:
346
+ if sub.address in seen:
347
+ continue
345
348
  if not concept_is_relevant(sub, pconcepts, environment): # type: ignore
346
349
  continue
347
- final.append(sub)
348
- final = unique(final, "address")
349
- v2 = sorted(final, key=lambda x: x.name)
350
- return v2
350
+ seen.add(sub.address)
351
+
352
+ return seen
351
353
 
352
354
 
353
355
  def _get_relevant_parent_concepts(arg) -> tuple[list[ConceptRef], bool]:
@@ -823,6 +825,40 @@ def rowset_to_concepts(rowset: RowsetDerivationStatement, environment: Environme
823
825
  return pre_output
824
826
 
825
827
 
828
def generate_concept_name(
    parent: (
        AggregateWrapper
        | FunctionCallWrapper
        | WindowItem
        | FilterItem
        | Function
        | ListWrapper
        | MapWrapper
        | int
        | float
        | str
        | date
    ),
) -> str:
    """Generate a name for a concept based on its parent type and content.

    Every name starts with ``VIRTUAL_CONCEPT_PREFIX`` and ends with
    ``string_to_hash(str(parent))``. Aggregates, windows, filters and
    functions insert a kind-specific tag in between; anything else gets
    only the prefix and the hash.
    """
    if isinstance(parent, AggregateWrapper):
        return f"{VIRTUAL_CONCEPT_PREFIX}_agg_{parent.function.operator.value}_{string_to_hash(str(parent))}"

    if isinstance(parent, WindowItem):
        return f"{VIRTUAL_CONCEPT_PREFIX}_window_{parent.type.value}_{string_to_hash(str(parent))}"

    if isinstance(parent, FilterItem):
        # Filters over a plain concept reference carry that concept's name.
        if isinstance(parent.content, ConceptRef):
            return f"{VIRTUAL_CONCEPT_PREFIX}_filter_{parent.content.name}_{string_to_hash(str(parent))}"
        return f"{VIRTUAL_CONCEPT_PREFIX}_filter_{string_to_hash(str(parent))}"

    if isinstance(parent, Function):
        # GROUP functions use a fixed tag instead of the operator value.
        if parent.operator == FunctionType.GROUP:
            return f"{VIRTUAL_CONCEPT_PREFIX}_group_to_{string_to_hash(str(parent))}"
        return f"{VIRTUAL_CONCEPT_PREFIX}_func_{parent.operator.value}_{string_to_hash(str(parent))}"

    # ListWrapper, MapWrapper, or primitive types
    return f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"
860
+
861
+
826
862
  def arbitrary_to_concept(
827
863
  parent: (
828
864
  AggregateWrapper
@@ -843,20 +879,22 @@ def arbitrary_to_concept(
843
879
  metadata: Metadata | None = None,
844
880
  ) -> Concept:
845
881
  namespace = namespace or environment.namespace
882
+
846
883
  # this is purely for the parse tree, discard from derivation
847
884
  if isinstance(parent, FunctionCallWrapper):
848
885
  return arbitrary_to_concept(
849
886
  parent.content, environment, namespace, name, metadata # type: ignore
850
887
  )
851
- elif isinstance(parent, AggregateWrapper):
852
- if not name:
853
- name = f"{VIRTUAL_CONCEPT_PREFIX}_agg_{parent.function.operator.value}_{string_to_hash(str(parent))}"
888
+
889
+ # Generate name if not provided
890
+ if not name:
891
+ name = generate_concept_name(parent)
892
+
893
+ if isinstance(parent, AggregateWrapper):
854
894
  return agg_wrapper_to_concept(
855
895
  parent, namespace, name, metadata=metadata, environment=environment
856
896
  )
857
897
  elif isinstance(parent, WindowItem):
858
- if not name:
859
- name = f"{VIRTUAL_CONCEPT_PREFIX}_window_{parent.type.value}_{string_to_hash(str(parent))}"
860
898
  return window_item_to_concept(
861
899
  parent,
862
900
  name,
@@ -865,11 +903,6 @@ def arbitrary_to_concept(
865
903
  metadata=metadata,
866
904
  )
867
905
  elif isinstance(parent, FilterItem):
868
- if not name:
869
- if isinstance(parent.content, ConceptRef):
870
- name = f"{VIRTUAL_CONCEPT_PREFIX}_filter_{parent.content.name}_{string_to_hash(str(parent))}"
871
- else:
872
- name = f"{VIRTUAL_CONCEPT_PREFIX}_filter_{string_to_hash(str(parent))}"
873
906
  return filter_item_to_concept(
874
907
  parent,
875
908
  name,
@@ -878,14 +911,6 @@ def arbitrary_to_concept(
878
911
  metadata=metadata,
879
912
  )
880
913
  elif isinstance(parent, Function):
881
- if not name:
882
- if parent.operator == FunctionType.GROUP:
883
- name = (
884
- f"{VIRTUAL_CONCEPT_PREFIX}_group_to_{string_to_hash(str(parent))}"
885
- )
886
- else:
887
- name = f"{VIRTUAL_CONCEPT_PREFIX}_func_{parent.operator.value}_{string_to_hash(str(parent))}"
888
-
889
914
  if parent.operator == FunctionType.GROUP:
890
915
  return group_function_to_concept(
891
916
  parent,
@@ -902,10 +927,6 @@ def arbitrary_to_concept(
902
927
  namespace=namespace,
903
928
  )
904
929
  elif isinstance(parent, ListWrapper):
905
- if not name:
906
- name = f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"
907
930
  return constant_to_concept(parent, name, namespace, metadata)
908
931
  else:
909
- if not name:
910
- name = f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"
911
932
  return constant_to_concept(parent, name, namespace, metadata)
@@ -161,6 +161,11 @@ class WholeGrainWrapper:
161
161
  where: WhereClause
162
162
 
163
163
 
164
@dataclass
class FunctionBindingType:
    """Wrapper for the datatype parsed from a function argument binding.

    ``function_binding_item`` uses ``isinstance`` on this class to tell a
    parsed type annotation apart from a parsed default value.
    """

    # The annotated datatype; defaults to None.
    type: DataType | TraitDataType | None = None
167
+
168
+
164
169
  with open(join(dirname(__file__), "trilogy.lark"), "r") as f:
165
170
  PARSER = Lark(
166
171
  f.read(),
@@ -1373,11 +1378,24 @@ class ParseToObjects(Transformer):
1373
1378
  def function_binding_list(self, meta: Meta, args) -> list[ArgBinding]:
1374
1379
  return args
1375
1380
 
1381
@v_args(meta=True)
def function_binding_type(self, meta: Meta, args) -> FunctionBindingType:
    """Wrap the first child in a FunctionBindingType."""
    parsed_type = args[0]
    return FunctionBindingType(type=parsed_type)
1384
+
1385
@v_args(meta=True)
def function_binding_default(self, meta: Meta, args):
    """Return the default-value expression of an argument binding."""
    # The grammar rule is `/=/ expr`; presumably args[0] is the `=` token
    # (regex terminals are kept in the tree) and args[1] is the expression.
    # NOTE(review): confirm against the parser's token-filtering settings.
    return args[1]
1388
+
1376
1389
  @v_args(meta=True)
1377
1390
  def function_binding_item(self, meta: Meta, args) -> ArgBinding:
1378
- if len(args) == 2:
1379
- return ArgBinding.model_construct(name=args[0], default=args[1])
1380
- return ArgBinding.model_construct(name=args[0], default=None)
1391
+ default = None
1392
+ type = None
1393
+ for arg in args[1:]:
1394
+ if isinstance(arg, FunctionBindingType):
1395
+ type = arg.type
1396
+ else:
1397
+ default = arg
1398
+ return ArgBinding.model_construct(name=args[0], datatype=type, default=default)
1381
1399
 
1382
1400
  @v_args(meta=True)
1383
1401
  def raw_function(self, meta: Meta, args) -> FunctionDeclaration:
@@ -1389,6 +1407,7 @@ class ParseToObjects(Transformer):
1389
1407
  function=output,
1390
1408
  namespace=self.environment.namespace,
1391
1409
  function_arguments=function_arguments,
1410
+ name=identity,
1392
1411
  )
1393
1412
  return FunctionDeclaration(name=identity, args=function_arguments, expr=output)
1394
1413
 
@@ -91,7 +91,9 @@
91
91
 
92
92
  // FUNCTION blocks
93
93
  function: raw_function
94
- function_binding_item: IDENTIFIER ("=" literal)?
94
+ function_binding_type: ":" data_type
95
+ function_binding_default: /=/ expr
96
+ function_binding_item: IDENTIFIER function_binding_type? function_binding_default?
95
97
  function_binding_list: (function_binding_item ",")* function_binding_item ","?
96
98
  raw_function: "def" IDENTIFIER "(" function_binding_list ")" "->" expr
97
99