pytrilogy 0.0.1.102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (77) hide show
  1. pytrilogy-0.0.1.102.dist-info/LICENSE.md +19 -0
  2. pytrilogy-0.0.1.102.dist-info/METADATA +277 -0
  3. pytrilogy-0.0.1.102.dist-info/RECORD +77 -0
  4. pytrilogy-0.0.1.102.dist-info/WHEEL +5 -0
  5. pytrilogy-0.0.1.102.dist-info/entry_points.txt +2 -0
  6. pytrilogy-0.0.1.102.dist-info/top_level.txt +1 -0
  7. trilogy/__init__.py +8 -0
  8. trilogy/compiler.py +0 -0
  9. trilogy/constants.py +30 -0
  10. trilogy/core/__init__.py +0 -0
  11. trilogy/core/constants.py +3 -0
  12. trilogy/core/enums.py +270 -0
  13. trilogy/core/env_processor.py +33 -0
  14. trilogy/core/environment_helpers.py +156 -0
  15. trilogy/core/ergonomics.py +187 -0
  16. trilogy/core/exceptions.py +23 -0
  17. trilogy/core/functions.py +320 -0
  18. trilogy/core/graph_models.py +55 -0
  19. trilogy/core/internal.py +37 -0
  20. trilogy/core/models.py +3145 -0
  21. trilogy/core/processing/__init__.py +0 -0
  22. trilogy/core/processing/concept_strategies_v3.py +603 -0
  23. trilogy/core/processing/graph_utils.py +44 -0
  24. trilogy/core/processing/node_generators/__init__.py +25 -0
  25. trilogy/core/processing/node_generators/basic_node.py +71 -0
  26. trilogy/core/processing/node_generators/common.py +239 -0
  27. trilogy/core/processing/node_generators/concept_merge.py +152 -0
  28. trilogy/core/processing/node_generators/filter_node.py +83 -0
  29. trilogy/core/processing/node_generators/group_node.py +92 -0
  30. trilogy/core/processing/node_generators/group_to_node.py +99 -0
  31. trilogy/core/processing/node_generators/merge_node.py +148 -0
  32. trilogy/core/processing/node_generators/multiselect_node.py +189 -0
  33. trilogy/core/processing/node_generators/rowset_node.py +130 -0
  34. trilogy/core/processing/node_generators/select_node.py +328 -0
  35. trilogy/core/processing/node_generators/unnest_node.py +37 -0
  36. trilogy/core/processing/node_generators/window_node.py +85 -0
  37. trilogy/core/processing/nodes/__init__.py +76 -0
  38. trilogy/core/processing/nodes/base_node.py +251 -0
  39. trilogy/core/processing/nodes/filter_node.py +49 -0
  40. trilogy/core/processing/nodes/group_node.py +110 -0
  41. trilogy/core/processing/nodes/merge_node.py +326 -0
  42. trilogy/core/processing/nodes/select_node_v2.py +198 -0
  43. trilogy/core/processing/nodes/unnest_node.py +54 -0
  44. trilogy/core/processing/nodes/window_node.py +34 -0
  45. trilogy/core/processing/utility.py +278 -0
  46. trilogy/core/query_processor.py +331 -0
  47. trilogy/dialect/__init__.py +0 -0
  48. trilogy/dialect/base.py +679 -0
  49. trilogy/dialect/bigquery.py +80 -0
  50. trilogy/dialect/common.py +43 -0
  51. trilogy/dialect/config.py +55 -0
  52. trilogy/dialect/duckdb.py +83 -0
  53. trilogy/dialect/enums.py +95 -0
  54. trilogy/dialect/postgres.py +86 -0
  55. trilogy/dialect/presto.py +82 -0
  56. trilogy/dialect/snowflake.py +82 -0
  57. trilogy/dialect/sql_server.py +89 -0
  58. trilogy/docs/__init__.py +0 -0
  59. trilogy/engine.py +48 -0
  60. trilogy/executor.py +242 -0
  61. trilogy/hooks/__init__.py +0 -0
  62. trilogy/hooks/base_hook.py +37 -0
  63. trilogy/hooks/graph_hook.py +24 -0
  64. trilogy/hooks/query_debugger.py +133 -0
  65. trilogy/metadata/__init__.py +0 -0
  66. trilogy/parser.py +10 -0
  67. trilogy/parsing/__init__.py +0 -0
  68. trilogy/parsing/common.py +176 -0
  69. trilogy/parsing/config.py +5 -0
  70. trilogy/parsing/exceptions.py +2 -0
  71. trilogy/parsing/helpers.py +1 -0
  72. trilogy/parsing/parse_engine.py +1951 -0
  73. trilogy/parsing/render.py +483 -0
  74. trilogy/py.typed +0 -0
  75. trilogy/scripts/__init__.py +0 -0
  76. trilogy/scripts/trilogy.py +127 -0
  77. trilogy/utility.py +31 -0
@@ -0,0 +1,328 @@
1
+ from itertools import combinations
2
+ from typing import List, Optional
3
+
4
+ from trilogy.core.enums import PurposeLineage
5
+ from trilogy.core.models import Concept, Environment, Grain, LooseConceptList
6
+ from trilogy.core.processing.nodes import (
7
+ StrategyNode,
8
+ SelectNode,
9
+ MergeNode,
10
+ GroupNode,
11
+ ConstantNode,
12
+ )
13
+ from trilogy.core.exceptions import NoDatasourceException
14
+ import networkx as nx
15
+ from trilogy.core.graph_models import concept_to_node, datasource_to_node
16
+ from trilogy.constants import logger
17
+ from trilogy.core.processing.utility import padding
18
+
19
+ LOGGER_PREFIX = "[GEN_SELECT_NODE]"
20
+
21
+
22
def gen_select_node_from_table(
    target_concept: Concept,
    all_concepts: List[Concept],
    g: nx.DiGraph,
    environment: Environment,
    depth: int,
    target_grain: Grain,
    accept_partial: bool = False,
) -> Optional[StrategyNode]:
    """Attempt to source all requested concepts from a single datasource.

    Returns a ConstantNode when every concept is a constant (no table
    required), a SelectNode — wrapped in a GroupNode when the datasource
    grain forces aggregation — when exactly one datasource can reach every
    concept through the concept graph, or None when no single datasource
    satisfies the request. When multiple datasources qualify, the one with
    the fewest partially-covered concepts wins.

    Raises:
        ValueError: a requested concept is absent from the graph entirely.
        SyntaxError: a graph node vanished between membership check and
            path search (defensive re-raise with a clearer message).
    """
    # if we have only constants
    # we don't need a table
    # so verify nothing, select node will render
    all_lcl = LooseConceptList(concepts=all_concepts)
    if all([c.derivation == PurposeLineage.CONSTANT for c in all_concepts]):
        logger.info(
            f"{padding(depth)}{LOGGER_PREFIX} All concepts {[x.address for x in all_concepts]} are constants, returning constant node"
        )
        return ConstantNode(
            output_concepts=all_concepts,
            input_concepts=[],
            environment=environment,
            g=g,
            parents=[],
            depth=depth,
            # no partial for constants
            partial_concepts=[],
            force_group=False,
        )
    # candidate node per datasource identifier, with a score used to pick one
    candidates: dict[str, StrategyNode] = {}
    scores: dict[str, int] = {}
    # otherwise, we need to look for a table
    for datasource in environment.datasources.values():
        all_found = True
        for raw_concept in all_concepts:
            # look for connection to abstract grain
            req_concept = raw_concept.with_default_grain()
            # if we don't have a concept in the graph
            # exit early
            if concept_to_node(req_concept) not in g.nodes:
                raise ValueError(concept_to_node(req_concept))
            try:
                path = nx.shortest_path(
                    g,
                    source=datasource_to_node(datasource),
                    target=concept_to_node(req_concept),
                )
            except nx.NodeNotFound as e:
                # just to provide better error: report which endpoint is missing
                ncandidates = [
                    datasource_to_node(datasource),
                    concept_to_node(req_concept),
                ]
                for ncandidate in ncandidates:
                    try:
                        g.nodes[ncandidate]
                    except KeyError:
                        raise SyntaxError(
                            "Could not find node for {}".format(ncandidate)
                        )
                raise e
            except nx.exception.NetworkXNoPath:
                # this datasource cannot reach the concept at all
                all_found = False
                break
            # 2023-10-18 - more strict condition than below
            # 2023-10-20 - we need this to get through abstract concepts
            # but we may want to add a filter to avoid using this for anything with lineage
            # if len(path) != 2:
            #     all_found = False
            #     break
            # reject paths that route through a second datasource
            if len([p for p in path if g.nodes[p]["type"] == "datasource"]) != 1:
                all_found = False
                break
            # every non-datasource hop must be the concept itself (no lineage hops)
            for node in path:
                if g.nodes[node]["type"] == "datasource":
                    continue
                if g.nodes[node]["concept"].address == raw_concept.address:
                    continue
                all_found = False
                break

        if all_found:
            # concepts this datasource only partially covers (incomplete columns)
            partial_concepts = [
                c.concept
                for c in datasource.columns
                if not c.is_complete and c.concept in all_lcl
            ]
            partial_lcl = LooseConceptList(concepts=partial_concepts)
            # the core target concept itself must be fully covered unless
            # the caller opted into partial sources
            if not accept_partial and target_concept in partial_lcl:
                continue
            logger.info(
                f"{padding(depth)}{LOGGER_PREFIX} target grain is {str(target_grain)}"
            )
            # decide whether the raw select must be aggregated to the target grain
            if target_grain and target_grain.issubset(datasource.grain):
                if all([x in all_lcl for x in target_grain.components]):
                    force_group = False
                # if we are not returning the grain
                # we have to group
                else:
                    logger.info(
                        f"{padding(depth)}{LOGGER_PREFIX} not all grain components are in output {str(all_lcl)}, group to actual grain"
                    )
                    force_group = True
            elif all([x in all_lcl for x in datasource.grain.components]):
                logger.info(
                    f"{padding(depth)}{LOGGER_PREFIX} query output includes all grain components, no reason to group further"
                )
                force_group = False
            else:
                logger.info(
                    f"{padding(depth)}{LOGGER_PREFIX} target grain is not subset of datasource grain {datasource.grain}, required to group"
                )
                force_group = True

            bcandidate: StrategyNode = SelectNode(
                input_concepts=[c.concept for c in datasource.columns],
                output_concepts=all_concepts,
                environment=environment,
                g=g,
                parents=[],
                depth=depth,
                partial_concepts=[c for c in all_concepts if c in partial_lcl],
                accept_partial=accept_partial,
                datasource=datasource,
                grain=Grain(components=all_concepts),
            )
            # we need to nest the group node one further
            if force_group is True:
                candidate: StrategyNode = GroupNode(
                    output_concepts=all_concepts,
                    input_concepts=all_concepts,
                    environment=environment,
                    g=g,
                    parents=[bcandidate],
                    depth=depth,
                    partial_concepts=bcandidate.partial_concepts,
                )
            else:
                candidate = bcandidate
            logger.info(
                f"{padding(depth)}{LOGGER_PREFIX} found select node with {datasource.identifier}, returning {candidate.output_lcl}"
            )
            candidates[datasource.identifier] = candidate
            # fewer partial concepts -> higher score (less negative)
            scores[datasource.identifier] = -len(partial_concepts)
    if not candidates:
        return None
    final = max(candidates, key=lambda x: scores[x])
    return candidates[final]
170
+
171
+
172
def gen_select_node(
    concept: Concept,
    local_optional: List[Concept],
    environment: Environment,
    g,
    depth: int,
    accept_partial: bool = False,
    fail_if_not_found: bool = True,
    accept_partial_optional: bool = True,
    target_grain: Grain | None = None,
) -> StrategyNode | None:
    """Source `concept` (plus as many of `local_optional` as possible) from tables.

    Resolution order:
      1. Try a single datasource holding everything.
      2. Fall back to joining multiple datasources via a MergeNode, searching
         combinations of the optional concepts from largest to smallest.
      3. If partial optional coverage is acceptable, finally try to source
         just the core concept alone.

    Raises:
        NoDatasourceException: when `fail_if_not_found` is set and nothing
            (or a non-materialized concept) is requested.
    """
    all_concepts = [concept] + local_optional
    all_lcl = LooseConceptList(concepts=all_concepts)
    # only concepts physically present in a datasource (or constants) can be
    # satisfied by a raw select
    materialized_lcl = LooseConceptList(
        concepts=[
            x
            for x in all_concepts
            if x.address in [z.address for z in environment.materialized_concepts]
            or x.derivation == PurposeLineage.CONSTANT
        ]
    )
    if not target_grain:
        # derive a grain from the union of the requested concepts' grains
        target_grain = Grain()
        for ac in all_concepts:
            target_grain += ac.grain
    if materialized_lcl != all_lcl:
        logger.info(
            f"{padding(depth)}{LOGGER_PREFIX} Skipping select node generation for {concept.address} "
            f" as it + optional (looking for all {all_lcl}) includes non-materialized concepts {all_lcl.difference(materialized_lcl)} vs materialized: {materialized_lcl}"
        )
        if fail_if_not_found:
            raise NoDatasourceException(f"No datasource exists for {concept}")
        return None

    ds: StrategyNode | None = None

    # attempt to select all concepts from table
    ds = gen_select_node_from_table(
        concept,
        [concept] + local_optional,
        g=g,
        environment=environment,
        depth=depth,
        accept_partial=accept_partial,
        target_grain=target_grain,
    )
    if ds:
        logger.info(
            f"{padding(depth)}{LOGGER_PREFIX} Found select node with all target concepts, force group is {ds.force_group}, target grain {target_grain}"
        )
        return ds
    # if we cannot find a match
    parents: List[StrategyNode] = []
    found: List[Concept] = []
    logger.info(
        f"{padding(depth)}{LOGGER_PREFIX} looking for multiple sources that can satisfy"
    )
    all_found = False
    # try progressively smaller combinations of the optional concepts,
    # accumulating sources until every optional concept is covered
    for x in reversed(range(1, len(local_optional) + 1)):
        if all_found:
            break
        for combo in combinations(local_optional, x):
            if all_found:
                break
            # filter to just the original ones we need to get
            # NOTE(review): the comprehension's `x` shadows the outer loop's
            # combination size `x` (harmless in py3, but easy to misread)
            local_combo = [x for x in combo if x not in found]
            # skip if nothing new in this combo
            if not local_combo:
                continue
            # include core concept as join
            all_concepts = [concept, *local_combo]

            ds = gen_select_node_from_table(
                concept,
                all_concepts,
                g=g,
                environment=environment,
                depth=depth + 1,
                accept_partial=accept_partial,
                target_grain=Grain(components=all_concepts),
            )
            if ds:
                logger.info(
                    f"{padding(depth)}{LOGGER_PREFIX} found a source with {[x.address for x in all_concepts]}"
                )
                parents.append(ds)
                found += [x for x in ds.output_concepts if x != concept]
                if {x.address for x in found} == {c.address for c in local_optional}:
                    logger.info(
                        f"{padding(depth)}{LOGGER_PREFIX} found all optional {[c.address for c in local_optional]}"
                    )
                    all_found = True
    if parents and (all_found or accept_partial_optional):
        if all_found:
            logger.info(
                f"{padding(depth)}{LOGGER_PREFIX} found all optional {[c.address for c in local_optional]} via joins"
            )
        else:
            logger.info(
                f"{padding(depth)}{LOGGER_PREFIX} found some optional {[c.address for c in found]}, returning"
            )
        # concepts partial in EVERY parent remain partial in the merged output
        # NOTE(review): `all_concepts` here is whatever the LAST combination
        # examined rebound it to, not [concept] + found — confirm intended
        all_partial = [
            c
            for c in all_concepts
            if all(
                [c.address in [x.address for x in p.partial_concepts] for p in parents]
            )
        ]
        # group if any parent's grain is broader than the target grain
        force_group = False
        for candidate in parents:
            if candidate.grain and not candidate.grain.issubset(target_grain):
                force_group = True
        if len(parents) == 1:
            candidate = parents[0]
        else:
            candidate = MergeNode(
                output_concepts=[concept] + found,
                input_concepts=[concept] + found,
                environment=environment,
                g=g,
                parents=parents,
                depth=depth,
                partial_concepts=all_partial,
                grain=sum([x.grain for x in parents if x.grain], Grain()),
            )
        candidate.depth += 1
        source_grain = candidate.grain
        if force_group:
            logger.info(
                f"{padding(depth)}{LOGGER_PREFIX} datasource grain {source_grain} does not match target grain {target_grain} for select, adding group node"
            )
            return GroupNode(
                output_concepts=candidate.output_concepts,
                input_concepts=candidate.output_concepts,
                environment=environment,
                g=g,
                parents=[candidate],
                depth=depth,
                partial_concepts=candidate.partial_concepts,
            )
        return candidate

    if not accept_partial_optional:
        return None
    # last resort: source only the core concept, dropping all optionals
    ds = gen_select_node_from_table(
        concept,
        [concept],
        g=g,
        environment=environment,
        depth=depth,
        accept_partial=accept_partial,
        target_grain=Grain(components=[concept]),
    )

    if not ds and fail_if_not_found:
        raise NoDatasourceException(f"No datasource exists for {concept}")
    return ds
@@ -0,0 +1,37 @@
1
+ from typing import List
2
+
3
+
4
+ from trilogy.core.models import Concept, Function
5
+ from trilogy.core.processing.nodes import UnnestNode, History
6
+
7
+
8
def gen_unnest_node(
    concept: Concept,
    local_optional: List[Concept],
    environment,
    g,
    depth: int,
    source_concepts,
    history: History | None = None,
) -> UnnestNode | None:
    """Build an UnnestNode for an unnest-derived concept.

    When the unnest expression has concept arguments, or optional concepts
    were requested alongside it, those inputs are resolved first via
    `source_concepts` and attached as the single parent; a bare unnest
    needs no parent at all. Returns None if the inputs cannot be sourced.
    """
    if isinstance(concept.lineage, Function):
        arguments = concept.lineage.concept_arguments
    else:
        arguments = []
    required = arguments + local_optional
    parents: list = []
    if required:
        resolved = source_concepts(
            mandatory_list=required,
            environment=environment,
            g=g,
            depth=depth + 1,
            history=history,
        )
        # inputs are unresolvable; the unnest cannot be built
        if not resolved:
            return None
        parents = [resolved]
    return UnnestNode(
        unnest_concept=concept,
        input_concepts=required,
        output_concepts=[concept] + local_optional,
        environment=environment,
        g=g,
        parents=parents,
    )
@@ -0,0 +1,85 @@
1
+ from typing import List
2
+
3
+
4
+ from trilogy.core.models import Concept, WindowItem, Environment
5
+ from trilogy.utility import unique
6
+ from trilogy.core.processing.nodes import (
7
+ WindowNode,
8
+ )
9
+ from trilogy.core.processing.nodes import MergeNode, History
10
+
11
+ from trilogy.constants import logger
12
+ from trilogy.core.processing.utility import padding, create_log_lambda
13
+ from trilogy.core.processing.node_generators.common import (
14
+ gen_enrichment_node,
15
+ concept_to_relevant_joins,
16
+ )
17
+
18
+ LOGGER_PREFIX = "[GEN_WINDOW_NODE]"
19
+
20
+
21
def resolve_window_parent_concepts(concept: Concept) -> List[Concept]:
    """Return the unique concepts a window expression depends on.

    Collects the windowed content, any partition (`over`) concepts, and the
    outputs of each ordering expression, deduplicated by address.

    Raises:
        ValueError: if the concept's lineage is not a WindowItem.
    """
    lineage = concept.lineage
    if not isinstance(lineage, WindowItem):
        raise ValueError
    parents = [lineage.content]
    parents.extend(lineage.over or [])
    for ordering in lineage.order_by or []:
        parents.append(ordering.expr.output)
    return unique(parents, "address")
31
+
32
+
33
def gen_window_node(
    concept: Concept,
    local_optional: list[Concept],
    environment: Environment,
    g,
    depth: int,
    source_concepts,
    history: History | None = None,
) -> WindowNode | MergeNode | None:
    """Build the node tree that computes a window-derived concept.

    Resolves the window's parent concepts, computes the window over them,
    and wraps the result in a non-grouping MergeNode; when additional
    optional concepts are requested, hands off to the shared enrichment
    helper to join them in. Returns None if the parents cannot be sourced.
    """
    required_parents = resolve_window_parent_concepts(concept)

    resolved = source_concepts(
        mandatory_list=required_parents,
        environment=environment,
        g=g,
        depth=depth + 1,
        history=history,
    )
    if not resolved:
        logger.info(f"{padding(depth)}{LOGGER_PREFIX} window node parents unresolvable")
        return None
    base_window = WindowNode(
        input_concepts=required_parents,
        output_concepts=[concept] + required_parents,
        environment=environment,
        g=g,
        parents=[resolved],
    )
    # wrap in a pass-through merge so downstream handling is uniform
    wrapped = MergeNode(
        parents=[base_window],
        environment=environment,
        g=g,
        input_concepts=base_window.input_concepts,
        output_concepts=base_window.output_concepts,
        grain=base_window.grain,
        force_group=False,
    )
    if not local_optional:
        return wrapped
    logger.info(f"{padding(depth)}{LOGGER_PREFIX} group node requires enrichment")
    return gen_enrichment_node(
        wrapped,
        join_keys=concept_to_relevant_joins(required_parents),
        local_optional=local_optional,
        environment=environment,
        g=g,
        depth=depth,
        source_concepts=source_concepts,
        log_lambda=create_log_lambda(LOGGER_PREFIX, depth, logger),
        history=history,
    )
@@ -0,0 +1,76 @@
1
+ from .filter_node import FilterNode
2
+ from .group_node import GroupNode
3
+ from .merge_node import MergeNode
4
+ from .select_node_v2 import SelectNode, StaticSelectNode, ConstantNode
5
+ from .window_node import WindowNode
6
+ from .base_node import StrategyNode, NodeJoin
7
+ from .unnest_node import UnnestNode
8
+ from pydantic import BaseModel, Field, ConfigDict
9
+ from trilogy.core.models import Concept
10
+
11
+
12
class History(BaseModel):
    """Memoized record of node-generation searches.

    Caches the outcome of each (concept list, accept_partial) search so
    repeated resolution attempts are cheap, and tracks which searches have
    started to allow in-progress detection.
    """

    # completed searches: cache key -> resolved node, or None for a failed search
    history: dict[str, StrategyNode | None] = Field(default_factory=dict)
    # cache keys of searches that have begun
    started: set[str] = Field(default_factory=set)
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def _concepts_to_lookup(self, search: list[Concept], accept_partial: bool) -> str:
        """Build the cache key for a search; order of `search` is significant."""
        joined = "-".join(c.address for c in search)
        return joined + str(accept_partial)

    def search_to_history(
        self, search: list[Concept], accept_partial: bool, output: StrategyNode | None
    ):
        """Record a completed search's result (None records a failure)."""
        key = self._concepts_to_lookup(search, accept_partial)
        self.history[key] = output

    def get_history(
        self,
        search: list[Concept],
        accept_partial: bool = False,
    ) -> StrategyNode | None | bool:
        """Return the cached result; False means 'never searched' (vs a cached None)."""
        key = self._concepts_to_lookup(search, accept_partial)
        return self.history.get(key, False)

    def log_start(
        self,
        search: list[Concept],
        accept_partial: bool = False,
    ):
        """Mark a search as in progress."""
        key = self._concepts_to_lookup(search, accept_partial)
        self.started.add(key)

    def check_started(
        self,
        search: list[Concept],
        accept_partial: bool = False,
    ):
        """Return whether this exact search has already been started."""
        key = self._concepts_to_lookup(search, accept_partial)
        return key in self.started
62
+
63
+
64
# Public surface of the processing-nodes package: History is defined in this
# module; every other name is re-exported from the sibling node modules.
__all__ = [
    "FilterNode",
    "GroupNode",
    "MergeNode",
    "SelectNode",
    "StaticSelectNode",
    "WindowNode",
    "StrategyNode",
    "NodeJoin",
    "ConstantNode",
    "UnnestNode",
    "History",
]