pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (76) hide show
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1859 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +701 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +178 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +55 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +18 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +70 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +148 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/__init__.py +4 -0
  65. trilogy/hooks/base_hook.py +6 -4
  66. trilogy/hooks/query_debugger.py +113 -97
  67. trilogy/parser.py +1 -1
  68. trilogy/parsing/common.py +307 -64
  69. trilogy/parsing/parse_engine.py +277 -618
  70. trilogy/parsing/render.py +50 -26
  71. trilogy/scripts/trilogy.py +2 -1
  72. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  73. trilogy/core/models.py +0 -4960
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
  76. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
trilogy/executor.py CHANGED
@@ -7,29 +7,29 @@ from sqlalchemy import text
7
7
  from sqlalchemy.engine import CursorResult, Engine
8
8
 
9
9
  from trilogy.constants import logger
10
- from trilogy.core.enums import Granularity, IOType
11
- from trilogy.core.models import (
12
- Concept,
10
+ from trilogy.core.enums import FunctionType, Granularity, IOType
11
+ from trilogy.core.models.author import Concept, Function
12
+ from trilogy.core.models.build import BuildConcept, BuildFunction
13
+ from trilogy.core.models.core import ListWrapper, MapWrapper
14
+ from trilogy.core.models.datasource import Datasource
15
+ from trilogy.core.models.environment import Environment
16
+ from trilogy.core.statements.author import (
13
17
  ConceptDeclarationStatement,
14
18
  CopyStatement,
15
- Datasource,
16
- Environment,
17
- Function,
18
- FunctionType,
19
19
  ImportStatement,
20
- ListWrapper,
21
- MapWrapper,
22
20
  MergeStatementV2,
23
21
  MultiSelectStatement,
24
22
  PersistStatement,
23
+ RawSQLStatement,
24
+ SelectStatement,
25
+ ShowStatement,
26
+ )
27
+ from trilogy.core.statements.execute import (
25
28
  ProcessedCopyStatement,
26
29
  ProcessedQuery,
27
30
  ProcessedQueryPersist,
28
31
  ProcessedRawSQLStatement,
29
32
  ProcessedShowStatement,
30
- RawSQLStatement,
31
- SelectStatement,
32
- ShowStatement,
33
33
  )
34
34
  from trilogy.dialect.base import BaseDialect
35
35
  from trilogy.dialect.enums import Dialects
@@ -58,7 +58,9 @@ class MockResult:
58
58
  return self.columns
59
59
 
60
60
 
61
- def generate_result_set(columns: List[Concept], output_data: list[Any]) -> MockResult:
61
+ def generate_result_set(
62
+ columns: List[BuildConcept], output_data: list[Any]
63
+ ) -> MockResult:
62
64
  names = [x.address.replace(".", "_") for x in columns]
63
65
  return MockResult(
64
66
  values=[dict(zip(names, [row])) for row in output_data], columns=names
@@ -394,6 +396,32 @@ class Executor(object):
394
396
  if persist and isinstance(x, ProcessedQueryPersist):
395
397
  self.environment.add_datasource(x.datasource)
396
398
 
399
+ def _concept_to_value(
400
+ self,
401
+ concept: Concept,
402
+ local_concepts: dict[str, Concept] | None = None,
403
+ ) -> Any:
404
+ if not concept.granularity == Granularity.SINGLE_ROW:
405
+ raise SyntaxError(f"Cannot bind non-singleton concept {concept.address}")
406
+ # TODO: to get rid of function here - need to figure out why it's getting passed in
407
+ if (
408
+ isinstance(concept.lineage, (BuildFunction, Function))
409
+ and concept.lineage.operator == FunctionType.CONSTANT
410
+ ):
411
+ rval = concept.lineage.arguments[0]
412
+ if isinstance(rval, ListWrapper):
413
+ return [x for x in rval]
414
+ if isinstance(rval, MapWrapper):
415
+ return {k: v for k, v in rval.items()}
416
+ # if isinstance(rval, ConceptRef):
417
+ # return self._concept_to_value(self.environment.concepts[rval.address], local_concepts=local_concepts)
418
+ return rval
419
+ else:
420
+ results = self.execute_query(f"select {concept.name} limit 1;").fetchone()
421
+ if not results:
422
+ return None
423
+ return results[0]
424
+
397
425
  def _hydrate_param(
398
426
  self, param: str, local_concepts: dict[str, Concept] | None = None
399
427
  ) -> Any:
@@ -412,23 +440,7 @@ class Executor(object):
412
440
  raise SyntaxError(f"No concept found for parameter {param}")
413
441
 
414
442
  concept: Concept = matched.pop()
415
- if not concept.granularity == Granularity.SINGLE_ROW:
416
- raise SyntaxError(f"Cannot bind non-singleton concept {concept.address}")
417
- if (
418
- isinstance(concept.lineage, Function)
419
- and concept.lineage.operator == FunctionType.CONSTANT
420
- ):
421
- rval = concept.lineage.arguments[0]
422
- if isinstance(rval, ListWrapper):
423
- return [x for x in rval]
424
- if isinstance(rval, MapWrapper):
425
- return {k: v for k, v in rval.items()}
426
- return rval
427
- else:
428
- results = self.execute_query(f"select {concept.name} limit 1;").fetchone()
429
- if not results:
430
- return None
431
- return results[0]
443
+ return self._concept_to_value(concept, local_concepts=local_concepts)
432
444
 
433
445
  def execute_raw_sql(
434
446
  self,
@@ -437,7 +449,7 @@ class Executor(object):
437
449
  local_concepts: dict[str, Concept] | None = None,
438
450
  ) -> CursorResult:
439
451
  """Run a command against the raw underlying
440
- execution engine"""
452
+ execution engine."""
441
453
  final_params = None
442
454
  q = text(command)
443
455
  if variables:
@@ -459,7 +471,7 @@ class Executor(object):
459
471
  def execute_text(
460
472
  self, command: str, non_interactive: bool = False
461
473
  ) -> List[CursorResult]:
462
- """Run a preql text command"""
474
+ """Run a trilogy query expressed as text."""
463
475
  output = []
464
476
  # connection = self.engine.connect()
465
477
  for statement in self.parse_text_generator(command):
trilogy/hooks/__init__.py CHANGED
@@ -0,0 +1,4 @@
1
+ from trilogy.hooks.graph_hook import GraphHook
2
+ from trilogy.hooks.query_debugger import DebuggingHook
3
+
4
+ __all__ = ["DebuggingHook", "GraphHook"]
trilogy/hooks/base_hook.py CHANGED
@@ -1,13 +1,15 @@
1
- from trilogy.core.models import (
1
+ from trilogy.core.models.execute import (
2
2
  CTE,
3
+ QueryDatasource,
4
+ UnionCTE,
5
+ )
6
+ from trilogy.core.processing.nodes import StrategyNode
7
+ from trilogy.core.statements.author import (
3
8
  MultiSelectStatement,
4
9
  PersistStatement,
5
- QueryDatasource,
6
10
  RowsetDerivationStatement,
7
11
  SelectStatement,
8
- UnionCTE,
9
12
  )
10
- from trilogy.core.processing.nodes import StrategyNode
11
13
 
12
14
 
13
15
  class BaseHook:
trilogy/hooks/query_debugger.py CHANGED
@@ -1,17 +1,17 @@
1
1
  from enum import Enum
2
2
  from logging import DEBUG, StreamHandler
3
3
  from typing import Union
4
+ from uuid import uuid4
4
5
 
5
6
  from trilogy.constants import logger
6
- from trilogy.core.models import (
7
+ from trilogy.core.models.build import BuildDatasource
8
+ from trilogy.core.models.execute import (
7
9
  CTE,
8
- Datasource,
9
10
  QueryDatasource,
10
- SelectStatement,
11
11
  UnionCTE,
12
12
  )
13
13
  from trilogy.core.processing.nodes import StrategyNode
14
- from trilogy.dialect.bigquery import BigqueryDialect
14
+ from trilogy.core.statements.author import SelectStatement
15
15
  from trilogy.hooks.base_hook import BaseHook
16
16
 
17
17
 
@@ -21,93 +21,6 @@ class PrintMode(Enum):
21
21
  FULL = 3
22
22
 
23
23
 
24
- renderer = BigqueryDialect()
25
-
26
-
27
- def print_recursive_resolved(
28
- input: Union[QueryDatasource, Datasource], mode: PrintMode, depth: int = 0
29
- ):
30
- extra = []
31
- if isinstance(input, QueryDatasource):
32
- if input.joins:
33
- extra.append("join")
34
- if input.condition:
35
- extra.append("filter")
36
- if input.group_required:
37
- extra.append("group")
38
- output = [c.address for c in input.output_concepts[:3]]
39
- if len(input.output_concepts) > 3:
40
- output.append("...")
41
- display = [
42
- (
43
- " " * depth,
44
- input.__class__.__name__,
45
- "<",
46
- ",".join(extra),
47
- ">",
48
- # [c.address for c in input.input_concepts],
49
- "->",
50
- output,
51
- )
52
- ]
53
- if isinstance(input, QueryDatasource):
54
- for child in input.datasources:
55
- display += print_recursive_resolved(child, mode=mode, depth=depth + 1)
56
- return display
57
-
58
-
59
- def print_recursive_nodes(
60
- input: StrategyNode, mode: PrintMode = PrintMode.BASIC, depth: int = 0
61
- ):
62
- resolved = input.resolve()
63
- if mode == PrintMode.FULL:
64
- display = [
65
- [
66
- " " * depth,
67
- input,
68
- "->",
69
- resolved.grain,
70
- "->",
71
- [c.address for c in resolved.output_concepts],
72
- ]
73
- ]
74
- elif mode == PrintMode.BASIC:
75
- display = [
76
- [
77
- " " * depth,
78
- input,
79
- "->",
80
- resolved.grain,
81
- ]
82
- ]
83
- for child in input.parents:
84
- display += print_recursive_nodes(
85
- child,
86
- mode=mode,
87
- depth=depth + 1,
88
- )
89
- return display
90
-
91
-
92
- def print_recursive_ctes(
93
- input: CTE | UnionCTE, depth: int = 0, max_depth: int | None = None
94
- ):
95
- if max_depth and depth > max_depth:
96
- return
97
- select_statement = [c.address for c in input.output_columns]
98
- print(" " * depth, input.name, "->", input.group_to_grain, "->", select_statement)
99
- sql = renderer.render_cte(input).statement
100
- for line in sql.split("\n"):
101
- logger.debug(" " * (depth) + line)
102
- if isinstance(input, CTE):
103
- for child in input.parent_ctes:
104
- print_recursive_ctes(child, depth + 1)
105
- elif isinstance(input, UnionCTE):
106
- for child in input.parent_ctes:
107
- for parent in child.parent_ctes:
108
- print_recursive_ctes(parent, depth + 1)
109
-
110
-
111
24
  class DebuggingHook(BaseHook):
112
25
  def __init__(
113
26
  self,
@@ -127,24 +40,127 @@ class DebuggingHook(BaseHook):
127
40
  self.process_nodes = PrintMode(process_nodes)
128
41
  self.process_datasources = PrintMode(process_datasources)
129
42
  self.process_other = PrintMode(process_other)
43
+ self.messages: list[str] = []
44
+ self.uuid = uuid4()
45
+ from trilogy.dialect.bigquery import BigqueryDialect
46
+
47
+ self.renderer = BigqueryDialect()
48
+
49
+ def print(self, *args):
50
+ merged = " ".join([str(x) for x in args])
51
+ self.messages.append(merged)
52
+
53
+ def write(self):
54
+ with open(f"debug_{self.uuid}.log", "w") as f:
55
+ f.write("\n".join(self.messages))
130
56
 
131
57
  def process_select_info(self, select: SelectStatement):
132
58
  if self.process_datasources != PrintMode.OFF:
133
- print(f"grain: {str(select.grain)}")
59
+ self.print(f"grain: {str(select.grain)}")
134
60
 
135
61
  def process_root_datasource(self, datasource: QueryDatasource):
136
62
  if self.process_datasources != PrintMode.OFF:
137
- printed = print_recursive_resolved(datasource, self.process_datasources)
63
+ printed = self.print_recursive_resolved(
64
+ datasource, self.process_datasources
65
+ )
138
66
  for row in printed:
139
- print("".join([str(v) for v in row]))
67
+ self.print("".join([str(v) for v in row]))
140
68
 
141
69
  def process_root_cte(self, cte: CTE | UnionCTE):
142
70
  if self.process_ctes != PrintMode.OFF:
143
- print_recursive_ctes(cte, max_depth=self.max_depth)
71
+ self.print_recursive_ctes(cte, max_depth=self.max_depth)
144
72
 
145
73
  def process_root_strategy_node(self, node: StrategyNode):
146
74
  if self.process_nodes != PrintMode.OFF:
147
- printed = print_recursive_nodes(node, mode=self.process_nodes)
75
+ printed = self.print_recursive_nodes(node, mode=self.process_nodes)
148
76
  for row in printed:
149
77
  # logger.info("".join([str(v) for v in row]))
150
- print("".join([str(v) for v in row]))
78
+ self.print("".join([str(v) for v in row]))
79
+
80
+ def print_recursive_resolved(
81
+ self,
82
+ input: Union[QueryDatasource, BuildDatasource],
83
+ mode: PrintMode,
84
+ depth: int = 0,
85
+ ):
86
+ extra = []
87
+ if isinstance(input, QueryDatasource):
88
+ if input.joins:
89
+ extra.append("join")
90
+ if input.condition:
91
+ extra.append("filter")
92
+ if input.group_required:
93
+ extra.append("group")
94
+ output = [c.address for c in input.output_concepts[:3]]
95
+ if len(input.output_concepts) > 3:
96
+ output.append("...")
97
+ display = [
98
+ (
99
+ " " * depth,
100
+ input.__class__.__name__,
101
+ "<",
102
+ ",".join(extra),
103
+ ">",
104
+ # [c.address for c in input.input_concepts],
105
+ "->",
106
+ output,
107
+ )
108
+ ]
109
+ if isinstance(input, QueryDatasource):
110
+ for child in input.datasources:
111
+ display += self.print_recursive_resolved(
112
+ child, mode=mode, depth=depth + 1
113
+ )
114
+ return display
115
+
116
+ def print_recursive_ctes(
117
+ self, input: CTE | UnionCTE, depth: int = 0, max_depth: int | None = None
118
+ ):
119
+ if max_depth and depth > max_depth:
120
+ return
121
+ select_statement = [c.address for c in input.output_columns]
122
+ self.print(
123
+ " " * depth, input.name, "->", input.group_to_grain, "->", select_statement
124
+ )
125
+ sql = self.renderer.render_cte(input).statement
126
+ for line in sql.split("\n"):
127
+ logger.debug(" " * (depth) + line)
128
+ if isinstance(input, CTE):
129
+ for child in input.parent_ctes:
130
+ self.print_recursive_ctes(child, depth + 1)
131
+ elif isinstance(input, UnionCTE):
132
+ for child in input.parent_ctes:
133
+ for parent in child.parent_ctes:
134
+ self.print_recursive_ctes(parent, depth + 1)
135
+
136
+ def print_recursive_nodes(
137
+ self, input: StrategyNode, mode: PrintMode = PrintMode.BASIC, depth: int = 0
138
+ ):
139
+ resolved = input.resolve()
140
+ if mode == PrintMode.FULL:
141
+ display = [
142
+ [
143
+ " " * depth,
144
+ input,
145
+ "->",
146
+ resolved.grain,
147
+ "->",
148
+ [c.address for c in resolved.output_concepts],
149
+ ]
150
+ ]
151
+ elif mode == PrintMode.BASIC:
152
+ display = [
153
+ [
154
+ " " * depth,
155
+ input,
156
+ "->",
157
+ resolved.grain,
158
+ ]
159
+ ]
160
+ for child in input.parents:
161
+ display += self.print_recursive_nodes(
162
+ child,
163
+ mode=mode,
164
+ depth=depth + 1,
165
+ )
166
+ return display
trilogy/parser.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from typing import Optional
2
2
 
3
- from trilogy.core.models import Environment
3
+ from trilogy.core.models.environment import Environment
4
4
  from trilogy.parsing.parse_engine import parse_text
5
5
 
6
6