pytrilogy 0.3.142__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-313-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.142.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.142.dist-info/RECORD +200 -0
  6. pytrilogy-0.3.142.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +16 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2669 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +501 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +751 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1392 -0
  112. trilogy/dialect/bigquery.py +308 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +231 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +769 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +9 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/agent.py +41 -0
  148. trilogy/scripts/agent_info.py +303 -0
  149. trilogy/scripts/common.py +355 -0
  150. trilogy/scripts/dependency/Cargo.lock +617 -0
  151. trilogy/scripts/dependency/Cargo.toml +39 -0
  152. trilogy/scripts/dependency/README.md +131 -0
  153. trilogy/scripts/dependency/build.sh +25 -0
  154. trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
  155. trilogy/scripts/dependency/src/lib.rs +16 -0
  156. trilogy/scripts/dependency/src/main.rs +770 -0
  157. trilogy/scripts/dependency/src/parser.rs +435 -0
  158. trilogy/scripts/dependency/src/preql.pest +208 -0
  159. trilogy/scripts/dependency/src/python_bindings.rs +303 -0
  160. trilogy/scripts/dependency/src/resolver.rs +716 -0
  161. trilogy/scripts/dependency/tests/base.preql +3 -0
  162. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  163. trilogy/scripts/dependency/tests/customer.preql +6 -0
  164. trilogy/scripts/dependency/tests/main.preql +9 -0
  165. trilogy/scripts/dependency/tests/orders.preql +7 -0
  166. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  167. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  168. trilogy/scripts/dependency.py +323 -0
  169. trilogy/scripts/display.py +512 -0
  170. trilogy/scripts/environment.py +46 -0
  171. trilogy/scripts/fmt.py +32 -0
  172. trilogy/scripts/ingest.py +471 -0
  173. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  174. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  175. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  176. trilogy/scripts/ingest_helpers/typing.py +161 -0
  177. trilogy/scripts/init.py +105 -0
  178. trilogy/scripts/parallel_execution.py +713 -0
  179. trilogy/scripts/plan.py +189 -0
  180. trilogy/scripts/run.py +63 -0
  181. trilogy/scripts/serve.py +140 -0
  182. trilogy/scripts/serve_helpers/__init__.py +41 -0
  183. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  184. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  185. trilogy/scripts/serve_helpers/models.py +38 -0
  186. trilogy/scripts/single_execution.py +131 -0
  187. trilogy/scripts/testing.py +119 -0
  188. trilogy/scripts/trilogy.py +68 -0
  189. trilogy/std/__init__.py +0 -0
  190. trilogy/std/color.preql +3 -0
  191. trilogy/std/date.preql +13 -0
  192. trilogy/std/display.preql +18 -0
  193. trilogy/std/geography.preql +22 -0
  194. trilogy/std/metric.preql +15 -0
  195. trilogy/std/money.preql +67 -0
  196. trilogy/std/net.preql +14 -0
  197. trilogy/std/ranking.preql +7 -0
  198. trilogy/std/report.preql +5 -0
  199. trilogy/std/semantic.preql +6 -0
  200. trilogy/utility.py +34 -0
@@ -0,0 +1,190 @@
1
+ import random
2
+ from datetime import date, datetime
3
+ from typing import TYPE_CHECKING, Any, Iterable
4
+
5
+ from trilogy.core.enums import Purpose
6
+ from trilogy.core.models.author import Concept, ConceptRef
7
+ from trilogy.core.models.core import CONCRETE_TYPES, ArrayType, DataType, TraitDataType
8
+ from trilogy.core.models.datasource import Address, Datasource
9
+ from trilogy.core.models.environment import Environment
10
+ from trilogy.core.statements.execute import ProcessedMockStatement
11
+ from trilogy.dialect.results import MockResult
12
+
13
+ if TYPE_CHECKING:
14
+ from pyarrow import Table
15
+
16
+ DEFAULT_SCALE_FACTOR = 100
17
+
18
+
19
def safe_name(name: str) -> str:
    """Replace every character that is not alphanumeric or '_' with an underscore."""
    sanitized = []
    for ch in name:
        sanitized.append(ch if ch.isalnum() or ch == "_" else "_")
    return "".join(sanitized)
21
+
22
+
23
def mock_email(scale_factor: int, is_key: bool = False) -> list[str]:
    """Generate `scale_factor` mock email addresses.

    Key columns get deterministic, unique addresses (user1..userN cycling
    through a fixed provider list); otherwise addresses are randomized.
    """
    providers = ["example.com", "test.com", "mock.com", "sample.org"]
    if is_key:
        return [
            f"user{i}@{providers[i % len(providers)]}"
            for i in range(1, scale_factor + 1)
        ]
    emails: list[str] = []
    for _ in range(scale_factor):
        emails.append(f"user{random.randint(1, 999999)}@{random.choice(providers)}")
    return emails
34
+
35
+
36
def mock_hex_code(scale_factor: int, is_key: bool = False) -> list[str]:
    """Generate `scale_factor` '#rrggbb' hex codes; unique/deterministic for keys."""
    if is_key:
        codes: list[str] = []
        for i in range(1, scale_factor + 1):
            codes.append(f"#{i:06x}")
        return codes
    return [f"#{random.randint(0, 0xFFFFFF):06x}" for _ in range(scale_factor)]
40
+
41
+
42
def mock_datatype(
    full_type: Any, datatype: CONCRETE_TYPES, scale_factor: int, is_key: bool = False
) -> list[Any]:
    """Generate `scale_factor` mock values for a concrete datatype.

    When `is_key` is True the values are deterministic and unique (suitable
    for key columns); otherwise they are randomized. Raises
    NotImplementedError for datatypes with no mocking strategy.
    """
    if isinstance(full_type, TraitDataType):
        if full_type.type == DataType.STRING:
            # TODO: get stdlib inventory some other way?
            if full_type.traits == ["email_address"]:
                # email mock function
                return mock_email(scale_factor, is_key)
            elif full_type.traits == ["hex"]:
                return mock_hex_code(scale_factor, is_key)
        # Unrecognized trait: recurse on the underlying base type
        # (passed as both full_type and datatype).
        return mock_datatype(full_type.type, full_type.type, scale_factor, is_key)
    elif datatype == DataType.INTEGER:
        if is_key:
            # unique integers for keys
            return list(range(1, scale_factor + 1))
        return [random.randint(0, 999_999) for _ in range(scale_factor)]
    elif datatype == DataType.STRING:
        if is_key:
            # unique strings for keys
            return [f"key_{i}" for i in range(1, scale_factor + 1)]
        return [
            f"mock_string_{random.randint(0, 999_999)}" for _ in range(scale_factor)
        ]
    elif datatype == DataType.FLOAT:
        if is_key:
            # unique floats for keys
            return [float(i) for i in range(1, scale_factor + 1)]
        return [random.uniform(0, 999_999) for _ in range(scale_factor)]
    elif datatype == DataType.NUMERIC:
        if is_key:
            # unique numerics for keys
            return [float(i) for i in range(1, scale_factor + 1)]
        return [round(random.uniform(0, 999_999), 2) for _ in range(scale_factor)]
    elif datatype == DataType.BOOL:
        # booleans can only have 2 unique values, so keys don't make sense here
        return [random.choice([True, False]) for _ in range(scale_factor)]
    elif datatype == DataType.DATE:
        if is_key:
            # unique dates for keys - spread across multiple months/years if needed
            base_date = date(2023, 1, 1)
            return [
                date.fromordinal(base_date.toordinal() + i) for i in range(scale_factor)
            ]
        return [date(2023, 1, random.randint(1, 28)) for _ in range(scale_factor)]
    elif datatype in (DataType.DATETIME, DataType.TIMESTAMP):
        if is_key:
            # unique datetimes for keys - increment by seconds
            base_dt = datetime(2023, 1, 1, 0, 0, 0)
            return [
                datetime.fromtimestamp(base_dt.timestamp() + i)
                for i in range(scale_factor)
            ]
        return [
            datetime(
                2023,
                1,
                1,
                random.randint(0, 23),
                random.randint(0, 59),
                random.randint(0, 59),
            )
            for _ in range(scale_factor)
        ]
    elif isinstance(datatype, ArrayType):
        # arrays as keys don't typically make sense, but generate unique if requested
        # NOTE(review): the key branch emits [first_sample_element, i] pairs,
        # while the non-key branch wraps the entire 5-element sample list in a
        # singleton list — the two branches produce differently shaped arrays;
        # confirm this asymmetry is intentional.
        if is_key:
            return [
                [mock_datatype(datatype.type, datatype.value_data_type, 5, False)[0], i]
                for i in range(scale_factor)
            ]
        return [
            [mock_datatype(datatype.type, datatype.value_data_type, 5, False)]
            for _ in range(scale_factor)
        ]
    raise NotImplementedError(f"Mocking not implemented for datatype {datatype}")
118
+
119
+
120
class MockManager:
    """Generates and caches mock data columns for concepts in an environment."""

    def __init__(
        self, environment: Environment, scale_factor: int = DEFAULT_SCALE_FACTOR
    ):
        self.environment = environment
        # concept address -> list of generated mock values (one column of data)
        self.concept_mocks: dict[str, Any] = {}
        # number of rows to generate per mocked column
        self.scale_factor = scale_factor

    def mock_concept(self, concept: Concept | ConceptRef) -> bool:
        """Generate (and cache) mock values for one concept.

        Returns False if the concept was already mocked, True otherwise.
        """
        if concept.address in self.concept_mocks:
            return False
        concrete = self.environment.concepts[concept.address]
        self.concept_mocks[concept.address] = mock_datatype(
            concept.datatype,
            concept.output_datatype,
            self.scale_factor,
            # keys get unique/deterministic values (was `True if ... else False`)
            concrete.purpose == Purpose.KEY,
        )
        return True

    def create_mock_table(
        self, concepts: Iterable[Concept | ConceptRef], headers: list[str]
    ) -> "Table":
        """Assemble cached concept mocks into a pyarrow Table, one column per header.

        Assumes every concept has already been passed through mock_concept;
        missing addresses raise KeyError.
        """
        from pyarrow import table

        data = {h: self.concept_mocks[c.address] for h, c in zip(headers, concepts)}
        return table(data)
148
+
149
+
150
def handle_processed_mock_statement(
    query: ProcessedMockStatement, environment: Environment, executor
) -> MockResult:
    """Handle processed mock statements.

    Mocks every datasource targeted by the statement and returns a result set
    reporting one (target, status) row per mocked datasource.
    """
    manager = MockManager(environment)
    results = []
    for target in query.targets:
        source = environment.datasources.get(target)
        if not source:
            raise ValueError(f"Datasource {target} not found in environment")
        mock_datasource(source, manager, executor)
        results.append({"target": target, "status": "mocked"})
    return MockResult(results, ["target", "status"])
169
+
170
+
171
def mock_datasource(datasource: Datasource, manager: MockManager, executor):
    """Populate a datasource with generated mock data via the executor.

    Generates mock values for every concrete column, registers them as a
    pyarrow table, materializes that table under a sanitized name, and
    rewrites the datasource address to point at the materialized table.
    """
    refs: list[ConceptRef] = []
    column_names: list[str] = []
    for name, column in datasource.concrete_columns.items():
        manager.mock_concept(column.concept)
        refs.append(column.concept)
        column_names.append(name)

    arrow_table = manager.create_mock_table(refs, column_names)

    # duckdb load the pyarrow table
    executor.execute_raw_sql(
        "register(:name, :tbl)", {"name": "mock_tbl", "tbl": arrow_table}
    )
    address = safe_name(datasource.safe_address)
    executor.execute_raw_sql(
        f"""CREATE OR REPLACE TABLE {address} AS SELECT * FROM mock_tbl"""
    )
    # overwrite the address since we've mangled the name
    datasource.address = Address(location=address)
@@ -0,0 +1,117 @@
1
+ from typing import Any, Callable, Mapping
2
+
3
+ from jinja2 import Template
4
+
5
+ from trilogy.core.enums import DatePart, FunctionType, WindowType
6
+ from trilogy.dialect.base import BaseDialect
7
+
8
+
9
def date_diff(first: str, second: str, grain: DatePart) -> str:
    """Render a Postgres SQL expression for (second - first) at the given grain.

    Coarser grains are built recursively (e.g. HOUR = DAY-diff * 24 + hour
    part of the interval). Raises NotImplementedError for unsupported grains.
    """
    grain = DatePart(grain)
    if grain == DatePart.YEAR:
        return f"date_part('year', {second}) - date_part('year', {first})"
    if grain == DatePart.MONTH:
        return f"12 * {date_diff(first, second, DatePart.YEAR)} + date_part('month', {second}) - date_part('month', {first})"
    if grain == DatePart.DAY:
        return f"date_part('day', {second} - {first})"
    if grain == DatePart.HOUR:
        return f"{date_diff(first, second, DatePart.DAY)} *24 + date_part('hour', {second} - {first})"
    if grain == DatePart.MINUTE:
        return f"{date_diff(first, second, DatePart.HOUR)} *60 + date_part('minute', {second} - {first})"
    if grain == DatePart.SECOND:
        return f"{date_diff(first, second, DatePart.MINUTE)} *60 + date_part('second', {second} - {first})"
    raise NotImplementedError(f"Date diff not implemented for grain {grain}")
25
+
26
+
27
+ WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
28
+
29
# Postgres-specific renderings keyed by FunctionType. Each value receives the
# already-rendered argument expression strings (x) plus their types and
# returns SQL text.
FUNCTION_MAP = {
    FunctionType.SPLIT: lambda x, types: f"string_to_array({x[0]}, {x[1]})",
    FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc('{x[1]}', {x[0]})",
    FunctionType.DATE_ADD: lambda x, types: f"({x[0]} + INTERVAL '{x[2]} {x[1]}')",
    FunctionType.DATE_PART: lambda x, types: f"date_part('{x[1]}', {x[0]})",
    # delegates to date_diff() above, which recurses per grain
    FunctionType.DATE_DIFF: lambda x, types: date_diff(x[0], x[1], x[2]),
    FunctionType.IS_NULL: lambda x, types: f"{x[0]} IS NULL",
}
37
+
38
# Variants used when the query grain already matches the aggregation grain:
# aggregates collapse to row-level expressions (SUM/AVG pass through, COUNT
# becomes a 0/1 null indicator).
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # BUG FIX: COUNT_DISTINCT was rendered as "CASE WHEN{arg}" (missing the
    # space after WHEN), producing invalid SQL like "CASE WHENcol IS NOT NULL".
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
}
45
+
46
+ PG_SQL_TEMPLATE = Template(
47
+ """{%- if output %}
48
+ DROP TABLE IF EXISTS {{ output.address.location }};
49
+ CREATE TABLE {{ output.address.location }} AS
50
+ {% endif %}{%- if ctes %}
51
+ WITH {% for cte in ctes %}
52
+ {{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
53
+ {%- if full_select -%}
54
+ {{full_select}}
55
+ {%- else -%}
56
+ SELECT
57
+ {%- for select in select_columns %}
58
+ {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
59
+ {% if base %}FROM
60
+ {{ base }}{% endif %}{% if joins %}
61
+ {% for join in joins %}
62
+ {{ join }}
63
+ {% endfor %}{% endif %}
64
+ {% if where %}WHERE
65
+ {{ where }}
66
+ {% endif %}
67
+ {%- if group_by %}GROUP BY {% for group in group_by %}
68
+ {{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
69
+ HAVING
70
+ \t{{ having }}{% endif %}
71
+ {%- if order_by %}
72
+ ORDER BY {% for order in order_by %}
73
+ {{ order }}{% if not loop.last %},{% endif %}
74
+ {% endfor %}{% endif %}
75
+ {%- if limit is not none %}
76
+ LIMIT {{ limit }}{% endif %}{% endif %}
77
+ """
78
+ )
79
+
80
+ MAX_IDENTIFIER_LENGTH = 50
81
+
82
+
83
class PostgresDialect(BaseDialect):
    """Postgres-specific SQL rendering and catalog introspection over BaseDialect."""

    # Dialect overrides are merged over the base maps; local entries win.
    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
    FUNCTION_GRAIN_MATCH_MAP = {
        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
        **FUNCTION_GRAIN_MATCH_MAP,
    }
    QUOTE_CHARACTER = '"'
    SQL_TEMPLATE = PG_SQL_TEMPLATE

    def get_table_primary_keys(
        self, executor, table_name: str, schema: str | None = None
    ) -> list[str]:
        """Uses pg_catalog for more reliable constraint information than information_schema.

        Returns primary-key column names in key order (pg_attribute.attnum).
        NOTE(review): schema/table_name are interpolated directly into the SQL
        ('{schema}.{table_name}'::regclass) — presumably trusted introspection
        input; confirm callers never pass user-controlled identifiers.
        """
        if schema:
            pk_query = f"""
            SELECT a.attname
            FROM pg_index i
            JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
            WHERE i.indrelid = '{schema}.{table_name}'::regclass
            AND i.indisprimary
            ORDER BY a.attnum
            """
        else:
            pk_query = f"""
            SELECT a.attname
            FROM pg_index i
            JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
            WHERE i.indrelid = '{table_name}'::regclass
            AND i.indisprimary
            ORDER BY a.attnum
            """

        rows = executor.execute_raw_sql(pk_query).fetchall()
        return [row[0] for row in rows]
@@ -0,0 +1,110 @@
1
+ from typing import Any, Callable, Mapping
2
+
3
+ from jinja2 import Template
4
+
5
+ from trilogy.core.enums import FunctionType, GroupMode, UnnestMode, WindowType
6
+ from trilogy.core.models.core import DataType
7
+ from trilogy.dialect.base import BaseDialect
8
+
9
+ WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
10
+
11
+ FUNCTION_MAP = {
12
+ FunctionType.COUNT: lambda x, types: f"count({x[0]})",
13
+ FunctionType.SUM: lambda x, types: f"sum({x[0]})",
14
+ FunctionType.LENGTH: lambda x, types: f"length({x[0]})",
15
+ FunctionType.AVG: lambda x, types: f"avg({x[0]})",
16
+ FunctionType.INDEX_ACCESS: lambda x, types: f"element_at({x[0]},{x[1]})",
17
+ FunctionType.MAP_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
18
+ FunctionType.LIKE: lambda x, types: (
19
+ f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
20
+ ),
21
+ FunctionType.MINUTE: lambda x, types: f"EXTRACT(MINUTE from {x[0]})",
22
+ FunctionType.SECOND: lambda x, types: f"EXTRACT(SECOND from {x[0]})",
23
+ FunctionType.HOUR: lambda x, types: f"EXTRACT(HOUR from {x[0]})",
24
+ FunctionType.DAY_OF_WEEK: lambda x, types: f"EXTRACT(DAYOFWEEK from {x[0]})",
25
+ FunctionType.DAY: lambda x, types: f"EXTRACT(DAY from {x[0]})",
26
+ FunctionType.YEAR: lambda x, types: f"EXTRACT(YEAR from {x[0]})",
27
+ FunctionType.MONTH: lambda x, types: f"EXTRACT(MONTH from {x[0]})",
28
+ FunctionType.WEEK: lambda x, types: f"EXTRACT(WEEK from {x[0]})",
29
+ FunctionType.QUARTER: lambda x, types: f"EXTRACT(QUARTER from {x[0]})",
30
+ # math
31
+ FunctionType.DIVIDE: lambda x, types: f"{x[0]}/{x[1]}",
32
+ FunctionType.DATE_ADD: lambda x, types: f"DATE_ADD('{x[1]}', {x[2]}, {x[0]})",
33
+ FunctionType.CURRENT_DATE: lambda x, types: "CURRENT_DATE",
34
+ FunctionType.CURRENT_DATETIME: lambda x, types: "CURRENT_TIMESTAMP",
35
+ FunctionType.ARRAY: lambda x, types: f"ARRAY[{', '.join(x)}]",
36
+ # regex
37
+ FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_LIKE({x[0]}, {x[1]})",
38
+ }
39
+
40
# Variants used when the query grain already matches the aggregation grain:
# aggregates collapse to row-level expressions (SUM/AVG pass through, COUNT
# becomes a 0/1 null indicator).
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # BUG FIX: COUNT_DISTINCT was rendered as "CASE WHEN{arg}" (missing the
    # space after WHEN), producing invalid SQL like "CASE WHENcol IS NOT NULL".
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
}
47
+
48
+ SQL_TEMPLATE = Template(
49
+ """{%- if output %}
50
+ {{output}}
51
+ {% endif %}{%- if ctes %}
52
+ WITH {% for cte in ctes %}
53
+ {{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
54
+ {%- if full_select -%}
55
+ {{full_select}}
56
+ {%- else -%}
57
+ SELECT
58
+ {%- for select in select_columns %}
59
+ {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
60
+ {% if base %}FROM
61
+ {{ base }}{% endif %}{% if joins %}
62
+ {% for join in joins %}
63
+ {{ join }}
64
+ {% endfor %}{% endif %}
65
+ {% if where %}WHERE
66
+ {{ where }}
67
+ {% endif %}
68
+ {%- if group_by %}GROUP BY {% for group in group_by %}
69
+ {{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
70
+ HAVING
71
+ \t{{ having }}{% endif %}
72
+ {%- if order_by %}
73
+ ORDER BY {% for order in order_by %}
74
+ {{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
75
+ {%- if limit is not none %}
76
+ LIMIT {{ limit }}{% endif %}{% endif %}
77
+ """
78
+ )
79
+ MAX_IDENTIFIER_LENGTH = 50
80
+
81
+
82
class PrestoDialect(BaseDialect):
    """Presto-flavored SQL rendering over BaseDialect; TrinoDialect inherits it."""

    # Dialect overrides are merged over the base maps; local entries win.
    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
    FUNCTION_GRAIN_MATCH_MAP = {
        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
        **FUNCTION_GRAIN_MATCH_MAP,
    }
    QUOTE_CHARACTER = '"'
    SQL_TEMPLATE = SQL_TEMPLATE
    # Presto type names: NUMERIC -> DECIMAL, STRING -> VARCHAR.
    DATATYPE_MAP = {
        **BaseDialect.DATATYPE_MAP,
        DataType.NUMERIC: "DECIMAL",
        DataType.STRING: "VARCHAR",
    }
    UNNEST_MODE = UnnestMode.PRESTO
    GROUP_MODE = GroupMode.BY_INDEX
    ALIAS_ORDER_REFERENCING_ALLOWED = (
        False  # some complex presto functions don't support aliasing
    )

    def get_table_primary_keys(
        self, executor, table_name: str, schema: str | None = None
    ) -> list[str]:
        """Presto/Trino don't enforce PKs; rely on data-driven grain detection."""
        return []
107
+
108
+
109
class TrinoDialect(PrestoDialect):
    # Trino shares Presto's SQL surface here; inherit everything unchanged.
    pass
@@ -0,0 +1,89 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, List
3
+
4
+ from trilogy.core.models.author import ConceptRef
5
+ from trilogy.engine import ResultProtocol
6
+
7
+
8
@dataclass
class MockResult(ResultProtocol):
    """In-memory stand-in for a DB cursor/result, backed by a list of rows.

    Iteration and fetch* consume rows destructively, mirroring cursor
    semantics; `columns` holds the ordered column names.
    """

    values: list["MockResultRow"]
    columns: list[str]

    def __init__(self, values: list[Any], columns: list[str]):
        rows: list[MockResultRow] = []
        for x in values:
            if isinstance(x, MockResultRow):
                rows.append(x)
            elif isinstance(x, dict):
                rows.append(MockResultRow(x))
            else:
                raise ValueError(
                    f"Cannot process value of type {type(x)} in MockResult"
                )
        self.columns = columns
        self.values = rows

    def __iter__(self):
        # Pops from the front so iteration drains the result set.
        while self.values:
            yield self.values.pop(0)

    def fetchall(self):
        return self.values

    def fetchone(self):
        return self.values.pop(0) if self.values else None

    def fetchmany(self, size: int):
        batch, self.values = self.values[:size], self.values[size:]
        return batch

    def keys(self):
        return self.columns

    def as_dict(self):
        out = []
        for row in self.values:
            out.append(row.as_dict() if isinstance(row, MockResultRow) else row)
        return out
49
+
50
+
51
@dataclass
class MockResultRow:
    """A single mock result row; exposes columns as attributes, keys, and items."""

    # column name -> value mapping backing all access paths
    _values: dict[str, Any]

    def as_dict(self):
        """Return the underlying column -> value mapping."""
        return self._values

    def __str__(self) -> str:
        return str(self._values)

    def __repr__(self) -> str:
        return repr(self._values)

    def __getattr__(self, name: str) -> Any:
        # Only invoked when normal attribute lookup fails. The original read
        # self._values directly, which recursed infinitely (RecursionError)
        # whenever _values itself was absent (e.g. __new__ without __init__,
        # or during unpickling). Guard explicitly instead.
        if name == "_values":
            raise AttributeError(name)
        values = self.__dict__.get("_values", {})
        if name in values:
            return values[name]
        return super().__getattribute__(name)

    def __getitem__(self, key: str) -> Any:
        return self._values[key]

    def __iter__(self):
        # Iterates values (not keys), matching row-tuple semantics.
        return iter(self._values.values())

    def values(self):
        return self._values.values()

    def keys(self):
        return self._values.keys()
80
+
81
+
82
def generate_result_set(
    columns: List[ConceptRef], output_data: list[Any]
) -> MockResult:
    """Generate a mock result set from columns and output data.

    Column names are concept addresses with '.' flattened to '_'.
    NOTE(review): zip(names, [row]) pairs only the FIRST column name with the
    whole row value, so multi-column results would silently drop every other
    column — looks intended only for single-column output; confirm callers.
    """
    names = [x.address.replace(".", "_") for x in columns]
    return MockResult(
        values=[dict(zip(names, [row])) for row in output_data], columns=names
    )
@@ -0,0 +1,129 @@
1
+ from typing import Any, Callable, Mapping
2
+
3
+ from jinja2 import Template
4
+
5
+ from trilogy.core.enums import FunctionType, UnnestMode, WindowType
6
+ from trilogy.dialect.base import BaseDialect
7
+
8
+ ENV_SNOWFLAKE_PW = "PREQL_SNOWFLAKE_PW"
9
+ ENV_SNOWFLAKE_USER = "PREQL_SNOWFLAKE_USER"
10
+ ENV_SNOWFLAKE_ACCOUNT = "PREQL_SNOWFLAKE_ACCOUNT"
11
+
12
+ WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
13
+
14
+ FUNCTION_MAP = {
15
+ FunctionType.COUNT: lambda x, types: f"count({x[0]})",
16
+ FunctionType.SUM: lambda x, types: f"sum({x[0]})",
17
+ FunctionType.LENGTH: lambda x, types: f"length({x[0]})",
18
+ FunctionType.AVG: lambda x, types: f"avg({x[0]})",
19
+ FunctionType.LIKE: lambda x, types: (
20
+ f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
21
+ ),
22
+ FunctionType.MINUTE: lambda x, types: f"EXTRACT(MINUTE from {x[0]})",
23
+ FunctionType.SECOND: lambda x, types: f"EXTRACT(SECOND from {x[0]})",
24
+ FunctionType.HOUR: lambda x, types: f"EXTRACT(HOUR from {x[0]})",
25
+ FunctionType.DAY_OF_WEEK: lambda x, types: f"EXTRACT(DAYOFWEEK from {x[0]})",
26
+ FunctionType.DAY: lambda x, types: f"EXTRACT(DAY from {x[0]})",
27
+ FunctionType.YEAR: lambda x, types: f"EXTRACT(YEAR from {x[0]})",
28
+ FunctionType.MONTH: lambda x, types: f"EXTRACT(MONTH from {x[0]})",
29
+ FunctionType.WEEK: lambda x, types: f"EXTRACT(WEEK from {x[0]})",
30
+ FunctionType.QUARTER: lambda x, types: f"EXTRACT(QUARTER from {x[0]})",
31
+ # math
32
+ FunctionType.POWER: lambda x, types: f"POWER({x[0]}, {x[1]})",
33
+ FunctionType.DIVIDE: lambda x, types: f"DIV0({x[0]},{x[1]})",
34
+ FunctionType.UNNEST: lambda x, types: f"table(flatten({x[0]}))",
35
+ FunctionType.ARRAY: lambda x, types: f"ARRAY_CONSTRUCT({', '.join(x)})",
36
+ }
37
+
38
# Variants used when the query grain already matches the aggregation grain:
# aggregates collapse to row-level expressions (SUM/AVG pass through, COUNT
# becomes a 0/1 null indicator).
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # BUG FIX: COUNT_DISTINCT was rendered as "CASE WHEN{arg}" (missing the
    # space after WHEN), producing invalid SQL like "CASE WHENcol IS NOT NULL".
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
}
45
+
46
+
47
+ SNOWFLAKE_SQL_TEMPLATE = Template(
48
+ """{%- if output %}
49
+ {{output}}
50
+ {% endif %}{%- if ctes %}
51
+ WITH {% if recursive%}RECURSIVE{% endif %}{% for cte in ctes %}
52
+ "{{cte.name}}" as ({{cte.statement}}){% if not loop.last %},{% endif %}{% else %}
53
+ {% endfor %}{% endif %}
54
+ {%- if full_select -%}
55
+ {{full_select}}
56
+ {%- else -%}
57
+
58
+ SELECT
59
+ {%- for select in select_columns %}
60
+ {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
61
+ {% if base %}FROM
62
+ {{ base }}{% endif %}{% if joins %}{% for join in joins %}
63
+ {{ join }}{% endfor %}{% endif %}
64
+ {% if where %}WHERE
65
+ {{ where }}
66
+ {% endif %}
67
+ {%- if group_by %}GROUP BY {% for group in group_by %}
68
+ {{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
69
+ HAVING
70
+ \t{{ having }}{% endif %}
71
+ {%- if order_by %}
72
+ ORDER BY {% for order in order_by %}
73
+ {{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
74
+ {%- if limit is not none %}
75
+ LIMIT {{ limit }}{% endif %}{% endif %}
76
+ """
77
+ )
78
+ MAX_IDENTIFIER_LENGTH = 50
79
+
80
+
81
class SnowflakeDialect(BaseDialect):
    """Snowflake-specific SQL rendering and catalog introspection over BaseDialect."""

    # Dialect overrides are merged over the base maps; local entries win.
    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
    FUNCTION_GRAIN_MATCH_MAP = {
        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
        **FUNCTION_GRAIN_MATCH_MAP,
    }
    QUOTE_CHARACTER = '"'
    SQL_TEMPLATE = SNOWFLAKE_SQL_TEMPLATE
    UNNEST_MODE = UnnestMode.SNOWFLAKE

    def get_table_schema(
        self, executor, table_name: str, schema: str | None = None
    ) -> list[tuple]:
        """Snowflake requires uppercase identifiers unless quoted.

        Returns (column_name, data_type, is_nullable, column_comment) rows
        ordered by ordinal_position.
        NOTE(review): table_name/schema are interpolated directly into the SQL
        string — presumably trusted introspection input; confirm callers.
        """
        table_name_upper = table_name.upper()

        column_query = f"""
        SELECT
            column_name,
            data_type,
            is_nullable,
            comment as column_comment
        FROM information_schema.columns
        WHERE table_name = '{table_name_upper}'
        """
        if schema:
            schema_upper = schema.upper()
            column_query += f" AND table_schema = '{schema_upper}'"
        column_query += " ORDER BY ordinal_position"

        rows = executor.execute_raw_sql(column_query).fetchall()
        return rows

    def get_table_primary_keys(
        self, executor, table_name: str, schema: str | None = None
    ) -> list[str]:
        """Uses SHOW PRIMARY KEYS; note Snowflake PKs are not enforced."""
        table_name_upper = table_name.upper()

        # Use SHOW PRIMARY KEYS command (column_name is at index 4)
        if schema:
            schema_upper = schema.upper()
            pk_query = f"SHOW PRIMARY KEYS IN {schema_upper}.{table_name_upper}"
        else:
            pk_query = f"SHOW PRIMARY KEYS IN {table_name_upper}"

        rows = executor.execute_raw_sql(pk_query).fetchall()
        return [row[4] for row in rows] if rows else []