pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.148.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.148.dist-info/RECORD +206 -0
  6. pytrilogy-0.3.148.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2662 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +434 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +786 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1431 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +376 -0
  117. trilogy/dialect/enums.py +149 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/file_state_store.py +0 -0
  130. trilogy/execution/state/sqllite_state_store.py +0 -0
  131. trilogy/execution/state/state_store.py +301 -0
  132. trilogy/executor.py +656 -0
  133. trilogy/hooks/__init__.py +4 -0
  134. trilogy/hooks/base_hook.py +40 -0
  135. trilogy/hooks/graph_hook.py +135 -0
  136. trilogy/hooks/query_debugger.py +166 -0
  137. trilogy/metadata/__init__.py +0 -0
  138. trilogy/parser.py +10 -0
  139. trilogy/parsing/README.md +21 -0
  140. trilogy/parsing/__init__.py +0 -0
  141. trilogy/parsing/common.py +1069 -0
  142. trilogy/parsing/config.py +5 -0
  143. trilogy/parsing/exceptions.py +8 -0
  144. trilogy/parsing/helpers.py +1 -0
  145. trilogy/parsing/parse_engine.py +2863 -0
  146. trilogy/parsing/render.py +773 -0
  147. trilogy/parsing/trilogy.lark +544 -0
  148. trilogy/py.typed +0 -0
  149. trilogy/render.py +45 -0
  150. trilogy/scripts/README.md +9 -0
  151. trilogy/scripts/__init__.py +0 -0
  152. trilogy/scripts/agent.py +41 -0
  153. trilogy/scripts/agent_info.py +306 -0
  154. trilogy/scripts/common.py +430 -0
  155. trilogy/scripts/dependency/Cargo.lock +617 -0
  156. trilogy/scripts/dependency/Cargo.toml +39 -0
  157. trilogy/scripts/dependency/README.md +131 -0
  158. trilogy/scripts/dependency/build.sh +25 -0
  159. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  160. trilogy/scripts/dependency/src/lib.rs +16 -0
  161. trilogy/scripts/dependency/src/main.rs +770 -0
  162. trilogy/scripts/dependency/src/parser.rs +435 -0
  163. trilogy/scripts/dependency/src/preql.pest +208 -0
  164. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  165. trilogy/scripts/dependency/src/resolver.rs +716 -0
  166. trilogy/scripts/dependency/tests/base.preql +3 -0
  167. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  168. trilogy/scripts/dependency/tests/customer.preql +6 -0
  169. trilogy/scripts/dependency/tests/main.preql +9 -0
  170. trilogy/scripts/dependency/tests/orders.preql +7 -0
  171. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  172. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  173. trilogy/scripts/dependency.py +323 -0
  174. trilogy/scripts/display.py +555 -0
  175. trilogy/scripts/environment.py +59 -0
  176. trilogy/scripts/fmt.py +32 -0
  177. trilogy/scripts/ingest.py +472 -0
  178. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  179. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  180. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  181. trilogy/scripts/ingest_helpers/typing.py +161 -0
  182. trilogy/scripts/init.py +105 -0
  183. trilogy/scripts/parallel_execution.py +748 -0
  184. trilogy/scripts/plan.py +189 -0
  185. trilogy/scripts/refresh.py +106 -0
  186. trilogy/scripts/run.py +79 -0
  187. trilogy/scripts/serve.py +202 -0
  188. trilogy/scripts/serve_helpers/__init__.py +41 -0
  189. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  190. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  191. trilogy/scripts/serve_helpers/models.py +38 -0
  192. trilogy/scripts/single_execution.py +131 -0
  193. trilogy/scripts/testing.py +129 -0
  194. trilogy/scripts/trilogy.py +75 -0
  195. trilogy/std/__init__.py +0 -0
  196. trilogy/std/color.preql +3 -0
  197. trilogy/std/date.preql +13 -0
  198. trilogy/std/display.preql +18 -0
  199. trilogy/std/geography.preql +22 -0
  200. trilogy/std/metric.preql +15 -0
  201. trilogy/std/money.preql +67 -0
  202. trilogy/std/net.preql +14 -0
  203. trilogy/std/ranking.preql +7 -0
  204. trilogy/std/report.preql +5 -0
  205. trilogy/std/semantic.preql +6 -0
  206. trilogy/utility.py +34 -0
trilogy/scripts/ingest.py
@@ -0,0 +1,472 @@
+"""Ingest command for Trilogy CLI - bootstraps datasources from warehouse tables."""
+
+from datetime import datetime
+from itertools import combinations
+from pathlib import Path as PathlibPath
+from typing import Any
+
+from click import UNPROCESSED, Path, argument, option, pass_context
+from click.exceptions import Exit
+
+from trilogy.authoring import (
+    Address,
+    Comment,
+    ConceptDeclarationStatement,
+    DataType,
+    ImportStatement,
+)
+from trilogy.core.enums import Modifier, Purpose
+from trilogy.core.models.author import Concept, Grain, Metadata
+from trilogy.core.models.core import TraitDataType
+from trilogy.core.models.datasource import ColumnAssignment, Datasource
+from trilogy.dialect.enums import Dialects
+from trilogy.executor import Executor
+from trilogy.parsing.render import Renderer
+from trilogy.scripts.common import (
+    create_executor,
+    find_trilogy_config,
+    get_runtime_config,
+    handle_execution_exception,
+)
+from trilogy.scripts.display import print_error, print_info, print_success
+from trilogy.scripts.ingest_helpers.foreign_keys import (
+    apply_foreign_key_references,
+    parse_foreign_keys,
+)
+from trilogy.scripts.ingest_helpers.formatting import (
+    canonicalize_names,
+)
+from trilogy.scripts.ingest_helpers.typing import (
+    detect_rich_type,
+    infer_datatype_from_sql_type,
+)
+
+
+def _check_column_combination_uniqueness(
+    indices: list[int], sample_rows: list[tuple]
+) -> bool:
+    """Return True if the given column indices uniquely identify every sample row."""
+    if not sample_rows:
+        return False
+
+    values = set()
+    for row in sample_rows:
+        # For a single column, use the scalar value; for multiple columns, use a tuple
+        if len(indices) == 1:
+            value = row[indices[0]]
+        else:
+            value = tuple(row[idx] for idx in indices)
+
+        if value in values:
+            return False
+        values.add(value)
+
+    # Verify we have as many unique values as rows
+    return len(values) == len(sample_rows)
+
+
+def detect_unique_key_combinations(
+    column_names: list[str], sample_rows: list[tuple], max_key_size: int = 3
+) -> list[list[str]]:
+    """Detect unique key combinations from sample data.
+
+    Returns a list of column combinations that uniquely identify rows,
+    ordered by size (smallest first).
+    """
+    if not sample_rows or not column_names:
+        return []
+
+    unique_combinations = []
+
+    # Try single columns first
+    for i, col_name in enumerate(column_names):
+        if _check_column_combination_uniqueness([i], sample_rows):
+            unique_combinations.append([col_name])
+
+    # If we found single-column keys, prefer those
+    if unique_combinations:
+        return unique_combinations
+
+    # Try combinations of 2+ columns
+    for size in range(2, max_key_size + 1):
+        for col_combination in combinations(enumerate(column_names), size):
+            indices = [idx for idx, _ in col_combination]
+            col_names = [name for _, name in col_combination]
+
+            if _check_column_combination_uniqueness(indices, sample_rows):
+                unique_combinations.append(col_names)
+
+        # If we found keys of this size, return them (prefer smaller keys)
+        if unique_combinations:
+            return unique_combinations
+
+    return unique_combinations
+
+
+def detect_nullability_from_sample(column_index: int, sample_rows: list[tuple]) -> bool:
+    """Return True if any sampled row contains a NULL in the given column."""
+    for row in sample_rows:
+        if row[column_index] is None:
+            return True
+    return False
+
+
+def _process_column(
+    idx: int,
+    col: tuple[str, str, str | None, str | None],
+    grain_components: list[str],
+    sample_rows: list[tuple],
+    concept_mapping: dict[str, str],
+) -> tuple[Concept, ColumnAssignment, str | None]:
+    """Build the Concept and ColumnAssignment for one schema column.
+
+    Returns (concept, column_assignment, trait_import), where trait_import
+    names an import required for a detected rich type, if any.
+    """
+    column_name = col[0]
+    data_type_str = col[1]
+    schema_is_nullable = col[2].upper() == "YES" if len(col) > 2 and col[2] else True
+    column_comment = col[3] if len(col) > 3 else None
+    # Apply prefix stripping via the provided mapping
+    concept_name = concept_mapping[column_name]
+
+    # Infer the Trilogy datatype
+    trilogy_type = infer_datatype_from_sql_type(data_type_str)
+
+    # Try to detect a rich type
+    trait_import, trait_type_name = detect_rich_type(concept_name, trilogy_type)
+    if trait_import and trait_type_name:
+        final_datatype: TraitDataType | DataType = TraitDataType(
+            type=trilogy_type, traits=[trait_type_name]
+        )
+        print_info(f"Detected rich type for '{concept_name}': {trait_type_name}")
+    else:
+        final_datatype = trilogy_type
+        trait_import = None
+
+    # Determine purpose based on grain
+    if concept_name in grain_components or not grain_components:
+        purpose = Purpose.KEY
+        keys = set()
+    else:
+        purpose = Purpose.PROPERTY
+        keys = set(grain_components)
+
+    # Determine nullability: check sample data first, fall back to the schema
+    if sample_rows:
+        has_nulls = detect_nullability_from_sample(idx, sample_rows)
+    else:
+        has_nulls = schema_is_nullable
+
+    # Get the description from the column comment, if available
+    description = column_comment if column_comment and column_comment.strip() else None
+
+    # Create concept metadata if we have a description
+    metadata = Metadata()
+    if description:
+        metadata = Metadata(description=description)
+
+    # Create the concept
+    modifiers = [Modifier.NULLABLE] if has_nulls else []
+
+    concept = Concept(
+        name=concept_name,
+        datatype=final_datatype,
+        purpose=purpose,
+        modifiers=modifiers,
+        metadata=metadata,
+        keys=keys,
+    )
+
+    # Create the column assignment
+    column_assignment = ColumnAssignment(
+        alias=column_name, concept=concept.reference, modifiers=modifiers
+    )
+
+    return concept, column_assignment, trait_import
+
+
+def create_datasource_from_table(
+    exec: Executor, table_name: str, schema: str | None = None, root: bool = False
+) -> tuple[Datasource, list[Concept], set[str]]:
+    """Create a Datasource object from a warehouse table.
+
+    Returns: (datasource, concepts, required_imports)
+    """
+
+    dialect = exec.generator
+
+    columns = dialect.get_table_schema(exec, table_name, schema)
+
+    if not columns:
+        print_error(f"No columns found for table {table_name}")
+        raise Exit(1)
+
+    # Build the qualified table name
+    if schema:
+        qualified_name = f"{schema}.{table_name}"
+    else:
+        qualified_name = table_name
+
+    # Extract column names for grain detection
+    column_names = [col[0] for col in columns]
+
+    # Detect and strip any common prefix from column names BEFORE grain detection
+    column_concept_mapping = canonicalize_names(column_names)
+
+    # Detect unique key combinations from sample data
+    suggested_keys = []
+
+    # Normalize grain components to snake_case and apply prefix stripping
+    db_primary_keys = dialect.get_table_primary_keys(exec, table_name, schema)
+    # Sample rows are always needed for per-column detection, so fetch them up front
+    sample_rows = dialect.get_table_sample(exec, table_name, schema)
+    if db_primary_keys:
+        keys = db_primary_keys
+        print_info(f"Using primary key from database as grain: {db_primary_keys}")
+    else:
+        # Fall back to sample data to detect grain and nullability
+        print_info(
+            f"Analyzing {len(sample_rows)} sample rows for grain and nullability detection"
+        )
+        suggested_keys = detect_unique_key_combinations(column_names, sample_rows)
+        if suggested_keys:
+            print_info(f"Detected potential unique key combinations: {suggested_keys}")
+            print_info(f"Using detected unique key as grain: {suggested_keys[0]}")
+            keys = suggested_keys[0]
+        else:
+            keys = []
+            print_info(
+                "No primary key or unique grain detected; defaulting to no grain"
+            )
+    grain_components = []
+    for key in keys:
+        stripped = column_concept_mapping.get(key, key)
+        grain_components.append(stripped)
+
+    # Track required imports for rich types
+    required_imports: set[str] = set()
+
+    # Create column assignments for each column
+    column_assignments = []
+    concepts: list[Concept] = []
+    for idx, col in enumerate(columns):
+        concept, column_assignment, rich_import = _process_column(
+            idx, col, grain_components, sample_rows, column_concept_mapping
+        )
+        concepts.append(concept)
+        column_assignments.append(column_assignment)
+        if rich_import:
+            required_imports.add(rich_import)
+
+    grain = Grain(components=set(grain_components)) if grain_components else Grain()
+
+    address = Address(location=qualified_name, quoted=True)
+
+    datasource = Datasource(
+        name=table_name.replace(".", "_"),
+        grain=grain,
+        columns=column_assignments,
+        address=address,
+        is_root=root,
+    )
+
+    return datasource, concepts, required_imports
+
+
+@argument("tables", type=str)
+@argument("dialect", type=str, required=False)
+@option("--output", "-o", type=Path(), help="Output path for generated scripts")
+@option("--schema", "-s", type=str, help="Schema/database to ingest from")
+@option(
+    "--config", type=Path(exists=True), help="Path to trilogy.toml configuration file"
+)
+@option(
+    "--fks",
+    type=str,
+    help="Foreign key relationships in format: table.column:ref_table.column (comma-separated)",
+)
+@argument("conn_args", nargs=-1, type=UNPROCESSED)
+@pass_context
+def ingest(
+    ctx,
+    tables: str,
+    dialect: str | None,
+    output: str | None,
+    schema: str | None,
+    config,
+    fks: str | None,
+    conn_args,
+):
+    """Bootstrap one or more datasources from tables in your warehouse.
+
+    Connects to a warehouse and generates Trilogy datasource definitions
+    from existing tables.
+
+    Args:
+        tables: Comma-separated list of table names to ingest
+        dialect: Database dialect (e.g., duckdb, postgres, snowflake)
+        output: Output path for generated scripts
+        schema: Schema/database to ingest from
+        config: Path to trilogy.toml configuration file
+        fks: Foreign key relationships to establish
+        conn_args: Additional connection arguments
+    """
+    # Parse table names
+    table_list = [t.strip() for t in tables.split(",") if t.strip()]
+
+    if not table_list:
+        print_error("No tables specified")
+        raise Exit(1)
+
+    # Parse foreign keys
+    fk_map = parse_foreign_keys(fks) if fks else {}
+
+    # Determine the output directory
+    if output:
+        output_dir = PathlibPath(output)
+    elif config:
+        config_path = PathlibPath(config)
+        output_dir = config_path.parent / "raw"
+    else:
+        found_config = find_trilogy_config()
+        if found_config:
+            output_dir = found_config.parent / "raw"
+        else:
+            output_dir = PathlibPath.cwd() / "raw"
+
+    # Create the output directory if it doesn't exist
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    print_info(f"Ingesting tables: {', '.join(table_list)}")
+    print_info(f"Output directory: {output_dir}")
+
+    # Get the runtime config
+    runtime_config = (
+        get_runtime_config(PathlibPath(config))
+        if config
+        else get_runtime_config(PathlibPath.cwd())
+    )
+
+    # Determine the dialect
+    if dialect:
+        edialect = Dialects(dialect)
+    elif runtime_config.engine_dialect:
+        edialect = runtime_config.engine_dialect
+    else:
+        print_error(
+            "No dialect specified. Provide dialect as argument or set engine.dialect in config file."
+        )
+        raise Exit(1)
+
+    # Create the executor
+    try:
+        exec = create_executor(
+            param=(),
+            directory=PathlibPath.cwd(),
+            conn_args=conn_args,
+            edialect=edialect,
+            debug=ctx.obj["DEBUG"],
+            config=runtime_config,
+        )
+    except Exception as e:
+        handle_execution_exception(e, debug=ctx.obj["DEBUG"])
+
+    # Ingest each table
+    ingested_files = []
+    ingested_data: dict[str, tuple[Datasource, list[Concept], set[str], list[Any]]] = {}
+    renderer = Renderer()
+    datasources = {}
+    for table_name in table_list:
+        print_info(f"Processing table: {table_name}")
+
+        try:
+            datasource, concepts, required_imports = create_datasource_from_table(
+                exec, table_name, schema, root=True
+            )
+
+            datasources[table_name] = datasource
+
+            # Build the qualified table name
+            if schema:
+                qualified_name = f"{schema}.{table_name}"
+            else:
+                qualified_name = table_name
+
+            # Generate the Trilogy script content
+            script_content: list[
+                Datasource | Comment | ConceptDeclarationStatement | ImportStatement
+            ] = []
+            script_content.append(
+                Comment(text=f"# Datasource ingested from {qualified_name}")
+            )
+            script_content.append(Comment(text=f"# Generated on {datetime.now()}"))
+
+            # Add imports for rich types if needed
+            if required_imports:
+                for import_path in sorted(required_imports):
+                    # The file path is unused here; stdlib imports are resolved from memory
+                    file_path = import_path.replace(".", "/")
+                    script_content.append(
+                        ImportStatement(
+                            input_path=import_path,
+                            alias="",  # No alias, direct import
+                            path=PathlibPath(file_path),
+                        )
+                    )
+
+            # Add concept declarations
+            for concept in concepts:
+                script_content.append(ConceptDeclarationStatement(concept=concept))
+
+            # Add the datasource
+            script_content.append(datasource)
+
+            # Store for FK processing
+            ingested_data[table_name] = (
+                datasource,
+                concepts,
+                required_imports,
+                script_content,
+            )
+
+        except Exception as e:
+            print_error(f"Failed to ingest {table_name}: {e}")
+            if ctx.obj["DEBUG"]:
+                import traceback
+
+                print_error(traceback.format_exc())
+            continue
+
+    # Write all ingested files, applying FK references where needed
+    if fk_map:
+        print_info("Processing foreign key relationships...")
+
+    for table_name, (
+        datasource,
+        concepts,
+        required_imports,
+        script_content,
+    ) in ingested_data.items():
+        output_file = output_dir / f"{datasource.name}.preql"
+
+        # Check whether this table has FK relationships
+        if table_name in fk_map:
+            column_mappings = fk_map[table_name]
+            modified_content = apply_foreign_key_references(
+                table_name, datasource, datasources, script_content, column_mappings
+            )
+            output_file.write_text(modified_content)
+            ingested_files.append(output_file)
+            print_success(f"Created {output_file} with FK references")
+        else:
+            # No FK references for this table; write as-is
+            output_file.write_text(renderer.render_statement_string(script_content))
+            ingested_files.append(output_file)
+            print_success(f"Created {output_file}")
+
+    # Close the executor
+    exec.close()
+
+    if ingested_files:
+        print_success(
+            f"\nSuccessfully ingested {len(ingested_files)} table(s) to {output_dir}"
+        )
+    else:
+        print_error("No tables were successfully ingested")
+        raise Exit(1)
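
For reference, a minimal sketch of how the grain-detection helper above behaves; the table columns and sample rows here are invented for illustration:

```python
# Hypothetical data to illustrate detect_unique_key_combinations.
from trilogy.scripts.ingest import detect_unique_key_combinations

column_names = ["order_id", "customer_id", "status"]
sample_rows = [
    (1, 10, "open"),
    (2, 10, "closed"),
    (3, 11, "open"),
]

# "order_id" is unique across the sample, so it is returned alone and larger
# combinations are never tried; a table with no unique single column would
# fall through to 2- and 3-column combinations (up to max_key_size).
print(detect_unique_key_combinations(column_names, sample_rows))
# [['order_id']]
```

The command itself would be invoked through the Trilogy CLI with something like `trilogy ingest orders,customers duckdb -o ./raw` (arguments here are illustrative, not taken from the package's docs).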
trilogy/scripts/ingest_helpers/__init__.py
@@ -0,0 +1 @@
+"""Ingest helper modules for Trilogy CLI."""
trilogy/scripts/ingest_helpers/foreign_keys.py
@@ -0,0 +1,123 @@
+from pathlib import Path
+
+from click.exceptions import Exit
+
+from trilogy.authoring import (
+    Comment,
+    ConceptDeclarationStatement,
+    Datasource,
+    ImportStatement,
+)
+from trilogy.core.validation.fix import (
+    DatasourceReferenceFix,
+    rewrite_file_with_reference_merges,
+)
+from trilogy.scripts.display import print_error, print_info
+
+
+def parse_foreign_keys(fks_str: str | None) -> dict[str, dict[str, str]]:
+    """Parse comma-separated FK specs of the form source_table.column:target_table.column."""
+    if not fks_str:
+        return {}
+
+    fk_map: dict[str, dict[str, str]] = {}
+
+    for fk_spec in fks_str.split(","):
+        fk_spec = fk_spec.strip()
+        if not fk_spec:
+            continue
+
+        try:
+            source_part, target_part = fk_spec.split(":")
+            source_table, source_column = source_part.rsplit(".", 1)
+            target_table, target_column = target_part.rsplit(".", 1)
+
+            if source_table not in fk_map:
+                fk_map[source_table] = {}
+
+            # Store as a column -> table.column mapping
+            fk_map[source_table][source_column] = f"{target_table}.{target_column}"
+
+        except ValueError:
+            print_error(f"Invalid FK specification: {fk_spec}")
+            print_error("Expected format: source_table.column:target_table.column")
+            raise Exit(1)
+
+    return fk_map
+
+
+def apply_foreign_key_references(
+    table_name: str,
+    datasource: Datasource,
+    datasources: dict[str, Datasource],
+    script_content: list[
+        Datasource | Comment | ConceptDeclarationStatement | ImportStatement
+    ],
+    column_mappings: dict[str, str],
+) -> str:
+    """Rewrite a generated script so FK columns reference concepts from their target tables."""
+    fk_imports: set[str] = set()
+    reference_fixes: list[DatasourceReferenceFix] = []
+
+    for source_column, target_ref in column_mappings.items():
+        # Parse the target reference: table.column
+        target_table, target_column = target_ref.rsplit(".", 1)
+        target_datasource = datasources.get(target_table)
+        target_concept = None
+        if not target_datasource:
+            continue
+        # Find the concept for the target column
+        for col_assign in target_datasource.columns:
+            if col_assign.alias == target_column:
+                target_concept = col_assign.concept
+                break
+
+        # Find the source column's concept address
+        source_concept = None
+        for col_assign in datasource.columns:
+            if col_assign.alias == source_column:
+                source_concept = col_assign.concept.address
+                break
+
+        if not source_concept:
+            print_error(f"Could not find column {source_column} in {table_name}")
+            continue
+
+        # Create the reference fix
+        if target_concept:
+            reference_fixes.append(
+                DatasourceReferenceFix(
+                    datasource_identifier=datasource.identifier,
+                    column_address=source_concept,
+                    column_alias=source_column,
+                    reference_concept=target_concept.reference.with_namespace(
+                        target_table
+                    ),
+                )
+            )
+
+            fk_imports.add(target_table)
+            print_info(f"Linking {table_name}.{source_column} -> {target_ref}")
+
+    # Add FK imports at the beginning (after comments)
+    if fk_imports:
+        # Find where to insert (after existing imports/comments)
+        insert_pos = 0
+        for i, stmt in enumerate(script_content):
+            if isinstance(stmt, (Comment, ImportStatement)):
+                insert_pos = i + 1
+            else:
+                break
+
+        # Add the FK imports
+        for fk_import in sorted(fk_imports):
+            script_content.insert(
+                insert_pos,
+                ImportStatement(
+                    input_path=fk_import,
+                    alias=fk_import,
+                    path=Path(fk_import),
+                ),
+            )
+            insert_pos += 1
+
+    # Apply the reference fixes and render the updated datasource script
+    return rewrite_file_with_reference_merges(script_content, reference_fixes)
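
As a quick illustration of the `--fks` parsing contract above (the table and column names are made up):

```python
# Sketch: parse_foreign_keys turns comma-separated FK specs into a nested
# {source_table: {source_column: "target_table.target_column"}} map.
from trilogy.scripts.ingest_helpers.foreign_keys import parse_foreign_keys

fk_map = parse_foreign_keys("orders.customer_id:customers.id")
print(fk_map)
# {'orders': {'customer_id': 'customers.id'}}
```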
trilogy/scripts/ingest_helpers/formatting.py
@@ -0,0 +1,93 @@
1
+ import re
2
+
3
+
4
+ def canonicolize_name(name: str) -> str:
5
+ """Convert a string to snake_case.
6
+
7
+ Handles CamelCase, PascalCase, and names with spaces/special chars.
8
+ """
9
+ # Handle spaces and special characters first
10
+ name = re.sub(r"[^\w\s]", "_", name)
11
+ name = re.sub(r"\s+", "_", name)
12
+
13
+ # Insert underscores before uppercase letters (for CamelCase)
14
+ name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
15
+ name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", name)
16
+
17
+ # Convert to lowercase and remove duplicate underscores
18
+ name = name.lower()
19
+ name = re.sub(r"_+", "_", name)
20
+
21
+ # Remove leading/trailing underscores
22
+ return name.strip("_")
23
+
24
+
25
+ def find_common_prefix(names: list[str]) -> str:
26
+ """Find the common prefix shared by all names in a list.
27
+
28
+ The prefix is determined by finding the longest common substring
29
+ that ends with an underscore (or is followed by an underscore in all names).
30
+
31
+ Args:
32
+ names: List of names to analyze
33
+
34
+ Returns:
35
+ The common prefix (including trailing underscore), or empty string if none found
36
+ """
37
+ if not names or len(names) < 2:
38
+ return ""
39
+
40
+ # Normalize all to lowercase for comparison
41
+ normalized = [name.lower() for name in names]
42
+
43
+ # Start with the first name as potential prefix
44
+ prefix = normalized[0]
45
+
46
+ # Find common prefix across all names
47
+ for name in normalized[1:]:
48
+ # Find where they start to differ
49
+ i = 0
50
+ while i < len(prefix) and i < len(name) and prefix[i] == name[i]:
51
+ i += 1
52
+ prefix = prefix[:i]
53
+
54
+ if not prefix:
55
+ return ""
56
+
57
+ # Find the last underscore in the common prefix
58
+ last_underscore = prefix.rfind("_")
59
+
60
+ # Only consider it a valid prefix if:
61
+ # 1. There's an underscore
62
+ # 2. The prefix is at least 2 characters (excluding the underscore)
63
+ # 3. All names have content after the prefix
64
+ if last_underscore > 0:
65
+ candidate_prefix = prefix[: last_underscore + 1]
66
+ # Check that all names have content after this prefix
67
+ if all(len(name) > len(candidate_prefix) for name in normalized):
68
+ return candidate_prefix
69
+
70
+ return ""
71
+
72
+
73
+ def canonicalize_names(names: list[str]) -> dict[str, str]:
74
+ if not names:
75
+ return {}
76
+
77
+ common_prefix = find_common_prefix(names)
78
+
79
+ if not common_prefix:
80
+ # No common prefix, return names as-is
81
+ return {name: canonicolize_name(name) for name in names}
82
+
83
+ # Strip the prefix and normalize to snake_case
84
+ result = {}
85
+ for name in names:
86
+ # Remove the prefix (case-insensitive)
87
+ if name.lower().startswith(common_prefix):
88
+ stripped = name[len(common_prefix) :]
89
+ else:
90
+ stripped = name
91
+ result[name] = canonicolize_name(stripped)
92
+
93
+ return result