pytrilogy 0.3.142__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-313-x86_64-linux-gnu.so +0 -0
- pytrilogy-0.3.142.dist-info/METADATA +555 -0
- pytrilogy-0.3.142.dist-info/RECORD +200 -0
- pytrilogy-0.3.142.dist-info/WHEEL +5 -0
- pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +16 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2669 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +501 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +751 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1392 -0
- trilogy/dialect/bigquery.py +308 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +231 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +769 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +303 -0
- trilogy/scripts/common.py +355 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +303 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +512 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +471 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +713 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/run.py +63 -0
- trilogy/scripts/serve.py +140 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +119 -0
- trilogy/scripts/trilogy.py +68 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
trilogy/scripts/ingest.py
@@ -0,0 +1,471 @@
"""Ingest command for Trilogy CLI - bootstraps datasources from warehouse tables."""

from datetime import datetime
from itertools import combinations
from pathlib import Path as PathlibPath
from typing import Any

from click import UNPROCESSED, Path, argument, option, pass_context
from click.exceptions import Exit

from trilogy.authoring import (
    Address,
    Comment,
    ConceptDeclarationStatement,
    DataType,
    ImportStatement,
)
from trilogy.core.enums import Modifier, Purpose
from trilogy.core.models.author import Concept, Grain, Metadata
from trilogy.core.models.core import TraitDataType
from trilogy.core.models.datasource import ColumnAssignment, Datasource
from trilogy.dialect.enums import Dialects
from trilogy.executor import Executor
from trilogy.parsing.render import Renderer
from trilogy.scripts.common import (
    create_executor,
    find_trilogy_config,
    get_runtime_config,
    handle_execution_exception,
)
from trilogy.scripts.display import print_error, print_info, print_success
from trilogy.scripts.ingest_helpers.foreign_keys import (
    apply_foreign_key_references,
    parse_foreign_keys,
)
from trilogy.scripts.ingest_helpers.formatting import (
    canonicalize_names,
)
from trilogy.scripts.ingest_helpers.typing import (
    detect_rich_type,
    infer_datatype_from_sql_type,
)


def _check_column_combination_uniqueness(
    indices: list[int], sample_rows: list[tuple]
) -> bool:
    if not sample_rows:
        return False

    values = set()
    for row in sample_rows:
        # For single column, use scalar value; for multiple columns, use tuple
        if len(indices) == 1:
            value = row[indices[0]]
        else:
            value = tuple(row[idx] for idx in indices)

        if value in values:
            return False
        values.add(value)

    # Verify we have as many unique values as rows
    return len(values) == len(sample_rows)


def detect_unique_key_combinations(
    column_names: list[str], sample_rows: list[tuple], max_key_size: int = 3
) -> list[list[str]]:
    """Detect unique key combinations from sample data.

    Returns a list of column combinations that uniquely identify rows,
    ordered by size (smallest first).
    """
    if not sample_rows or not column_names:
        return []

    unique_combinations = []

    # Try single columns first
    for i, col_name in enumerate(column_names):
        if _check_column_combination_uniqueness([i], sample_rows):
            unique_combinations.append([col_name])

    # If we found single-column keys, prefer those
    if unique_combinations:
        return unique_combinations

    # Try combinations of 2+ columns
    for size in range(2, max_key_size + 1):
        for col_combination in combinations(enumerate(column_names), size):
            indices = [idx for idx, _ in col_combination]
            col_names = [name for _, name in col_combination]

            if _check_column_combination_uniqueness(indices, sample_rows):
                unique_combinations.append(col_names)

        # If we found keys of this size, return them (prefer smaller keys)
        if unique_combinations:
            return unique_combinations

    return unique_combinations


def detect_nullability_from_sample(column_index: int, sample_rows: list[tuple]) -> bool:
    for row in sample_rows:
        if row[column_index] is None:
            return True
    return False


def _process_column(
    idx: int,
    col: tuple[str, str, str | None, str | None],
    grain_components: list[str],
    sample_rows: list[tuple],
    concept_mapping: dict[str, str],
) -> tuple[Concept, ColumnAssignment, str | None]:

    column_name = col[0]
    data_type_str = col[1]
    schema_is_nullable = col[2].upper() == "YES" if len(col) > 2 and col[2] else True
    column_comment = col[3] if len(col) > 3 else None
    # Apply prefix stripping if mapping provided
    concept_name = concept_mapping[column_name]

    # Infer Trilogy datatype
    trilogy_type = infer_datatype_from_sql_type(data_type_str)

    # Try to detect rich type
    trait_import, trait_type_name = detect_rich_type(concept_name, trilogy_type)
    if trait_import and trait_type_name:
        final_datatype: TraitDataType | DataType = TraitDataType(
            type=trilogy_type, traits=[trait_type_name]
        )
        print_info(f"Detected rich type for '{concept_name}': {trait_type_name}")
    else:
        final_datatype = trilogy_type
        trait_import = None

    # Determine purpose based on grain
    if concept_name in grain_components or not grain_components:
        purpose = Purpose.KEY
        keys = set()
    else:
        purpose = Purpose.PROPERTY
        keys = set(grain_components)

    # Determine nullability: check sample data first, fall back to schema
    if sample_rows:
        has_nulls = detect_nullability_from_sample(idx, sample_rows)
    else:
        has_nulls = schema_is_nullable

    # Get description from column comment if available
    description = column_comment if column_comment and column_comment.strip() else None

    # Create concept metadata if we have a description
    metadata = Metadata()
    if description:
        metadata = Metadata(description=description)

    # Create concept
    modifiers = [Modifier.NULLABLE] if has_nulls else []

    concept = Concept(
        name=concept_name,
        datatype=final_datatype,
        purpose=purpose,
        modifiers=modifiers,
        metadata=metadata,
        keys=keys,
    )

    # Create column assignment
    column_assignment = ColumnAssignment(
        alias=column_name, concept=concept.reference, modifiers=modifiers
    )

    return concept, column_assignment, trait_import


def create_datasource_from_table(
    exec: Executor, table_name: str, schema: str | None = None
) -> tuple[Datasource, list[Concept], set[str]]:
    """Create a Datasource object from a warehouse table.

    Returns: (datasource, concepts, required_imports)
    """

    dialect = exec.generator

    columns = dialect.get_table_schema(exec, table_name, schema)

    if not columns:
        print_error(f"No columns found for table {table_name}")
        raise Exit(1)

    # Build qualified table name
    if schema:
        qualified_name = f"{schema}.{table_name}"
    else:
        qualified_name = table_name

    # Extract column names for grain detection
    column_names = [col[0] for col in columns]

    # Detect and strip common prefix from all column names BEFORE grain detection
    column_concept_mapping = canonicalize_names(column_names)

    # Detect unique key combinations from sample data
    suggested_keys = []

    # Normalize grain components to snake_case and apply prefix stripping
    db_primary_keys = dialect.get_table_primary_keys(exec, table_name, schema)
    # we always need sample rows for column detection, so fetch here to set up for later.
    sample_rows = dialect.get_table_sample(exec, table_name, schema)
    if db_primary_keys:
        keys = db_primary_keys
        print_info(f"Using primary key from database as grain: {db_primary_keys}")
    else:
        # Get sample data to detect grain and nullability
        print_info(
            f"Analyzing {len(sample_rows)} sample rows for grain and nullability detection"
        )
        suggested_keys = detect_unique_key_combinations(column_names, sample_rows)
        if suggested_keys:
            print_info(f"Detected potential unique key combinations: {suggested_keys}")
            print_info(f"Using detected unique key as grain: {suggested_keys[0]}")
            keys = suggested_keys[0]
        else:
            keys = []
            print_info(
                "No primary key or unique grain detected; defaulting to no grain"
            )
    grain_components = []
    for key in keys:
        stripped = column_concept_mapping.get(key, key)
        grain_components.append(stripped)

    # Track required imports for rich types
    required_imports: set[str] = set()

    # Create column assignments for each column
    column_assignments = []
    concepts: list[Concept] = []
    for idx, col in enumerate(columns):
        concept, column_assignment, rich_import = _process_column(
            idx, col, grain_components, sample_rows, column_concept_mapping
        )
        concepts.append(concept)
        column_assignments.append(column_assignment)
        if rich_import:
            required_imports.add(rich_import)

    grain = Grain(components=set(grain_components)) if grain_components else Grain()

    address = Address(location=qualified_name, quoted=True)

    datasource = Datasource(
        name=table_name.replace(".", "_"),
        grain=grain,
        columns=column_assignments,
        address=address,
    )

    return datasource, concepts, required_imports


@argument("tables", type=str)
@argument("dialect", type=str, required=False)
@option("--output", "-o", type=Path(), help="Output path for generated scripts")
@option("--schema", "-s", type=str, help="Schema/database to ingest from")
@option(
    "--config", type=Path(exists=True), help="Path to trilogy.toml configuration file"
)
@option(
    "--fks",
    type=str,
    help="Foreign key relationships in format: table.column:ref_table.column (comma-separated)",
)
@argument("conn_args", nargs=-1, type=UNPROCESSED)
@pass_context
def ingest(
    ctx,
    tables: str,
    dialect: str | None,
    output: str | None,
    schema: str | None,
    config,
    fks: str | None,
    conn_args,
):
    """Bootstrap one or more datasources from tables in your warehouse.

    Connects to a warehouse and generates Trilogy datasource definitions
    from existing tables.

    Args:
        tables: Comma-separated list of table names to ingest
        dialect: Database dialect (e.g., duckdb, postgres, snowflake)
        output: Output path for generated scripts
        schema: Schema/database to ingest from
        config: Path to trilogy.toml configuration file
        fks: Foreign key relationships to establish
        conn_args: Additional connection arguments
    """
    # Parse table names
    table_list = [t.strip() for t in tables.split(",") if t.strip()]

    if not table_list:
        print_error("No tables specified")
        raise Exit(1)

    # Parse foreign keys
    fk_map = parse_foreign_keys(fks) if fks else {}

    # Determine output directory
    if output:
        output_dir = PathlibPath(output)
    elif config:
        config_path = PathlibPath(config)
        output_dir = config_path.parent / "raw"
    else:
        found_config = find_trilogy_config()
        if found_config:
            output_dir = found_config.parent / "raw"
        else:
            output_dir = PathlibPath.cwd() / "raw"

    # Create output directory if it doesn't exist
    output_dir.mkdir(parents=True, exist_ok=True)

    print_info(f"Ingesting tables: {', '.join(table_list)}")
    print_info(f"Output directory: {output_dir}")

    # Get runtime config
    runtime_config = (
        get_runtime_config(PathlibPath(config))
        if config
        else get_runtime_config(PathlibPath.cwd())
    )

    # Determine dialect
    if dialect:
        edialect = Dialects(dialect)
    elif runtime_config.engine_dialect:
        edialect = runtime_config.engine_dialect
    else:
        print_error(
            "No dialect specified. Provide dialect as argument or set engine.dialect in config file."
        )
        raise Exit(1)

    # Create executor
    try:
        exec = create_executor(
            param=(),
            directory=PathlibPath.cwd(),
            conn_args=conn_args,
            edialect=edialect,
            debug=ctx.obj["DEBUG"],
            config=runtime_config,
        )
    except Exception as e:
        handle_execution_exception(e, debug=ctx.obj["DEBUG"])

    # Ingest each table
    ingested_files = []
    ingested_data: dict[str, tuple[Datasource, list[Concept], set[str], list[Any]]] = {}
    renderer = Renderer()
    datasources = {}
    for table_name in table_list:
        print_info(f"Processing table: {table_name}")

        try:
            datasource, concepts, required_imports = create_datasource_from_table(
                exec, table_name, schema
            )

            datasources[table_name] = datasource

            # Build qualified table name
            if schema:
                qualified_name = f"{schema}.{table_name}"
            else:
                qualified_name = table_name

            # Generate Trilogy script content
            script_content: list[
                Datasource | Comment | ConceptDeclarationStatement | ImportStatement
            ] = []
            script_content.append(
                Comment(text=f"# Datasource ingested from {qualified_name}")
            )
            script_content.append(Comment(text=f"# Generated on {datetime.now()}"))

            # Add imports for rich types if needed
            if required_imports:
                for import_path in sorted(required_imports):
                    # This doesn't matter, stdlib imports are resolved automatically from memory
                    file_path = import_path.replace(".", "/")
                    script_content.append(
                        ImportStatement(
                            input_path=import_path,
                            alias="",  # No alias, direct import
                            path=PathlibPath(file_path),
                        )
                    )

            # Add concept declarations
            for concept in concepts:
                script_content.append(ConceptDeclarationStatement(concept=concept))

            # Add datasource
            script_content.append(datasource)

            # Store for FK processing
            ingested_data[table_name] = (
                datasource,
                concepts,
                required_imports,
                script_content,
            )

        except Exception as e:
            print_error(f"Failed to ingest {table_name}: {e}")
            if ctx.obj["DEBUG"]:
                import traceback

                print_error(traceback.format_exc())
            continue

    # Write all ingested files, applying FK references where needed
    if fk_map:
        print_info("Processing foreign key relationships...")

    for table_name, (
        datasource,
        concepts,
        required_imports,
        script_content,
    ) in ingested_data.items():
        output_file = output_dir / f"{datasource.name}.preql"

        # Check if this table has FK relationships
        if fk_map and table_name in fk_map:
            column_mappings = fk_map[table_name]
            modified_content = apply_foreign_key_references(
                table_name, datasource, datasources, script_content, column_mappings
            )
            output_file.write_text(modified_content)
            ingested_files.append(output_file)
            print_success(f"Created {output_file} with FK references")
        else:
            # No FK references for this table, write as-is
            output_file.write_text(renderer.render_statement_string(script_content))
            ingested_files.append(output_file)
            print_success(f"Created {output_file}")

    # Close executor
    exec.close()

    if ingested_files:
        print_success(
            f"\nSuccessfully ingested {len(ingested_files)} table(s) to {output_dir}"
        )
    else:
        print_error("No tables were successfully ingested")
        raise Exit(1)
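For illustration, a minimal sketch of the grain and nullability helpers defined above. The column names and sample rows are hypothetical, and the import assumes the wheel is installed so `trilogy.scripts.ingest` can be loaded:

# Hypothetical sample data; not taken from the package.
from trilogy.scripts.ingest import (
    detect_nullability_from_sample,
    detect_unique_key_combinations,
)

column_names = ["order_id", "customer_id", "status"]
sample_rows = [
    (1, 10, "open"),
    (2, 10, None),
    (3, 11, "open"),
]

# "order_id" alone is unique across the sample, so single-column keys win
# and multi-column combinations are never tried.
print(detect_unique_key_combinations(column_names, sample_rows))
# -> [['order_id']]

# Column index 2 ("status") contains a None in the sample, so it is flagged nullable.
print(detect_nullability_from_sample(2, sample_rows))
# -> True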
trilogy/scripts/ingest_helpers/__init__.py
@@ -0,0 +1 @@
"""Ingest helper modules for Trilogy CLI."""
trilogy/scripts/ingest_helpers/foreign_keys.py
@@ -0,0 +1,123 @@
from pathlib import Path

from trilogy.authoring import (
    Comment,
    ConceptDeclarationStatement,
    Datasource,
    ImportStatement,
)
from trilogy.core.validation.fix import (
    DatasourceReferenceFix,
    rewrite_file_with_reference_merges,
)
from trilogy.scripts.display import print_error, print_info


def parse_foreign_keys(fks_str: str | None) -> dict[str, dict[str, str]]:
    if not fks_str:
        return {}

    fk_map: dict[str, dict[str, str]] = {}

    for fk_spec in fks_str.split(","):
        fk_spec = fk_spec.strip()
        if not fk_spec:
            continue

        try:
            source_part, target_part = fk_spec.split(":")
            source_table, source_column = source_part.rsplit(".", 1)
            target_table, target_column = target_part.rsplit(".", 1)

            if source_table not in fk_map:
                fk_map[source_table] = {}

            # Store as column -> table.column mapping
            fk_map[source_table][source_column] = f"{target_table}.{target_column}"

        except ValueError:
            from click.exceptions import Exit

            print_error(f"Invalid FK specification: {fk_spec}")
            print_error("Expected format: source_table.column:target_table.column")
            raise Exit(1)

    return fk_map


def apply_foreign_key_references(
    table_name: str,
    datasource: Datasource,
    datasources: dict[str, Datasource],
    script_content: list[
        Datasource | Comment | ConceptDeclarationStatement | ImportStatement
    ],
    column_mappings: dict[str, str],
) -> str:
    fk_imports: set[str] = set()
    reference_fixes: list[DatasourceReferenceFix] = []

    for source_column, target_ref in column_mappings.items():
        # Parse target reference: table.column
        target_table, _ = target_ref.rsplit(".", 1)
        target_datasource = datasources.get(target_table)
        target_concept = None
        if not target_datasource:
            continue
        # Find the concept for the target column
        for col_assign in target_datasource.columns:
            if col_assign.alias == target_ref.rsplit(".", 1)[1]:
                target_concept = col_assign.concept
                break

        # Find the source column's concept address
        source_concept = None
        for col_assign in datasource.columns:
            if col_assign.alias == source_column:
                source_concept = col_assign.concept.address
                break

        if not source_concept:
            print_error(f"Could not find column {source_column} in {table_name}")
            continue

        # Create the reference fix
        if target_concept:
            reference_fixes.append(
                DatasourceReferenceFix(
                    datasource_identifier=datasource.identifier,
                    column_address=source_concept,
                    column_alias=source_column,
                    reference_concept=target_concept.reference.with_namespace(
                        target_table
                    ),
                )
            )

            fk_imports.add(target_table)
            print_info(f"Linking {table_name}.{source_column} -> {target_ref}")

    # Add FK imports at the beginning (after comments)
    if fk_imports:
        # Find where to insert (after existing imports/comments)
        insert_pos = 0
        for i, stmt in enumerate(script_content):
            if isinstance(stmt, (Comment, ImportStatement)):
                insert_pos = i + 1
            else:
                break

        # Add FK imports
        for fk_import in sorted(fk_imports):
            script_content.insert(
                insert_pos,
                ImportStatement(
                    input_path=fk_import,
                    alias=fk_import,
                    path=Path(fk_import),
                ),
            )
            insert_pos += 1

    # Apply reference fixes to update datasource
    return rewrite_file_with_reference_merges(script_content, reference_fixes)
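For illustration, a minimal sketch of the `--fks` specification format accepted by `parse_foreign_keys` above; the table and column names are hypothetical:

from trilogy.scripts.ingest_helpers.foreign_keys import parse_foreign_keys

# Each spec is source_table.column:target_table.column, comma-separated.
fk_map = parse_foreign_keys("orders.customer_id:customers.id, orders.item_id:items.id")
print(fk_map)
# -> {'orders': {'customer_id': 'customers.id', 'item_id': 'items.id'}}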
trilogy/scripts/ingest_helpers/formatting.py
@@ -0,0 +1,93 @@
import re


def canonicolize_name(name: str) -> str:
    """Convert a string to snake_case.

    Handles CamelCase, PascalCase, and names with spaces/special chars.
    """
    # Handle spaces and special characters first
    name = re.sub(r"[^\w\s]", "_", name)
    name = re.sub(r"\s+", "_", name)

    # Insert underscores before uppercase letters (for CamelCase)
    name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
    name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", name)

    # Convert to lowercase and remove duplicate underscores
    name = name.lower()
    name = re.sub(r"_+", "_", name)

    # Remove leading/trailing underscores
    return name.strip("_")


def find_common_prefix(names: list[str]) -> str:
    """Find the common prefix shared by all names in a list.

    The prefix is determined by finding the longest common substring
    that ends with an underscore (or is followed by an underscore in all names).

    Args:
        names: List of names to analyze

    Returns:
        The common prefix (including trailing underscore), or empty string if none found
    """
    if not names or len(names) < 2:
        return ""

    # Normalize all to lowercase for comparison
    normalized = [name.lower() for name in names]

    # Start with the first name as potential prefix
    prefix = normalized[0]

    # Find common prefix across all names
    for name in normalized[1:]:
        # Find where they start to differ
        i = 0
        while i < len(prefix) and i < len(name) and prefix[i] == name[i]:
            i += 1
        prefix = prefix[:i]

        if not prefix:
            return ""

    # Find the last underscore in the common prefix
    last_underscore = prefix.rfind("_")

    # Only consider it a valid prefix if:
    # 1. There's an underscore
    # 2. The prefix is at least 2 characters (excluding the underscore)
    # 3. All names have content after the prefix
    if last_underscore > 0:
        candidate_prefix = prefix[: last_underscore + 1]
        # Check that all names have content after this prefix
        if all(len(name) > len(candidate_prefix) for name in normalized):
            return candidate_prefix

    return ""


def canonicalize_names(names: list[str]) -> dict[str, str]:
    if not names:
        return {}

    common_prefix = find_common_prefix(names)

    if not common_prefix:
        # No common prefix, return names as-is
        return {name: canonicolize_name(name) for name in names}

    # Strip the prefix and normalize to snake_case
    result = {}
    for name in names:
        # Remove the prefix (case-insensitive)
        if name.lower().startswith(common_prefix):
            stripped = name[len(common_prefix) :]
        else:
            stripped = name
        result[name] = canonicolize_name(stripped)

    return result
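For illustration, a minimal sketch of the prefix stripping and snake_casing performed by the helpers above; the column names are hypothetical:

from trilogy.scripts.ingest_helpers.formatting import canonicalize_names, find_common_prefix

columns = ["CUST_ID", "CUST_FullName", "CUST_CreatedAt"]

# The shared "CUST_" prefix is detected case-insensitively.
print(find_common_prefix(columns))
# -> 'cust_'

# Prefix is stripped, then each remainder is converted to snake_case.
print(canonicalize_names(columns))
# -> {'CUST_ID': 'id', 'CUST_FullName': 'full_name', 'CUST_CreatedAt': 'created_at'}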