datalex-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalex_cli/__init__.py +1 -0
- datalex_cli/datalex_cli.py +658 -0
- datalex_cli/main.py +2925 -0
- datalex_cli-0.1.1.dist-info/METADATA +228 -0
- datalex_cli-0.1.1.dist-info/RECORD +64 -0
- datalex_cli-0.1.1.dist-info/WHEEL +5 -0
- datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
- datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
- datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
- datalex_core/__init__.py +94 -0
- datalex_core/_schemas/datalex/common.schema.json +127 -0
- datalex_core/_schemas/datalex/domain.schema.json +24 -0
- datalex_core/_schemas/datalex/entity.schema.json +158 -0
- datalex_core/_schemas/datalex/model.schema.json +141 -0
- datalex_core/_schemas/datalex/policy.schema.json +70 -0
- datalex_core/_schemas/datalex/project.schema.json +82 -0
- datalex_core/_schemas/datalex/snippet.schema.json +24 -0
- datalex_core/_schemas/datalex/source.schema.json +104 -0
- datalex_core/_schemas/datalex/term.schema.json +30 -0
- datalex_core/canonical.py +166 -0
- datalex_core/completion.py +204 -0
- datalex_core/connectors/__init__.py +39 -0
- datalex_core/connectors/base.py +417 -0
- datalex_core/connectors/bigquery.py +229 -0
- datalex_core/connectors/databricks.py +262 -0
- datalex_core/connectors/mysql.py +266 -0
- datalex_core/connectors/postgres.py +309 -0
- datalex_core/connectors/redshift.py +298 -0
- datalex_core/connectors/snowflake.py +336 -0
- datalex_core/connectors/sqlserver.py +425 -0
- datalex_core/datalex/__init__.py +26 -0
- datalex_core/datalex/diff.py +188 -0
- datalex_core/datalex/errors.py +85 -0
- datalex_core/datalex/loader.py +512 -0
- datalex_core/datalex/migrate_layout.py +382 -0
- datalex_core/datalex/parse_cache.py +102 -0
- datalex_core/datalex/project.py +214 -0
- datalex_core/datalex/types.py +224 -0
- datalex_core/dbt/__init__.py +18 -0
- datalex_core/dbt/emit.py +344 -0
- datalex_core/dbt/manifest.py +329 -0
- datalex_core/dbt/profiles.py +185 -0
- datalex_core/dbt/sync.py +279 -0
- datalex_core/dbt/warehouse.py +215 -0
- datalex_core/dialects/__init__.py +15 -0
- datalex_core/dialects/_common.py +48 -0
- datalex_core/dialects/base.py +47 -0
- datalex_core/dialects/postgres.py +164 -0
- datalex_core/dialects/registry.py +36 -0
- datalex_core/dialects/snowflake.py +129 -0
- datalex_core/diffing.py +358 -0
- datalex_core/docs_generator.py +797 -0
- datalex_core/doctor.py +181 -0
- datalex_core/generators.py +478 -0
- datalex_core/importers.py +1176 -0
- datalex_core/issues.py +23 -0
- datalex_core/loader.py +21 -0
- datalex_core/migrate.py +316 -0
- datalex_core/modeling.py +679 -0
- datalex_core/packages.py +430 -0
- datalex_core/policy.py +1037 -0
- datalex_core/resolver.py +456 -0
- datalex_core/schema.py +54 -0
- datalex_core/semantic.py +1561 -0
datalex_cli/main.py
ADDED
|
@@ -0,0 +1,2925 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import glob
|
|
3
|
+
import json
|
|
4
|
+
import hashlib
|
|
5
|
+
import re
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
import time
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, List, Tuple
|
|
11
|
+
from urllib.parse import urlparse
|
|
12
|
+
|
|
13
|
+
import yaml
|
|
14
|
+
|
|
15
|
+
from datalex_core import (
|
|
16
|
+
apply_standards_fixes,
|
|
17
|
+
compile_model,
|
|
18
|
+
completeness_as_dict,
|
|
19
|
+
completeness_report,
|
|
20
|
+
diagnostics_as_json,
|
|
21
|
+
format_diagnostics,
|
|
22
|
+
generate_bash_completion,
|
|
23
|
+
generate_changelog,
|
|
24
|
+
generate_fish_completion,
|
|
25
|
+
generate_html_docs,
|
|
26
|
+
generate_markdown_docs,
|
|
27
|
+
generate_migration,
|
|
28
|
+
generate_sql_ddl,
|
|
29
|
+
generate_zsh_completion,
|
|
30
|
+
ConnectorConfig,
|
|
31
|
+
get_connector,
|
|
32
|
+
import_dbt_schema_yml,
|
|
33
|
+
import_dbml,
|
|
34
|
+
import_spark_schema,
|
|
35
|
+
import_sql_ddl,
|
|
36
|
+
sync_dbt_schema_yml,
|
|
37
|
+
list_connectors,
|
|
38
|
+
lint_issues,
|
|
39
|
+
load_policy_pack,
|
|
40
|
+
load_policy_pack_with_inheritance,
|
|
41
|
+
load_schema,
|
|
42
|
+
load_yaml_model,
|
|
43
|
+
merge_policy_packs,
|
|
44
|
+
merge_models_preserving_docs,
|
|
45
|
+
policy_issues,
|
|
46
|
+
project_diff,
|
|
47
|
+
resolve_model,
|
|
48
|
+
resolve_project,
|
|
49
|
+
run_diagnostics,
|
|
50
|
+
schema_issues,
|
|
51
|
+
semantic_diff,
|
|
52
|
+
standards_issues,
|
|
53
|
+
transform_model,
|
|
54
|
+
write_changelog,
|
|
55
|
+
write_dbt_scaffold,
|
|
56
|
+
write_html_docs,
|
|
57
|
+
write_markdown_docs,
|
|
58
|
+
write_migration,
|
|
59
|
+
)
|
|
60
|
+
from datalex_core.issues import Issue, has_errors, to_lines
|
|
61
|
+
|
|
62
|
+
STARTER_MODEL = """model:
|
|
63
|
+
name: starter_model
|
|
64
|
+
version: 1.0.0
|
|
65
|
+
domain: demo
|
|
66
|
+
owners:
|
|
67
|
+
- data-team@example.com
|
|
68
|
+
state: draft
|
|
69
|
+
|
|
70
|
+
entities:
|
|
71
|
+
- name: User
|
|
72
|
+
type: table
|
|
73
|
+
fields:
|
|
74
|
+
- name: user_id
|
|
75
|
+
type: integer
|
|
76
|
+
primary_key: true
|
|
77
|
+
nullable: false
|
|
78
|
+
- name: email
|
|
79
|
+
type: string
|
|
80
|
+
nullable: false
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
MULTI_MODEL_SHARED = """model:
|
|
84
|
+
name: shared_dimensions
|
|
85
|
+
spec_version: 2
|
|
86
|
+
version: 1.0.0
|
|
87
|
+
domain: shared
|
|
88
|
+
owners:
|
|
89
|
+
- data-team@example.com
|
|
90
|
+
state: draft
|
|
91
|
+
description: Shared dimension entities used across domain models
|
|
92
|
+
|
|
93
|
+
entities:
|
|
94
|
+
- name: Customer
|
|
95
|
+
type: table
|
|
96
|
+
description: Customer master record
|
|
97
|
+
schema: shared
|
|
98
|
+
subject_area: customer_domain
|
|
99
|
+
fields:
|
|
100
|
+
- name: customer_id
|
|
101
|
+
type: integer
|
|
102
|
+
primary_key: true
|
|
103
|
+
nullable: false
|
|
104
|
+
- name: email
|
|
105
|
+
type: string
|
|
106
|
+
nullable: false
|
|
107
|
+
unique: true
|
|
108
|
+
- name: full_name
|
|
109
|
+
type: string
|
|
110
|
+
nullable: false
|
|
111
|
+
- name: created_at
|
|
112
|
+
type: timestamp
|
|
113
|
+
nullable: false
|
|
114
|
+
|
|
115
|
+
indexes:
|
|
116
|
+
- name: idx_customer_email
|
|
117
|
+
entity: Customer
|
|
118
|
+
fields: [email]
|
|
119
|
+
unique: true
|
|
120
|
+
"""
|
|
121
|
+
|
|
122
|
+
MULTI_MODEL_ORDERS = """model:
|
|
123
|
+
name: orders
|
|
124
|
+
spec_version: 2
|
|
125
|
+
version: 1.0.0
|
|
126
|
+
domain: sales
|
|
127
|
+
owners:
|
|
128
|
+
- data-team@example.com
|
|
129
|
+
state: draft
|
|
130
|
+
description: Order domain model
|
|
131
|
+
imports:
|
|
132
|
+
- model: shared_dimensions
|
|
133
|
+
alias: shared
|
|
134
|
+
entities: [Customer]
|
|
135
|
+
|
|
136
|
+
entities:
|
|
137
|
+
- name: Order
|
|
138
|
+
type: table
|
|
139
|
+
description: Customer orders
|
|
140
|
+
schema: sales
|
|
141
|
+
subject_area: order_domain
|
|
142
|
+
fields:
|
|
143
|
+
- name: order_id
|
|
144
|
+
type: integer
|
|
145
|
+
primary_key: true
|
|
146
|
+
nullable: false
|
|
147
|
+
- name: customer_id
|
|
148
|
+
type: integer
|
|
149
|
+
nullable: false
|
|
150
|
+
foreign_key: true
|
|
151
|
+
- name: total_amount
|
|
152
|
+
type: decimal(12,2)
|
|
153
|
+
nullable: false
|
|
154
|
+
- name: order_date
|
|
155
|
+
type: timestamp
|
|
156
|
+
nullable: false
|
|
157
|
+
|
|
158
|
+
relationships:
|
|
159
|
+
- name: order_customer
|
|
160
|
+
from: Order.customer_id
|
|
161
|
+
to: Customer.customer_id
|
|
162
|
+
cardinality: many_to_one
|
|
163
|
+
description: Order belongs to a customer (cross-model)
|
|
164
|
+
"""
|
|
165
|
+
|
|
166
|
+
END_TO_END_SOURCE = """model:
|
|
167
|
+
name: source_sales_raw
|
|
168
|
+
spec_version: 2
|
|
169
|
+
version: 1.0.0
|
|
170
|
+
domain: sales
|
|
171
|
+
owners:
|
|
172
|
+
- data-platform@example.com
|
|
173
|
+
state: draft
|
|
174
|
+
layer: source
|
|
175
|
+
description: Source layer contract pulled from warehouse raw schemas.
|
|
176
|
+
|
|
177
|
+
entities:
|
|
178
|
+
- name: RawCustomers
|
|
179
|
+
type: table
|
|
180
|
+
description: Raw customer profile records from CRM.
|
|
181
|
+
tags: [BRONZE, SOURCE, CUSTOMER]
|
|
182
|
+
schema: raw
|
|
183
|
+
subject_area: customer_domain
|
|
184
|
+
owner: customer-data@example.com
|
|
185
|
+
grain: [customer_id]
|
|
186
|
+
sla:
|
|
187
|
+
freshness: 4h
|
|
188
|
+
quality_score: 98
|
|
189
|
+
fields:
|
|
190
|
+
- name: customer_id
|
|
191
|
+
type: string
|
|
192
|
+
primary_key: true
|
|
193
|
+
nullable: false
|
|
194
|
+
description: Stable customer identifier from CRM.
|
|
195
|
+
tags: [IDENTIFIER]
|
|
196
|
+
- name: email
|
|
197
|
+
type: string
|
|
198
|
+
nullable: false
|
|
199
|
+
description: Customer email from source system.
|
|
200
|
+
tags: [PII, CONTACT]
|
|
201
|
+
sensitivity: restricted
|
|
202
|
+
- name: created_at
|
|
203
|
+
type: timestamp
|
|
204
|
+
nullable: false
|
|
205
|
+
description: Customer creation timestamp from source.
|
|
206
|
+
tags: [AUDIT]
|
|
207
|
+
|
|
208
|
+
- name: RawOrders
|
|
209
|
+
type: table
|
|
210
|
+
description: Raw order transactions from commerce platform.
|
|
211
|
+
tags: [BRONZE, SOURCE, ORDER]
|
|
212
|
+
schema: raw
|
|
213
|
+
subject_area: order_domain
|
|
214
|
+
owner: order-data@example.com
|
|
215
|
+
grain: [order_id]
|
|
216
|
+
sla:
|
|
217
|
+
freshness: 2h
|
|
218
|
+
quality_score: 97
|
|
219
|
+
fields:
|
|
220
|
+
- name: order_id
|
|
221
|
+
type: string
|
|
222
|
+
primary_key: true
|
|
223
|
+
nullable: false
|
|
224
|
+
description: Unique order identifier.
|
|
225
|
+
tags: [IDENTIFIER]
|
|
226
|
+
- name: customer_id
|
|
227
|
+
type: string
|
|
228
|
+
nullable: false
|
|
229
|
+
foreign_key: true
|
|
230
|
+
description: Customer identifier attached to the order.
|
|
231
|
+
tags: [JOIN_KEY]
|
|
232
|
+
- name: order_ts
|
|
233
|
+
type: timestamp
|
|
234
|
+
nullable: false
|
|
235
|
+
description: Order creation timestamp.
|
|
236
|
+
tags: [EVENT_TIME]
|
|
237
|
+
- name: gross_amount
|
|
238
|
+
type: decimal(12,2)
|
|
239
|
+
nullable: false
|
|
240
|
+
description: Total order amount before discounts and tax allocations.
|
|
241
|
+
tags: [AMOUNT, FINANCE]
|
|
242
|
+
- name: status
|
|
243
|
+
type: string
|
|
244
|
+
nullable: false
|
|
245
|
+
description: Raw order lifecycle status.
|
|
246
|
+
tags: [STATUS]
|
|
247
|
+
|
|
248
|
+
relationships:
|
|
249
|
+
- name: raw_orders_customer
|
|
250
|
+
from: RawOrders.customer_id
|
|
251
|
+
to: RawCustomers.customer_id
|
|
252
|
+
cardinality: many_to_one
|
|
253
|
+
description: Raw order row belongs to a raw customer row.
|
|
254
|
+
|
|
255
|
+
governance:
|
|
256
|
+
classification:
|
|
257
|
+
RawCustomers.email: PII
|
|
258
|
+
stewards:
|
|
259
|
+
customer_domain: customer-data@example.com
|
|
260
|
+
order_domain: order-data@example.com
|
|
261
|
+
retention:
|
|
262
|
+
period: 3y
|
|
263
|
+
policy: source_contract_baseline
|
|
264
|
+
|
|
265
|
+
glossary:
|
|
266
|
+
- term: Raw Zone
|
|
267
|
+
definition: Ingested source-aligned data before business transformations.
|
|
268
|
+
owner: data-platform@example.com
|
|
269
|
+
tags: [INGESTION]
|
|
270
|
+
|
|
271
|
+
rules:
|
|
272
|
+
- name: raw_orders_amount_non_negative
|
|
273
|
+
target: RawOrders.gross_amount
|
|
274
|
+
expression: "value >= 0"
|
|
275
|
+
severity: error
|
|
276
|
+
"""
|
|
277
|
+
|
|
278
|
+
END_TO_END_TRANSFORM = """model:
|
|
279
|
+
name: commerce_transform
|
|
280
|
+
spec_version: 2
|
|
281
|
+
version: 1.0.0
|
|
282
|
+
domain: sales
|
|
283
|
+
owners:
|
|
284
|
+
- analytics-engineering@example.com
|
|
285
|
+
state: draft
|
|
286
|
+
layer: transform
|
|
287
|
+
description: Transform layer business models derived from raw sources.
|
|
288
|
+
imports:
|
|
289
|
+
- model: source_sales_raw
|
|
290
|
+
alias: src
|
|
291
|
+
path: ../source/source_sales_raw.model.yaml
|
|
292
|
+
|
|
293
|
+
entities:
|
|
294
|
+
- name: CustomerDim
|
|
295
|
+
type: table
|
|
296
|
+
description: Conformed customer dimension for analytics.
|
|
297
|
+
tags: [SILVER, DIMENSION, CUSTOMER]
|
|
298
|
+
schema: analytics
|
|
299
|
+
subject_area: customer_domain
|
|
300
|
+
owner: analytics-engineering@example.com
|
|
301
|
+
grain: [customer_id]
|
|
302
|
+
sla:
|
|
303
|
+
freshness: 8h
|
|
304
|
+
quality_score: 99
|
|
305
|
+
fields:
|
|
306
|
+
- name: customer_id
|
|
307
|
+
type: string
|
|
308
|
+
primary_key: true
|
|
309
|
+
nullable: false
|
|
310
|
+
description: Conformed customer key.
|
|
311
|
+
tags: [IDENTIFIER]
|
|
312
|
+
- name: email
|
|
313
|
+
type: string
|
|
314
|
+
nullable: false
|
|
315
|
+
description: Customer email used by lifecycle reporting.
|
|
316
|
+
tags: [PII, CONTACT]
|
|
317
|
+
sensitivity: restricted
|
|
318
|
+
- name: customer_tier
|
|
319
|
+
type: string
|
|
320
|
+
nullable: false
|
|
321
|
+
description: Normalized customer segment derived from source events.
|
|
322
|
+
tags: [SEGMENT]
|
|
323
|
+
|
|
324
|
+
- name: OrderFact
|
|
325
|
+
type: table
|
|
326
|
+
description: Atomic order-level fact table for finance and growth analytics.
|
|
327
|
+
tags: [SILVER, FACT, ORDER]
|
|
328
|
+
schema: analytics
|
|
329
|
+
subject_area: order_domain
|
|
330
|
+
owner: analytics-engineering@example.com
|
|
331
|
+
grain: [order_id]
|
|
332
|
+
sla:
|
|
333
|
+
freshness: 4h
|
|
334
|
+
quality_score: 99
|
|
335
|
+
fields:
|
|
336
|
+
- name: order_id
|
|
337
|
+
type: string
|
|
338
|
+
primary_key: true
|
|
339
|
+
nullable: false
|
|
340
|
+
description: Unique order key.
|
|
341
|
+
tags: [IDENTIFIER]
|
|
342
|
+
- name: customer_id
|
|
343
|
+
type: string
|
|
344
|
+
nullable: false
|
|
345
|
+
foreign_key: true
|
|
346
|
+
description: Foreign key to customer dimension.
|
|
347
|
+
tags: [JOIN_KEY]
|
|
348
|
+
- name: order_date
|
|
349
|
+
type: date
|
|
350
|
+
nullable: false
|
|
351
|
+
description: Business order date used for reporting grain.
|
|
352
|
+
tags: [REPORTING_DATE]
|
|
353
|
+
- name: net_revenue
|
|
354
|
+
type: decimal(12,2)
|
|
355
|
+
nullable: false
|
|
356
|
+
description: Revenue after discount normalization.
|
|
357
|
+
tags: [AMOUNT, FINANCE]
|
|
358
|
+
- name: order_status
|
|
359
|
+
type: string
|
|
360
|
+
nullable: false
|
|
361
|
+
description: Standardized business order status.
|
|
362
|
+
tags: [STATUS]
|
|
363
|
+
|
|
364
|
+
relationships:
|
|
365
|
+
- name: order_fact_customer_dim
|
|
366
|
+
from: OrderFact.customer_id
|
|
367
|
+
to: CustomerDim.customer_id
|
|
368
|
+
cardinality: many_to_one
|
|
369
|
+
description: Fact row belongs to one customer.
|
|
370
|
+
|
|
371
|
+
indexes:
|
|
372
|
+
- name: idx_order_fact_order_date
|
|
373
|
+
entity: OrderFact
|
|
374
|
+
fields: [order_date]
|
|
375
|
+
- name: idx_order_fact_customer_id
|
|
376
|
+
entity: OrderFact
|
|
377
|
+
fields: [customer_id]
|
|
378
|
+
|
|
379
|
+
governance:
|
|
380
|
+
classification:
|
|
381
|
+
CustomerDim.email: PII
|
|
382
|
+
stewards:
|
|
383
|
+
customer_domain: analytics-engineering@example.com
|
|
384
|
+
order_domain: analytics-engineering@example.com
|
|
385
|
+
retention:
|
|
386
|
+
period: 5y
|
|
387
|
+
policy: transformed_contract
|
|
388
|
+
|
|
389
|
+
glossary:
|
|
390
|
+
- term: Order Fact
|
|
391
|
+
definition: One row per order after transformation and standardization.
|
|
392
|
+
owner: analytics-engineering@example.com
|
|
393
|
+
related_fields:
|
|
394
|
+
- OrderFact.order_id
|
|
395
|
+
- OrderFact.net_revenue
|
|
396
|
+
tags: [FACT]
|
|
397
|
+
|
|
398
|
+
rules:
|
|
399
|
+
- name: order_fact_revenue_non_negative
|
|
400
|
+
target: OrderFact.net_revenue
|
|
401
|
+
expression: "value >= 0"
|
|
402
|
+
severity: error
|
|
403
|
+
"""
|
|
404
|
+
|
|
405
|
+
END_TO_END_REPORT = """model:
|
|
406
|
+
name: commerce_reporting
|
|
407
|
+
spec_version: 2
|
|
408
|
+
version: 1.0.0
|
|
409
|
+
domain: sales
|
|
410
|
+
owners:
|
|
411
|
+
- bi-team@example.com
|
|
412
|
+
state: draft
|
|
413
|
+
layer: report
|
|
414
|
+
description: Reporting layer metric contracts and dictionary-ready semantic views.
|
|
415
|
+
imports:
|
|
416
|
+
- model: commerce_transform
|
|
417
|
+
alias: tr
|
|
418
|
+
path: ../transform/commerce_transform.model.yaml
|
|
419
|
+
|
|
420
|
+
entities:
|
|
421
|
+
- name: DailyRevenueMetric
|
|
422
|
+
type: view
|
|
423
|
+
description: Daily revenue KPI contract used by executive dashboards.
|
|
424
|
+
tags: [GOLD, METRIC, KPI, REPORTING]
|
|
425
|
+
schema: reporting
|
|
426
|
+
subject_area: executive_kpis
|
|
427
|
+
owner: bi-team@example.com
|
|
428
|
+
grain: [metric_date]
|
|
429
|
+
sla:
|
|
430
|
+
freshness: 24h
|
|
431
|
+
quality_score: 99
|
|
432
|
+
fields:
|
|
433
|
+
- name: metric_date
|
|
434
|
+
type: date
|
|
435
|
+
nullable: false
|
|
436
|
+
description: Daily reporting grain for KPI trend lines.
|
|
437
|
+
tags: [GRAIN, REPORTING_DATE]
|
|
438
|
+
- name: gross_revenue
|
|
439
|
+
type: decimal(12,2)
|
|
440
|
+
nullable: false
|
|
441
|
+
computed: true
|
|
442
|
+
computed_expression: "SUM(OrderFact.net_revenue)"
|
|
443
|
+
description: Sum of net revenue at daily grain.
|
|
444
|
+
tags: [METRIC, FINANCE]
|
|
445
|
+
- name: order_count
|
|
446
|
+
type: integer
|
|
447
|
+
nullable: false
|
|
448
|
+
computed: true
|
|
449
|
+
computed_expression: "COUNT_DISTINCT(OrderFact.order_id)"
|
|
450
|
+
description: Distinct order count at daily grain.
|
|
451
|
+
tags: [METRIC, VOLUME]
|
|
452
|
+
- name: avg_order_value
|
|
453
|
+
type: decimal(12,2)
|
|
454
|
+
nullable: false
|
|
455
|
+
computed: true
|
|
456
|
+
computed_expression: "gross_revenue / NULLIF(order_count, 0)"
|
|
457
|
+
description: Average order value derived from daily metrics.
|
|
458
|
+
tags: [METRIC, FINANCE]
|
|
459
|
+
|
|
460
|
+
- name: CustomerRevenueMetric
|
|
461
|
+
type: view
|
|
462
|
+
description: Customer-level revenue KPI contract for retention analysis.
|
|
463
|
+
tags: [GOLD, METRIC, CUSTOMER]
|
|
464
|
+
schema: reporting
|
|
465
|
+
subject_area: customer_kpis
|
|
466
|
+
owner: bi-team@example.com
|
|
467
|
+
grain: [customer_id, report_month]
|
|
468
|
+
sla:
|
|
469
|
+
freshness: 24h
|
|
470
|
+
quality_score: 99
|
|
471
|
+
fields:
|
|
472
|
+
- name: customer_id
|
|
473
|
+
type: string
|
|
474
|
+
nullable: false
|
|
475
|
+
description: Customer identifier for customer KPI cuts.
|
|
476
|
+
tags: [DIMENSION, IDENTIFIER]
|
|
477
|
+
- name: report_month
|
|
478
|
+
type: date
|
|
479
|
+
nullable: false
|
|
480
|
+
description: Monthly reporting period for customer metrics.
|
|
481
|
+
tags: [GRAIN]
|
|
482
|
+
- name: customer_revenue
|
|
483
|
+
type: decimal(12,2)
|
|
484
|
+
nullable: false
|
|
485
|
+
computed: true
|
|
486
|
+
computed_expression: "SUM(OrderFact.net_revenue)"
|
|
487
|
+
description: Total monthly customer revenue.
|
|
488
|
+
tags: [METRIC, FINANCE]
|
|
489
|
+
- name: active_order_count
|
|
490
|
+
type: integer
|
|
491
|
+
nullable: false
|
|
492
|
+
computed: true
|
|
493
|
+
computed_expression: "COUNT_DISTINCT(OrderFact.order_id)"
|
|
494
|
+
description: Distinct active orders for the customer period.
|
|
495
|
+
tags: [METRIC]
|
|
496
|
+
|
|
497
|
+
indexes:
|
|
498
|
+
- name: idx_daily_revenue_metric_date
|
|
499
|
+
entity: DailyRevenueMetric
|
|
500
|
+
fields: [metric_date]
|
|
501
|
+
- name: idx_customer_revenue_metric_customer
|
|
502
|
+
entity: CustomerRevenueMetric
|
|
503
|
+
fields: [customer_id]
|
|
504
|
+
|
|
505
|
+
governance:
|
|
506
|
+
classification:
|
|
507
|
+
CustomerRevenueMetric.customer_id: INTERNAL
|
|
508
|
+
stewards:
|
|
509
|
+
executive_kpis: bi-team@example.com
|
|
510
|
+
customer_kpis: bi-team@example.com
|
|
511
|
+
retention:
|
|
512
|
+
period: 7y
|
|
513
|
+
policy: reporting_contract
|
|
514
|
+
|
|
515
|
+
glossary:
|
|
516
|
+
- term: Gross Revenue
|
|
517
|
+
abbreviation: GR
|
|
518
|
+
definition: Sum of net revenue values over the reporting grain.
|
|
519
|
+
owner: bi-team@example.com
|
|
520
|
+
related_fields:
|
|
521
|
+
- DailyRevenueMetric.gross_revenue
|
|
522
|
+
tags: [KPI, FINANCE]
|
|
523
|
+
- term: Average Order Value
|
|
524
|
+
abbreviation: AOV
|
|
525
|
+
definition: Gross revenue divided by distinct order count for the period.
|
|
526
|
+
owner: bi-team@example.com
|
|
527
|
+
related_fields:
|
|
528
|
+
- DailyRevenueMetric.avg_order_value
|
|
529
|
+
tags: [KPI, COMMERCE]
|
|
530
|
+
- term: Customer Revenue
|
|
531
|
+
definition: Total revenue attributed to a customer within report_month.
|
|
532
|
+
owner: bi-team@example.com
|
|
533
|
+
related_fields:
|
|
534
|
+
- CustomerRevenueMetric.customer_revenue
|
|
535
|
+
tags: [KPI, CUSTOMER]
|
|
536
|
+
|
|
537
|
+
rules:
|
|
538
|
+
- name: gross_revenue_non_negative
|
|
539
|
+
target: DailyRevenueMetric.gross_revenue
|
|
540
|
+
expression: "value >= 0"
|
|
541
|
+
severity: error
|
|
542
|
+
- name: order_count_non_negative
|
|
543
|
+
target: DailyRevenueMetric.order_count
|
|
544
|
+
expression: "value >= 0"
|
|
545
|
+
severity: error
|
|
546
|
+
- name: customer_revenue_non_negative
|
|
547
|
+
target: CustomerRevenueMetric.customer_revenue
|
|
548
|
+
expression: "value >= 0"
|
|
549
|
+
severity: error
|
|
550
|
+
|
|
551
|
+
metrics:
|
|
552
|
+
- name: daily_gross_revenue
|
|
553
|
+
entity: DailyRevenueMetric
|
|
554
|
+
description: Daily gross revenue KPI for executive reporting.
|
|
555
|
+
expression: gross_revenue
|
|
556
|
+
aggregation: sum
|
|
557
|
+
grain: [metric_date]
|
|
558
|
+
dimensions: [metric_date]
|
|
559
|
+
time_dimension: metric_date
|
|
560
|
+
owner: bi-team@example.com
|
|
561
|
+
tags: [KPI, METRIC, FINANCE]
|
|
562
|
+
- name: daily_order_count
|
|
563
|
+
entity: DailyRevenueMetric
|
|
564
|
+
description: Daily distinct order count.
|
|
565
|
+
expression: order_count
|
|
566
|
+
aggregation: count_distinct
|
|
567
|
+
grain: [metric_date]
|
|
568
|
+
dimensions: [metric_date]
|
|
569
|
+
time_dimension: metric_date
|
|
570
|
+
owner: bi-team@example.com
|
|
571
|
+
tags: [KPI, METRIC, VOLUME]
|
|
572
|
+
- name: monthly_customer_revenue
|
|
573
|
+
entity: CustomerRevenueMetric
|
|
574
|
+
description: Monthly revenue by customer.
|
|
575
|
+
expression: customer_revenue
|
|
576
|
+
aggregation: sum
|
|
577
|
+
grain: [customer_id, report_month]
|
|
578
|
+
dimensions: [customer_id]
|
|
579
|
+
time_dimension: report_month
|
|
580
|
+
owner: bi-team@example.com
|
|
581
|
+
tags: [KPI, METRIC, CUSTOMER]
|
|
582
|
+
|
|
583
|
+
display:
|
|
584
|
+
sections:
|
|
585
|
+
- name: Executive KPIs
|
|
586
|
+
entities: [DailyRevenueMetric]
|
|
587
|
+
- name: Customer KPIs
|
|
588
|
+
entities: [CustomerRevenueMetric]
|
|
589
|
+
"""
|
|
590
|
+
|
|
591
|
+
END_TO_END_POLICY = """pack:
|
|
592
|
+
name: end_to_end_dictionary
|
|
593
|
+
version: 1.0.0
|
|
594
|
+
description: Strict policy profile for end-to-end modeling + dictionary-first projects.
|
|
595
|
+
extends: strict.policy.yaml
|
|
596
|
+
|
|
597
|
+
policies:
|
|
598
|
+
- id: REQUIRE_MODEL_GOVERNANCE
|
|
599
|
+
type: custom_expression
|
|
600
|
+
severity: error
|
|
601
|
+
params:
|
|
602
|
+
scope: model
|
|
603
|
+
expression: "has_governance"
|
|
604
|
+
message: "Model '{name}' must define governance metadata."
|
|
605
|
+
|
|
606
|
+
- id: REQUIRE_MODEL_GLOSSARY
|
|
607
|
+
type: custom_expression
|
|
608
|
+
severity: error
|
|
609
|
+
params:
|
|
610
|
+
scope: model
|
|
611
|
+
expression: "has_glossary"
|
|
612
|
+
message: "Model '{name}' must define glossary terms for dictionary coverage."
|
|
613
|
+
|
|
614
|
+
- id: REQUIRE_MODEL_RULES
|
|
615
|
+
type: custom_expression
|
|
616
|
+
severity: error
|
|
617
|
+
params:
|
|
618
|
+
scope: model
|
|
619
|
+
expression: "has_rules"
|
|
620
|
+
message: "Model '{name}' must define rules for business logic checks."
|
|
621
|
+
|
|
622
|
+
- id: REQUIRE_REPORT_LAYER_METRICS
|
|
623
|
+
type: custom_expression
|
|
624
|
+
severity: error
|
|
625
|
+
params:
|
|
626
|
+
scope: model
|
|
627
|
+
expression: "layer != 'report' or has_metrics"
|
|
628
|
+
message: "Report layer model '{name}' must define metrics."
|
|
629
|
+
|
|
630
|
+
- id: REQUIRE_ENTITY_SUBJECT_AREA
|
|
631
|
+
type: custom_expression
|
|
632
|
+
severity: error
|
|
633
|
+
params:
|
|
634
|
+
scope: entity
|
|
635
|
+
expression: "subject_area != ''"
|
|
636
|
+
message: "Entity '{name}' must define subject_area for dictionary organization."
|
|
637
|
+
|
|
638
|
+
- id: REQUIRE_ENTITY_DESCRIPTION
|
|
639
|
+
type: custom_expression
|
|
640
|
+
severity: error
|
|
641
|
+
params:
|
|
642
|
+
scope: entity
|
|
643
|
+
expression: "has_description"
|
|
644
|
+
message: "Entity '{name}' must include a description."
|
|
645
|
+
|
|
646
|
+
- id: REQUIRE_FIELD_DESCRIPTION
|
|
647
|
+
type: custom_expression
|
|
648
|
+
severity: error
|
|
649
|
+
params:
|
|
650
|
+
scope: field
|
|
651
|
+
expression: "primary_key or has_description"
|
|
652
|
+
message: "Field '{name}' must include a description unless it is a primary key."
|
|
653
|
+
|
|
654
|
+
- id: REQUIRE_FIELD_TAGS
|
|
655
|
+
type: custom_expression
|
|
656
|
+
severity: error
|
|
657
|
+
params:
|
|
658
|
+
scope: field
|
|
659
|
+
expression: "primary_key or tags != []"
|
|
660
|
+
message: "Field '{name}' must include at least one tag unless it is a primary key."
|
|
661
|
+
"""
|
|
662
|
+
|
|
663
|
+
END_TO_END_DICTIONARY_README = """# End-to-End Dictionary Workflow
|
|
664
|
+
|
|
665
|
+
This project is scaffolded to keep architecture, transformation logic, reporting metrics,
|
|
666
|
+
and business dictionary metadata in one programmable YAML system.
|
|
667
|
+
|
|
668
|
+
## Layers
|
|
669
|
+
|
|
670
|
+
1. `models/source/`:
|
|
671
|
+
- Physical source contracts (warehouse/raw systems).
|
|
672
|
+
2. `models/transform/`:
|
|
673
|
+
- Business-conformed entities and relationships.
|
|
674
|
+
3. `models/report/`:
|
|
675
|
+
- Reporting semantic contracts and KPI-focused glossary terms.
|
|
676
|
+
|
|
677
|
+
## Required Sections Per Model
|
|
678
|
+
|
|
679
|
+
1. `model` metadata (`name`, `version`, `owners`, `state`, `description`).
|
|
680
|
+
2. `entities` with field-level descriptions and tags.
|
|
681
|
+
3. `grain` in transform/report entities.
|
|
682
|
+
4. `governance` classification/stewardship metadata.
|
|
683
|
+
5. `glossary` terms for dictionary clarity.
|
|
684
|
+
6. `rules` for enforceable business logic.
|
|
685
|
+
7. `metrics` in report models for KPI contracts.
|
|
686
|
+
|
|
687
|
+
## Mandatory Validation Flow
|
|
688
|
+
|
|
689
|
+
```bash
|
|
690
|
+
datalex validate-all --glob "models/**/*.model.yaml"
|
|
691
|
+
datalex policy-check models/source/source_sales_raw.model.yaml --policy policies/end_to_end_dictionary.policy.yaml --inherit
|
|
692
|
+
datalex policy-check models/transform/commerce_transform.model.yaml --policy policies/end_to_end_dictionary.policy.yaml --inherit
|
|
693
|
+
datalex policy-check models/report/commerce_reporting.model.yaml --policy policies/end_to_end_dictionary.policy.yaml --inherit
|
|
694
|
+
datalex resolve-project models
|
|
695
|
+
datalex generate docs models/report/commerce_reporting.model.yaml --format html --out docs/dictionary/reporting-dictionary.html
|
|
696
|
+
```
|
|
697
|
+
"""
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def _default_schema_path() -> str:
|
|
701
|
+
return str(Path.cwd() / "schemas" / "model.schema.json")
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def _default_policy_schema_path() -> str:
|
|
705
|
+
return str(Path.cwd() / "schemas" / "policy.schema.json")
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
def _default_policy_path() -> str:
|
|
709
|
+
return str(Path.cwd() / "policies" / "default.policy.yaml")
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def _print_issues(issues: List[Issue]) -> None:
    """Print each issue on its own line, or a placeholder when there are none.

    Issues are rendered through ``to_lines``; an empty list prints
    ``No issues found.`` instead.
    """
    if issues:
        for rendered in to_lines(issues):
            print(rendered)
    else:
        print("No issues found.")
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def _combined_issues(model: Dict[str, Any], schema: Dict[str, Any]) -> List[Issue]:
    """Return schema issues for ``model`` followed by its lint issues.

    The list returned by ``schema_issues`` is extended in place with the
    result of ``lint_issues`` and then returned.
    """
    collected = schema_issues(model, schema)
    lint_findings = lint_issues(model)
    collected.extend(lint_findings)
    return collected
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def _normalize_host_and_port(host: str, port: int) -> Tuple[str, int]:
|
|
727
|
+
"""Accept URL-ish host input and normalize it to hostname + port."""
|
|
728
|
+
clean_host = (host or "").strip()
|
|
729
|
+
clean_port = port or 0
|
|
730
|
+
if not clean_host:
|
|
731
|
+
return "", clean_port
|
|
732
|
+
|
|
733
|
+
target = clean_host if "://" in clean_host else f"//{clean_host}"
|
|
734
|
+
parsed = urlparse(target)
|
|
735
|
+
normalized_host = parsed.hostname or clean_host.split("/", 1)[0].strip()
|
|
736
|
+
|
|
737
|
+
parsed_port = 0
|
|
738
|
+
try:
|
|
739
|
+
parsed_port = parsed.port or 0
|
|
740
|
+
except ValueError:
|
|
741
|
+
parsed_port = 0
|
|
742
|
+
|
|
743
|
+
if not clean_port and parsed_port:
|
|
744
|
+
clean_port = parsed_port
|
|
745
|
+
|
|
746
|
+
return normalized_host, clean_port
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
def _sanitize_model_file_stem(model_name: str) -> str:
|
|
750
|
+
stem = (model_name or "imported_model").strip() or "imported_model"
|
|
751
|
+
for ch in ("/", "\\", " ", ":", ";"):
|
|
752
|
+
stem = stem.replace(ch, "_")
|
|
753
|
+
return stem
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
def _should_create_directory(path: Path) -> bool:
|
|
757
|
+
if sys.stdin.isatty():
|
|
758
|
+
answer = input(f'Project folder "{path}" does not exist. Create it? [y/N]: ').strip().lower()
|
|
759
|
+
return answer in {"y", "yes"}
|
|
760
|
+
return False
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def _resolve_pull_output_path(args: argparse.Namespace, model_name: str) -> Tuple[bool, str]:
|
|
764
|
+
project_dir_raw = getattr(args, "project_dir", "") or ""
|
|
765
|
+
out_raw = getattr(args, "out", "") or ""
|
|
766
|
+
create_project_dir = bool(getattr(args, "create_project_dir", False))
|
|
767
|
+
|
|
768
|
+
if not project_dir_raw:
|
|
769
|
+
return True, out_raw
|
|
770
|
+
|
|
771
|
+
project_dir = Path(project_dir_raw).expanduser()
|
|
772
|
+
if project_dir.exists() and not project_dir.is_dir():
|
|
773
|
+
return False, f"Project folder is not a directory: {project_dir}"
|
|
774
|
+
if not project_dir.exists():
|
|
775
|
+
if create_project_dir or _should_create_directory(project_dir):
|
|
776
|
+
project_dir.mkdir(parents=True, exist_ok=True)
|
|
777
|
+
else:
|
|
778
|
+
if not sys.stdin.isatty():
|
|
779
|
+
return False, (
|
|
780
|
+
f"Project folder does not exist: {project_dir}. "
|
|
781
|
+
f"Re-run with --create-project-dir to create it."
|
|
782
|
+
)
|
|
783
|
+
return False, f"Aborted: project folder not created: {project_dir}"
|
|
784
|
+
|
|
785
|
+
if out_raw:
|
|
786
|
+
out_path = Path(out_raw)
|
|
787
|
+
if out_path.is_absolute():
|
|
788
|
+
return False, "--out must be a relative filename/path when used with --project-dir"
|
|
789
|
+
return True, str(project_dir / out_path)
|
|
790
|
+
|
|
791
|
+
file_name = f"{_sanitize_model_file_stem(model_name)}.model.yaml"
|
|
792
|
+
return True, str(project_dir / file_name)
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
def _validate_model_file(model_path: str, schema: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Issue]]:
    """Load one model file and collect its issues.

    The file is read with ``load_yaml_model`` and checked with
    ``_combined_issues`` against ``schema``.

    Returns:
        ``(model, issues)``: the loaded model and the issues found for it.
    """
    loaded = load_yaml_model(model_path)
    return loaded, _combined_issues(loaded, schema)
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
def _print_issue_block(prefix: str, issues: List[Issue]) -> None:
    """Print a labelled block of issues.

    With no issues a single ``<prefix>: No issues found.`` line is printed.
    Otherwise ``<prefix>:`` is printed as a heading, followed by one indented
    line per entry produced by ``to_lines``.
    """
    if issues:
        print(f"{prefix}:")
        for rendered in to_lines(issues):
            print(f" {rendered}")
    else:
        print(f"{prefix}: No issues found.")
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
def _issues_as_json(issues: List[Issue]) -> List[Dict[str, str]]:
    """Turn each issue into a plain dict with severity, code, message and path keys."""
    serialised: List[Dict[str, str]] = []
    for entry in issues:
        serialised.append(
            {
                "severity": entry.severity,
                "code": entry.code,
                "message": entry.message,
                "path": entry.path,
            }
        )
    return serialised
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def _write_yaml(path: str, payload: Dict[str, Any]) -> None:
    """Dump *payload* as YAML (key order preserved) and write it to *path* as UTF-8."""
    Path(path).write_text(yaml.safe_dump(payload, sort_keys=False), encoding="utf-8")
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
def _print_or_write_yaml(payload: Dict[str, Any], out: str = "") -> None:
    """Dump *payload* as block-style YAML; write it to *out* when given, else print it."""
    rendered = yaml.safe_dump(payload, sort_keys=False, default_flow_style=False, allow_unicode=True)
    if not out:
        print(rendered)
        return
    Path(out).write_text(rendered, encoding="utf-8")
    print(f"Wrote model: {out}")
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
def _init_schemas_and_policies(root: Path) -> List[Path]:
    """Seed ``root/schemas`` and ``root/policies``; return the files written.

    Each file is taken from the same relative location under the current
    working directory when present there. Existing destination files are
    never overwritten. Missing schema sources fall back to an empty JSON
    object; missing policy sources are skipped.
    """
    schemas_dir = root / "schemas"
    policies_dir = root / "policies"
    schemas_dir.mkdir(parents=True, exist_ok=True)
    policies_dir.mkdir(parents=True, exist_ok=True)

    # (source folder under cwd, destination folder, file name, placeholder or None)
    plan = (
        ("schemas", schemas_dir, "model.schema.json", "{}"),
        ("schemas", schemas_dir, "policy.schema.json", "{}"),
        ("policies", policies_dir, "default.policy.yaml", None),
        ("policies", policies_dir, "strict.policy.yaml", None),
    )

    written: List[Path] = []
    for source_folder, target_dir, file_name, placeholder in plan:
        target = target_dir / file_name
        if target.exists():
            continue
        origin = Path.cwd() / source_folder / file_name
        if origin.exists():
            target.write_text(origin.read_text(encoding="utf-8"), encoding="utf-8")
        elif placeholder is not None:
            target.write_text(placeholder, encoding="utf-8")
        else:
            # No source and no placeholder: leave the policy file absent.
            continue
        written.append(target)
    return written
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
def _init_write_if_missing(path: Path, text: str, created: List[Path]) -> None:
    """Write *text* to *path* as UTF-8 unless it already exists; record new files in *created*."""
    if path.exists():
        return
    path.write_text(text, encoding="utf-8")
    created.append(path)


def cmd_init(args: argparse.Namespace) -> int:
    """Scaffold a workspace under ``args.path``.

    ``args.template`` selects the layout ("multi-model", "end-to-end", or
    anything else for the single-model starter). ``args.multi_model`` forces
    the multi-model layout and is rejected when combined with any template
    other than "single" or "multi-model". Existing files are never
    overwritten.

    Returns:
        0 on success, 1 when the flag combination is rejected.
    """
    # Reject incompatible flags before anything is written, so a failed
    # invocation leaves the target directory untouched.
    template = args.template
    if args.multi_model:
        if template not in {"single", "multi-model"}:
            print(
                "Init failed: --multi-model cannot be combined with --template end-to-end.",
                file=sys.stderr,
            )
            return 1
        template = "multi-model"

    root = Path(args.path).resolve()
    created = _init_schemas_and_policies(root)
    config_dst = root / "dm.config.yaml"

    if template == "multi-model":
        # Multi-model project structure
        models_dir = root / "models"
        (models_dir / "shared").mkdir(parents=True, exist_ok=True)
        (models_dir / "orders").mkdir(parents=True, exist_ok=True)

        _init_write_if_missing(
            models_dir / "shared" / "shared_dimensions.model.yaml", MULTI_MODEL_SHARED, created
        )
        _init_write_if_missing(models_dir / "orders" / "orders.model.yaml", MULTI_MODEL_ORDERS, created)
        _init_write_if_missing(
            config_dst,
            "schema: schemas/model.schema.json\n"
            "policy_schema: schemas/policy.schema.json\n"
            "policy_pack: policies/default.policy.yaml\n"
            "model_glob: \"models/**/*.model.yaml\"\n"
            "multi_model: true\n"
            "search_dirs:\n"
            " - models/shared\n"
            " - models/orders\n",
            created,
        )

        print(f"Initialized multi-model workspace at {root}")
    elif template == "end-to-end":
        models_dir = root / "models"
        (models_dir / "source").mkdir(parents=True, exist_ok=True)
        (models_dir / "transform").mkdir(parents=True, exist_ok=True)
        (models_dir / "report").mkdir(parents=True, exist_ok=True)
        (root / "docs" / "dictionary").mkdir(parents=True, exist_ok=True)

        _init_write_if_missing(
            models_dir / "source" / "source_sales_raw.model.yaml", END_TO_END_SOURCE, created
        )
        _init_write_if_missing(
            models_dir / "transform" / "commerce_transform.model.yaml", END_TO_END_TRANSFORM, created
        )
        _init_write_if_missing(
            models_dir / "report" / "commerce_reporting.model.yaml", END_TO_END_REPORT, created
        )
        _init_write_if_missing(
            root / "docs" / "dictionary" / "README.md", END_TO_END_DICTIONARY_README, created
        )
        _init_write_if_missing(
            root / "policies" / "end_to_end_dictionary.policy.yaml", END_TO_END_POLICY, created
        )
        _init_write_if_missing(
            config_dst,
            "schema: schemas/model.schema.json\n"
            "policy_schema: schemas/policy.schema.json\n"
            "policy_pack: policies/end_to_end_dictionary.policy.yaml\n"
            "model_glob: \"models/**/*.model.yaml\"\n"
            "multi_model: true\n"
            "search_dirs:\n"
            " - models/source\n"
            " - models/transform\n"
            " - models/report\n",
            created,
        )

        print(f"Initialized end-to-end modeling workspace at {root}")
    else:
        # Single-model project structure
        (root / "model-examples").mkdir(parents=True, exist_ok=True)

        _init_write_if_missing(root / "model-examples" / "starter.model.yaml", STARTER_MODEL, created)
        _init_write_if_missing(
            config_dst,
            "schema: schemas/model.schema.json\n"
            "policy_schema: schemas/policy.schema.json\n"
            "policy_pack: policies/default.policy.yaml\n"
            "model_glob: \"**/*.model.yaml\"\n",
            created,
        )

        print(f"Initialized workspace at {root}")

    for path in created:
        print(f"- {path}")
    return 0
|
|
1003
|
+
|
|
1004
|
+
|
|
1005
|
+
def cmd_validate(args: argparse.Namespace) -> int:
    """Validate ``args.model`` against ``args.schema`` and print the issues.

    Returns 1 when any issue counts as an error, otherwise 0.
    """
    schema = load_schema(args.schema)
    found = _validate_model_file(args.model, schema)[1]
    _print_issues(found)
    if has_errors(found):
        return 1
    return 0
|
|
1010
|
+
|
|
1011
|
+
|
|
1012
|
+
def cmd_lint(args: argparse.Namespace) -> int:
    """Run the lint checks on ``args.model`` and print the issues.

    Returns 1 when any issue counts as an error, otherwise 0.
    """
    found = lint_issues(load_yaml_model(args.model))
    _print_issues(found)
    if has_errors(found):
        return 1
    return 0
|
|
1017
|
+
|
|
1018
|
+
|
|
1019
|
+
def cmd_compile(args: argparse.Namespace) -> int:
    """Compile ``args.model`` to canonical JSON.

    The model is validated first; on errors the issues are printed and 1 is
    returned. Otherwise the JSON goes to ``args.out`` when set, or to stdout,
    and 0 is returned.
    """
    schema = load_schema(args.schema)
    model, found = _validate_model_file(args.model, schema)
    if has_errors(found):
        _print_issues(found)
        return 1

    rendered = json.dumps(compile_model(model), indent=2, sort_keys=False)

    if not args.out:
        print(rendered)
        return 0

    Path(args.out).write_text(rendered + "\n", encoding="utf-8")
    print(f"Wrote canonical model: {args.out}")
    return 0
|
|
1036
|
+
|
|
1037
|
+
|
|
1038
|
+
def cmd_diff(args: argparse.Namespace) -> int:
    """Print the semantic diff between ``args.old`` and ``args.new`` as JSON; always returns 0."""
    before = load_yaml_model(args.old)
    after = load_yaml_model(args.new)
    print(json.dumps(semantic_diff(before, after), indent=2))
    return 0
|
|
1044
|
+
|
|
1045
|
+
|
|
1046
|
+
def cmd_validate_all(args: argparse.Namespace) -> int:
    """Validate every file matched by ``args.glob`` against ``args.schema``.

    Files matching any pattern in ``args.exclude`` are skipped. Returns 1
    when at least one file has errors, otherwise 0 (including when the glob
    matches nothing).
    """
    schema = load_schema(args.schema)

    matched = set()
    for hit in glob.glob(args.glob, recursive=True):
        if Path(hit).is_file():
            matched.add(Path(hit))
    ordered = sorted(matched)

    if not ordered:
        print(f"No files matched glob: {args.glob}")
        return 0

    failures = 0
    for candidate in ordered:
        if not any(candidate.match(pattern) for pattern in args.exclude):
            _, found = _validate_model_file(str(candidate), schema)
            _print_issue_block(str(candidate), found)
            if has_errors(found):
                failures += 1

    if not failures:
        print("All model files passed validation.")
        return 0

    print(f"Validation failed for {failures} file(s).")
    return 1
|
|
1076
|
+
|
|
1077
|
+
|
|
1078
|
+
def cmd_gate(args: argparse.Namespace) -> int:
    """Gate a model change: validate both versions, then check for breaking changes.

    Returns:
        1 when either model has validation errors, 2 when the diff reports
        breaking changes and ``args.allow_breaking`` is not set, otherwise 0.
    """
    schema = load_schema(args.schema)

    old_model, old_issues = _validate_model_file(args.old, schema)
    new_model, new_issues = _validate_model_file(args.new, schema)

    _print_issue_block(f"Old model ({args.old})", old_issues)
    _print_issue_block(f"New model ({args.new})", new_issues)

    if has_errors([*old_issues, *new_issues]):
        print("Gate failed: model validation errors detected.")
        return 1

    diff = semantic_diff(old_model, new_model)
    if not args.output_json:
        counts = diff["summary"]
        print("Diff summary:")
        print(
            f" entities +{counts['added_entities']} -{counts['removed_entities']} "
            f"changed:{counts['changed_entities']}"
        )
        print(f" relationships +{counts['added_relationships']} -{counts['removed_relationships']}")
        print(f" metrics +{counts['added_metrics']} -{counts['removed_metrics']} changed:{counts['changed_metrics']}")
        print(f" breaking changes: {counts['breaking_change_count']}")
        if diff["breaking_changes"]:
            print("Breaking changes:")
            for change in diff["breaking_changes"]:
                print(f" - {change}")
    else:
        print(json.dumps(diff, indent=2))

    blocked = diff["has_breaking_changes"] and not args.allow_breaking
    if blocked:
        print("Gate failed: breaking changes detected. Use --allow-breaking to bypass.")
        return 2

    print("Gate passed.")
    return 0
|
|
1118
|
+
|
|
1119
|
+
|
|
1120
|
+
def cmd_policy_check(args: argparse.Namespace) -> int:
    """Evaluate a policy pack against a model.

    The model and the policy pack are schema-checked first; any error there
    ends the run with 1 before policies are evaluated. With
    ``args.output_json`` set, a JSON report of the evaluation is printed as
    well. Returns 1 when the evaluation yields errors, otherwise 0.
    """
    schema = load_schema(args.schema)
    policy_schema = load_schema(args.policy_schema)

    model, model_issues = _validate_model_file(args.model, schema)
    # An optional `inherit` flag switches to the inheritance-aware loader.
    loader = load_policy_pack_with_inheritance if getattr(args, "inherit", False) else load_policy_pack
    policy_pack = loader(args.policy)
    pack_issues = schema_issues(policy_pack, policy_schema)

    _print_issue_block(f"Model checks ({args.model})", model_issues)
    _print_issue_block(f"Policy pack checks ({args.policy})", pack_issues)

    if has_errors(model_issues) or has_errors(pack_issues):
        print("Policy check failed: validation errors detected before policy evaluation.")
        return 1

    findings = policy_issues(model, policy_pack)
    _print_issue_block("Policy evaluation", findings)

    if args.output_json:

        def tally(level: str) -> int:
            return sum(1 for item in findings if item.severity == level)

        report = {
            "model": args.model,
            "policy": args.policy,
            "summary": {
                "error_count": tally("error"),
                "warning_count": tally("warn"),
                "info_count": tally("info"),
            },
            "issues": _issues_as_json(findings),
        }
        print(json.dumps(report, indent=2))

    if not has_errors(findings):
        print("Policy check passed.")
        return 0

    print("Policy check failed.")
    return 1
|
|
1160
|
+
|
|
1161
|
+
|
|
1162
|
+
def cmd_generate_sql(args: argparse.Namespace) -> int:
    """Generate SQL DDL for ``args.model`` in ``args.dialect``.

    Returns 1 (after printing the issues) when validation finds errors;
    otherwise writes the DDL to ``args.out`` or prints it, and returns 0.
    """
    schema = load_schema(args.schema)
    model, found = _validate_model_file(args.model, schema)
    if has_errors(found):
        _print_issues(found)
        return 1

    statements = generate_sql_ddl(model, dialect=args.dialect)
    if not args.out:
        print(statements)
        return 0

    Path(args.out).write_text(statements, encoding="utf-8")
    print(f"Wrote SQL DDL: {args.out}")
    return 0
|
|
1178
|
+
|
|
1179
|
+
|
|
1180
|
+
def cmd_generate_dbt(args: argparse.Namespace) -> int:
    """Write a dbt scaffold for ``args.model`` into ``args.out_dir``.

    Returns 1 (after printing the issues) when validation finds errors;
    otherwise lists the created files and returns 0.
    """
    schema = load_schema(args.schema)
    model, found = _validate_model_file(args.model, schema)
    if has_errors(found):
        _print_issues(found)
        return 1

    scaffold_files = write_dbt_scaffold(
        model=model,
        out_dir=args.out_dir,
        source_name=args.source_name,
        project_name=args.project_name,
    )

    print(f"Created dbt scaffold files ({len(scaffold_files)}):")
    for scaffold_file in scaffold_files:
        print(f"- {scaffold_file}")
    return 0
|
|
1200
|
+
|
|
1201
|
+
|
|
1202
|
+
def cmd_generate_metadata(args: argparse.Namespace) -> int:
    """Export a JSON metadata summary of ``args.model``.

    Returns 1 (after printing the issues) when validation finds errors;
    otherwise writes the export to ``args.out`` or prints it, and returns 0.
    """
    schema = load_schema(args.schema)
    model, found = _validate_model_file(args.model, schema)
    if has_errors(found):
        _print_issues(found)
        return 1

    canonical = compile_model(model)
    entities = canonical.get("entities", [])
    relationships = canonical.get("relationships", [])
    indexes = canonical.get("indexes", [])
    glossary = canonical.get("glossary", [])

    export = {
        "model": canonical.get("model", {}),
        "summary": {
            "entity_count": len(entities),
            "relationship_count": len(relationships),
            "index_count": len(indexes),
            "glossary_term_count": len(glossary),
            "rule_count": len(canonical.get("rules", [])),
        },
        "entities": entities,
        "relationships": relationships,
        "indexes": indexes,
        "glossary": glossary,
        "governance": canonical.get("governance", {}),
        "generated_by": "datalex generate metadata",
    }
    rendered = json.dumps(export, indent=2)

    if not args.out:
        print(rendered)
        return 0

    Path(args.out).write_text(rendered + "\n", encoding="utf-8")
    print(f"Wrote metadata export: {args.out}")
    return 0
|
|
1236
|
+
|
|
1237
|
+
|
|
1238
|
+
def cmd_import_sql(args: argparse.Namespace) -> int:
    """Import the SQL DDL file ``args.input`` as a YAML model.

    The imported model is checked against ``args.schema`` and then written
    to ``args.out`` or printed, even when issues were found. Returns 1 when
    the checks report errors, otherwise 0.
    """
    source_text = Path(args.input).read_text(encoding="utf-8")
    imported = import_sql_ddl(
        ddl_text=source_text,
        model_name=args.model_name,
        domain=args.domain,
        owners=args.owner or ["data-team@example.com"],
    )

    found = _combined_issues(imported, load_schema(args.schema))
    _print_issue_block("Imported model checks", found)

    if not args.out:
        print(yaml.safe_dump(imported, sort_keys=False))
    else:
        _write_yaml(args.out, imported)
        print(f"Wrote imported YAML model: {args.out}")

    if has_errors(found):
        return 1
    return 0
|
|
1258
|
+
|
|
1259
|
+
|
|
1260
|
+
def cmd_import_dbml(args: argparse.Namespace) -> int:
    """Import the DBML file ``args.input`` as a YAML model.

    The imported model is checked against ``args.schema`` and then written
    to ``args.out`` or printed, even when issues were found. Returns 1 when
    the checks report errors, otherwise 0.
    """
    source_text = Path(args.input).read_text(encoding="utf-8")
    imported = import_dbml(
        dbml_text=source_text,
        model_name=args.model_name,
        domain=args.domain,
        owners=args.owner or ["data-team@example.com"],
    )

    found = _combined_issues(imported, load_schema(args.schema))
    _print_issue_block("Imported model checks", found)

    if not args.out:
        print(yaml.safe_dump(imported, sort_keys=False))
    else:
        _write_yaml(args.out, imported)
        print(f"Wrote imported YAML model: {args.out}")

    if has_errors(found):
        return 1
    return 0
|
|
1280
|
+
|
|
1281
|
+
|
|
1282
|
+
def cmd_import_spark_schema(args: argparse.Namespace) -> int:
    """Import the Spark schema file ``args.input`` as a YAML model.

    An optional ``table_name`` attribute on *args* is forwarded to the
    importer. The imported model is checked against ``args.schema`` and then
    written to ``args.out`` or printed, even when issues were found. Returns
    1 when the checks report errors, otherwise 0.
    """
    source_text = Path(args.input).read_text(encoding="utf-8")
    imported = import_spark_schema(
        schema_text=source_text,
        model_name=args.model_name,
        domain=args.domain,
        owners=args.owner or ["data-team@example.com"],
        table_name=getattr(args, "table_name", None),
    )

    found = _combined_issues(imported, load_schema(args.schema))
    _print_issue_block("Imported model checks", found)

    if not args.out:
        print(yaml.safe_dump(imported, sort_keys=False))
    else:
        _write_yaml(args.out, imported)
        print(f"Wrote imported YAML model: {args.out}")

    if has_errors(found):
        return 1
    return 0
|
|
1303
|
+
|
|
1304
|
+
|
|
1305
|
+
def cmd_import_dbt(args: argparse.Namespace) -> int:
    """Import the dbt schema YAML file ``args.input`` as a YAML model.

    The imported model is checked against ``args.schema`` and then written
    to ``args.out`` or printed, even when issues were found. Returns 1 when
    the checks report errors, otherwise 0.
    """
    source_text = Path(args.input).read_text(encoding="utf-8")
    imported = import_dbt_schema_yml(
        schema_yml_text=source_text,
        model_name=args.model_name,
        domain=args.domain,
        owners=args.owner or ["data-team@example.com"],
    )

    found = _combined_issues(imported, load_schema(args.schema))
    _print_issue_block("Imported model checks", found)

    if not args.out:
        print(yaml.safe_dump(imported, sort_keys=False))
    else:
        _write_yaml(args.out, imported)
        print(f"Wrote imported YAML model: {args.out}")

    if has_errors(found):
        return 1
    return 0
|
|
1325
|
+
|
|
1326
|
+
|
|
1327
|
+
def cmd_dbt_sync(args: argparse.Namespace) -> int:
    """Merge DataLex model metadata into an existing dbt schema.yml (non-destructive).

    The result is written to ``args.out`` when set, otherwise back over the
    input file. Returns 1 when ``args.dbt_schema`` does not exist, else 0.
    """
    model = load_yaml_model(args.model)
    source_path = Path(args.dbt_schema)
    if not source_path.exists():
        print(f"ERROR: dbt schema file not found: {source_path}", file=sys.stderr)
        return 1

    merged = sync_dbt_schema_yml(model, source_path.read_text(encoding="utf-8"))

    out_arg = getattr(args, "out", None)
    out_path = Path(out_arg) if out_arg else source_path
    out_path.write_text(merged, encoding="utf-8")
    print(f"dbt schema synced: {out_path}")
    return 0
|
|
1340
|
+
|
|
1341
|
+
|
|
1342
|
+
def cmd_dbt_push(args: argparse.Namespace) -> int:
    """Push DataLex metadata into all schema.yml files found in a dbt project directory.

    Files are synced in place. A file that fails is reported on stderr and
    skipped. Returns 1 when the directory is missing or holds no schema
    files, otherwise 0.
    """
    model = load_yaml_model(args.model)
    project_root = Path(args.dbt_project)
    if not project_root.is_dir():
        print(f"ERROR: dbt project directory not found: {project_root}", file=sys.stderr)
        return 1

    schema_files = [*project_root.rglob("schema.yml"), *project_root.rglob("schema.yaml")]
    if not schema_files:
        print("No dbt schema.yml files found in project directory.", file=sys.stderr)
        return 1

    synced = 0
    for schema_file in sorted(schema_files):
        # Best effort: one unreadable or unmergeable file must not stop the rest.
        try:
            current = schema_file.read_text(encoding="utf-8")
            schema_file.write_text(sync_dbt_schema_yml(model, current), encoding="utf-8")
            print(f" synced: {schema_file}")
            synced += 1
        except Exception as exc:
            print(f" WARN: skipping {schema_file}: {exc}", file=sys.stderr)

    print(f"dbt push complete. Updated {synced} schema.yml file(s).")
    return 0
|
|
1365
|
+
|
|
1366
|
+
|
|
1367
|
+
def _build_connector_extra(args: argparse.Namespace) -> Dict[str, Any]:
    """Collect the optional connector flags that are set (truthy) on *args*.

    Only ``odbc_driver``, ``encrypt``, ``trust_server_certificate`` and
    ``http_path`` are considered; unset or missing ones are left out.
    """
    option_names = ("odbc_driver", "encrypt", "trust_server_certificate", "http_path")
    extra: Dict[str, Any] = {}
    for option in option_names:
        value = getattr(args, option, "")
        if value:
            extra[option] = value
    return extra
|
|
1378
|
+
|
|
1379
|
+
|
|
1380
|
+
def cmd_pull(args: argparse.Namespace) -> int:
    """Pull a schema through the connector named by ``args.connector``.

    With a truthy ``test`` attribute on *args*, only the connection is
    tested. Otherwise the pulled model is written to the resolved output
    path, or printed when no path was resolved.

    Returns 1 for an unknown connector, a failed driver check, a failed
    connection test or an unresolvable output path; otherwise 0.
    """

    def text_option(name: str) -> str:
        # Missing or falsy attributes collapse to an empty string.
        return getattr(args, name, "") or ""

    kind = args.connector
    connector = get_connector(kind)
    if connector is None:
        print(f"Unknown connector: {kind}", file=sys.stderr)
        print(f"Available: {', '.join(c['type'] for c in list_connectors())}", file=sys.stderr)
        return 1

    driver_ok, driver_msg = connector.check_driver()
    if not driver_ok:
        print(f"Driver check failed: {driver_msg}", file=sys.stderr)
        return 1

    host, port = _normalize_host_and_port(
        text_option("host"),
        getattr(args, "port", 0) or 0,
    )
    owner = getattr(args, "owner", None)

    config = ConnectorConfig(
        connector_type=kind,
        host=host,
        port=port,
        database=text_option("database"),
        schema=text_option("db_schema"),
        user=text_option("user"),
        password=text_option("password"),
        warehouse=text_option("warehouse"),
        project=text_option("project"),
        dataset=text_option("dataset"),
        catalog=text_option("catalog"),
        token=text_option("token"),
        private_key_path=text_option("private_key_path"),
        model_name=getattr(args, "model_name", "imported_model") or "imported_model",
        domain=getattr(args, "domain", "imported") or "imported",
        owners=[owner] if owner else None,
        tables=getattr(args, "tables", None),
        exclude_tables=getattr(args, "exclude_tables", None),
        extra=_build_connector_extra(args),
    )

    if getattr(args, "test", False):
        reachable, detail = connector.test_connection(config)
        print(f"{'OK' if reachable else 'FAIL'}: {detail}")
        return 0 if reachable else 1

    resolved, destination = _resolve_pull_output_path(args, config.model_name)
    if not resolved:
        # On failure the second element carries the error message.
        print(destination, file=sys.stderr)
        return 1

    print(f"Pulling schema from {connector.display_name}...")
    result = connector.pull_schema(config)

    print(f"\n{result.summary()}")

    if result.warnings:
        for warning in result.warnings:
            print(f" [WARN] {warning}")

    if not destination:
        print("\n" + yaml.safe_dump(result.model, sort_keys=False))
        return 0

    _write_yaml(destination, result.model)
    print(f"\nWrote model: {destination}")
    return 0
|
|
1446
|
+
|
|
1447
|
+
|
|
1448
|
+
def cmd_connectors(args: argparse.Namespace) -> int:
    """List the available connectors, as JSON when ``output_json`` is set; always returns 0."""
    available = list_connectors()
    if getattr(args, "output_json", False):
        print(json.dumps(available, indent=2))
        return 0

    print("Available database connectors:\n")
    for entry in available:
        status = "installed" if entry["installed"] else "NOT INSTALLED"
        print(f" {entry['type']:12s} {entry['name']:30s} driver: {entry['driver']:25s} [{status}]")
    print(
        "\nUsage: datalex pull <connector> --host <host> --database <db> --user <user> "
        "--password <pass> [--out model.yaml | --project-dir ./models]"
    )
    return 0
|
|
1462
|
+
|
|
1463
|
+
|
|
1464
|
+
def _build_connector_config(args: argparse.Namespace) -> "ConnectorConfig":
    """Build a ``ConnectorConfig`` from the connection options on *args*.

    Every option except ``connector`` is optional; missing or falsy string
    options become ``""`` and a missing port becomes ``0`` before host and
    port are normalised.
    """

    def text_option(name: str) -> str:
        return getattr(args, name, "") or ""

    host, port = _normalize_host_and_port(
        text_option("host"),
        getattr(args, "port", 0) or 0,
    )
    extra = _build_connector_extra(args)

    return ConnectorConfig(
        connector_type=args.connector,
        host=host,
        port=port,
        database=text_option("database"),
        schema=text_option("db_schema"),
        user=text_option("user"),
        password=text_option("password"),
        warehouse=text_option("warehouse"),
        project=text_option("project"),
        dataset=text_option("dataset"),
        catalog=text_option("catalog"),
        token=text_option("token"),
        private_key_path=text_option("private_key_path"),
        extra=extra,
    )
|
|
1487
|
+
|
|
1488
|
+
|
|
1489
|
+
def cmd_schemas(args: argparse.Namespace) -> int:
    """List the schemas visible through ``args.connector``.

    Prints JSON when ``output_json`` is set, otherwise a plain listing.
    Returns 1 for an unknown connector or a failed driver check, else 0.
    """
    connector = get_connector(args.connector)
    if connector is None:
        print(f"Unknown connector: {args.connector}", file=sys.stderr)
        return 1

    driver_ok, driver_msg = connector.check_driver()
    if not driver_ok:
        print(f"Driver check failed: {driver_msg}", file=sys.stderr)
        return 1

    config = _build_connector_config(args)
    found = connector.list_schemas(config)

    if getattr(args, "output_json", False):
        print(json.dumps(found, indent=2))
        return 0

    print(f"Schemas in {connector.display_name} ({config.database or config.project or 'default'}):\n")
    for entry in found:
        print(f" {entry['name']:30s} {entry['table_count']:4d} tables")
    return 0
|
|
1509
|
+
|
|
1510
|
+
|
|
1511
|
+
def cmd_tables(args: argparse.Namespace) -> int:
    """List the tables visible through ``args.connector``.

    Prints JSON when ``output_json`` is set, otherwise an aligned listing of
    name, type, column count and row count.

    Returns 1 for an unknown connector or a failed driver check, else 0.
    """
    connector = get_connector(args.connector)
    if connector is None:
        print(f"Unknown connector: {args.connector}", file=sys.stderr)
        return 1
    ok, msg = connector.check_driver()
    if not ok:
        print(f"Driver check failed: {msg}", file=sys.stderr)
        return 1

    config = _build_connector_config(args)
    tables = connector.list_tables(config)

    if getattr(args, "output_json", False):
        print(json.dumps(tables, indent=2))
    else:
        schema_label = config.schema or config.dataset or "default"
        print(f"Tables in {connector.display_name} / {schema_label}:\n")
        print(f" {'TABLE':30s} {'TYPE':8s} {'COLUMNS':>8s} {'ROWS':>12s}")
        print(f" {'-'*30} {'-'*8} {'-'*8} {'-'*12}")
        for t in tables:
            # Only a missing/None count is shown as "-"; a real count of 0
            # must be printed as "0" rather than collapsing to an empty cell.
            row_count = t.get("row_count")
            rows = "-" if row_count is None else str(row_count)
            print(f" {t['name']:30s} {t['type']:8s} {t['column_count']:>8d} {rows:>12s}")
        print(f"\n Total: {len(tables)} tables")
    return 0
|
|
1536
|
+
|
|
1537
|
+
|
|
1538
|
+
def cmd_generate_docs(args: argparse.Namespace) -> int:
    """Render documentation for ``args.model`` as HTML or Markdown.

    Output goes to ``args.out`` when set, otherwise to stdout. Any other
    ``args.format`` value produces no output. Always returns 0.
    """
    model = load_yaml_model(args.model)
    chosen = args.format

    if chosen == "html":
        if not args.out:
            print(generate_html_docs(model, title=args.title))
            return 0
        write_html_docs(model, args.out, title=args.title)
        print(f"Wrote HTML docs: {args.out}")
        return 0

    if chosen == "markdown":
        if not args.out:
            print(generate_markdown_docs(model, title=args.title))
            return 0
        write_markdown_docs(model, args.out, title=args.title)
        print(f"Wrote Markdown docs: {args.out}")
        return 0

    return 0
|
|
1556
|
+
|
|
1557
|
+
|
|
1558
|
+
def cmd_generate_changelog(args: argparse.Namespace) -> int:
    """Produce a changelog from the semantic diff of two model files.

    Loads ``args.old`` and ``args.new``, diffs them, and either writes the
    changelog to ``args.out`` or prints it. Each model's ``model.version``
    (empty string when absent) is passed along as the version label.

    Returns:
        Always 0.
    """
    before = load_yaml_model(args.old)
    after = load_yaml_model(args.new)
    changes = semantic_diff(before, after)

    versions = {
        "old_version": before.get("model", {}).get("version", ""),
        "new_version": after.get("model", {}).get("version", ""),
    }

    if not args.out:
        print(generate_changelog(changes, **versions))
        return 0

    write_changelog(changes, args.out, **versions)
    print(f"Wrote changelog: {args.out}")
    return 0
|
|
1573
|
+
|
|
1574
|
+
|
|
1575
|
+
def cmd_fmt(args: argparse.Namespace) -> int:
    """Re-emit a model file in canonical YAML form.

    The model at ``args.model`` is compiled and dumped with key order kept.
    ``args.write`` overwrites the input file, ``args.out`` writes to another
    path, and with neither set the YAML is printed to stdout.

    Returns:
        Always 0.
    """
    canonical = compile_model(load_yaml_model(args.model))
    rendered = yaml.safe_dump(
        canonical,
        sort_keys=False,
        default_flow_style=False,
        allow_unicode=True,
    )

    if args.write:
        target, notice = args.model, f"Formatted: {args.model}"
    elif args.out:
        target, notice = args.out, f"Wrote formatted model: {args.out}"
    else:
        print(rendered)
        return 0

    Path(target).write_text(rendered, encoding="utf-8")
    print(notice)
    return 0
|
|
1590
|
+
|
|
1591
|
+
|
|
1592
|
+
def cmd_stats(args: argparse.Namespace) -> int:
    """Print summary statistics for a model file.

    Counts entities, fields, keys, relationships, indexes, glossary terms and
    rules, plus field description coverage, and prints them as JSON when
    ``args.output_json`` is set or as plain text otherwise.

    Sections that are present in the YAML but empty (and therefore load as
    ``None``) are treated the same as missing sections instead of raising.

    Returns:
        Always 0.
    """
    model = load_yaml_model(args.model)
    # `or []` / `or {}` also covers keys that exist with an explicit null value.
    entities = model.get("entities") or []
    relationships = model.get("relationships") or []
    indexes = model.get("indexes") or []
    glossary = model.get("glossary") or []
    rules = model.get("rules") or []
    header = model.get("model") or {}

    total_fields = 0
    pk_count = 0
    fk_count = 0
    nullable_count = 0
    described_fields = 0
    deprecated_count = 0
    entity_types = {}
    subject_areas = set()
    tags = set()

    # One pass over entities and their fields gathers every counter.
    for entity in entities:
        kind = entity.get("type", "table")
        entity_types[kind] = entity_types.get(kind, 0) + 1
        if entity.get("subject_area"):
            subject_areas.add(entity.get("subject_area"))
        tags.update(entity.get("tags") or [])

        for field in entity.get("fields") or []:
            total_fields += 1
            if field.get("primary_key"):
                pk_count += 1
            if field.get("foreign_key"):
                fk_count += 1
            # A field without an explicit `nullable` key counts as nullable.
            if field.get("nullable", True):
                nullable_count += 1
            if field.get("description"):
                described_fields += 1
            if field.get("deprecated"):
                deprecated_count += 1

    desc_coverage = f"{described_fields}/{total_fields}" if total_fields else "0/0"
    desc_pct = f"{described_fields / total_fields * 100:.0f}%" if total_fields else "0%"

    stats = {
        "model_name": header.get("name", "unknown"),
        "version": header.get("version", "unknown"),
        "entity_count": len(entities),
        "entity_types": entity_types,
        "total_fields": total_fields,
        "primary_keys": pk_count,
        "foreign_keys": fk_count,
        "nullable_fields": nullable_count,
        "relationship_count": len(relationships),
        "index_count": len(indexes),
        "glossary_terms": len(glossary),
        "rule_count": len(rules),
        "description_coverage": f"{desc_coverage} ({desc_pct})",
        "deprecated_fields": deprecated_count,
        "subject_areas": sorted(subject_areas),
        "tags": sorted(tags),
    }

    if args.output_json:
        print(json.dumps(stats, indent=2))
        return 0

    print(f"Model: {stats['model_name']} v{stats['version']}")
    print(f"Entities: {stats['entity_count']} ({', '.join(f'{v} {k}' for k, v in entity_types.items())})")
    print(f"Fields: {stats['total_fields']} (PK: {pk_count}, FK: {fk_count}, nullable: {nullable_count})")
    print(f"Relationships: {stats['relationship_count']}")
    print(f"Indexes: {stats['index_count']}")
    print(f"Glossary terms: {stats['glossary_terms']}")
    print(f"Rules: {stats['rule_count']}")
    print(f"Description coverage: {desc_coverage} ({desc_pct})")
    # The optional lines below are only shown when there is something to report.
    if deprecated_count:
        print(f"Deprecated fields: {deprecated_count}")
    if subject_areas:
        print(f"Subject areas: {', '.join(sorted(subject_areas))}")
    if tags:
        print(f"Tags: {', '.join(sorted(tags))}")

    return 0
|
|
1667
|
+
|
|
1668
|
+
|
|
1669
|
+
def cmd_completeness(args: argparse.Namespace) -> int:
    """Score every entity in a model against the single-source-of-truth dimensions.

    With ``args.output_json`` the report is dumped as JSON and the function
    returns 0 immediately, so ``args.min_score`` is not evaluated in that
    mode. Otherwise a per-entity table is printed, with the missing
    dimensions listed unless ``args.summary`` is set.

    Returns:
        1 when ``args.min_score`` is set and at least one entity scores below
        it (text mode only); 0 otherwise.
    """
    model = load_yaml_model(args.model)
    report = completeness_report(model)
    data = completeness_as_dict(report)

    if args.output_json:
        print(json.dumps(data, indent=2))
        return 0

    # ── Human-readable report ─────────────────────────────────────────────────
    BAR_WIDTH = 20
    SCORE_PASS = 80
    SCORE_WARN = 60

    def _bar(score: int) -> str:
        """Draw a BAR_WIDTH-wide gauge whose fill glyph reflects the score band."""
        if score >= SCORE_PASS:
            fill_char = "█"
        elif score >= SCORE_WARN:
            fill_char = "▓"
        else:
            fill_char = "▒"
        filled = round(score / 100 * BAR_WIDTH)
        return fill_char * filled + "░" * (BAR_WIDTH - filled)

    def _score_label(score: int) -> str:
        """Map a score to its status word."""
        if score == 100:
            return "COMPLETE"
        if score < SCORE_WARN:
            return "GAPS "
        if score < SCORE_PASS:
            return "PARTIAL "
        return "GOOD "

    print(f"\nCompleteness report — {report.model_name}")
    print(
        f"Model score: {report.model_score}% "
        f"({report.fully_complete}/{report.total_entities} fully complete)\n"
    )
    print(f" {'Entity':<30} {'Score':>5} {'':^{BAR_WIDTH}} Status")
    print(f" {'-'*30} {'-----':>5} {'-'*BAR_WIDTH} --------")

    show_missing = not args.summary
    for entity in report.entities:
        gauge = _bar(entity.score)
        status = _score_label(entity.score)
        print(f" {entity.entity_name:<30} {entity.score:>4}% {gauge} {status}")
        if entity.missing and show_missing:
            for gap in entity.missing:
                print(f" {'':30} ↳ missing: {gap}")

    if report.needs_attention:
        print(f"\n Needs attention (<60%): {', '.join(report.needs_attention)}")

    # Surface completeness as lint-style warnings when --min-score is set
    if args.min_score is None:
        return 0

    below = [entity for entity in report.entities if entity.score < args.min_score]
    if not below:
        return 0

    print(
        f"\n {len(below)} entity/entities below minimum score of {args.min_score}%:"
    )
    for entity in below:
        print(f" [{entity.score}%] {entity.entity_name}")
    return 1
|
|
1732
|
+
|
|
1733
|
+
|
|
1734
|
+
def cmd_resolve(args: argparse.Namespace) -> int:
    """Resolve a model's imports and print a summary of the resulting graph.

    Resolution issues are listed first. In text mode they go to stdout as
    before. With ``args.output_json`` they go to stderr, so stdout holds only
    the JSON document and stays parseable when issues exist.

    Returns:
        1 when any resolution issue has severity ``"error"``, otherwise 0.
    """
    search_dirs = args.search_dir if args.search_dir else []
    resolved = resolve_model(args.model, search_dirs=search_dirs)

    # Keep diagnostics off stdout when the caller asked for machine-readable output.
    issue_stream = sys.stderr if args.output_json else sys.stdout
    for iss in resolved.issues:
        sev = iss.severity.upper()
        print(f" [{sev}] {iss.code}: {iss.message}", file=issue_stream)

    summary = resolved.to_graph_summary()

    if args.output_json:
        print(json.dumps(summary, indent=2))
    else:
        print(f"Root model: {summary['root_model']}")
        print(f"Models resolved: {summary['model_count']}")
        print(f"Total entities: {summary['total_entities']}")
        for m in summary["models"]:
            # The root model is marked with "*".
            prefix = "*" if m["is_root"] else " "
            alias = f" (alias: {m.get('alias', '')})" if m.get("alias") else ""
            print(f" {prefix} {m['name']}{alias}: {m['entity_count']} entities [{', '.join(m['entities'])}]")
        cross = summary["cross_model_relationships"]
        if cross:
            print(f"Cross-model relationships: {len(cross)}")
            for cr in cross:
                print(f" {cr['from_model']}.{cr['from']} -> {cr['to_model']}.{cr['to']} ({cr['cardinality']})")

    has_errs = any(i.severity == "error" for i in resolved.issues)
    return 1 if has_errs else 0
|
|
1763
|
+
|
|
1764
|
+
|
|
1765
|
+
def cmd_diff_all(args: argparse.Namespace) -> int:
    """Diff two project directories and report model-level changes.

    Prints the diff as JSON when ``args.output_json`` is set, otherwise as a
    text summary with per-model entity/metric counts and breaking changes.

    Returns:
        2 when breaking changes exist and ``args.allow_breaking`` is not set,
        otherwise 0.
    """
    diff = project_diff(args.old, args.new)

    if args.output_json:
        print(json.dumps(diff, indent=2))
    else:
        totals = diff["summary"]
        print(f"Project diff: {args.old} -> {args.new}")
        print(f" Models: +{totals['added_models']} -{totals['removed_models']} changed:{totals['changed_models']} unchanged:{totals['unchanged_models']}")

        added = diff["added_models"]
        if added:
            print(f" Added: {', '.join(added)}")
        removed = diff["removed_models"]
        if removed:
            print(f" Removed: {', '.join(removed)}")
        changed = diff["changed_models"]
        if changed:
            print(f" Changed: {', '.join(changed)}")

        for name, model_diff in diff["model_diffs"].items():
            counts = model_diff["summary"]
            print(f" [{name}] entities +{counts['added_entities']} -{counts['removed_entities']} changed:{counts['changed_entities']}")
            print(f" [{name}] metrics +{counts['added_metrics']} -{counts['removed_metrics']} changed:{counts['changed_metrics']}")

        print(f" Breaking changes: {totals['breaking_change_count']}")
        if diff["breaking_changes"]:
            for breaking in diff["breaking_changes"]:
                print(f" - {breaking}")

    # NOTE(review): this message is printed to stdout in JSON mode too, after
    # the JSON document.
    blocked = diff["has_breaking_changes"] and not args.allow_breaking
    if blocked:
        print("Project diff failed: breaking changes detected. Use --allow-breaking to bypass.")
        return 2

    return 0
|
|
1794
|
+
|
|
1795
|
+
|
|
1796
|
+
def cmd_transform(args: argparse.Namespace) -> int:
    """Transform a validated model to the logical or physical layer.

    The input is validated against ``args.schema`` first. The target layer is
    ``"logical"`` for the ``conceptual-to-logical`` sub-command and
    ``"physical"`` for any other. The transformed model is validated again
    before it is printed or written to ``args.out``.

    Returns:
        1 when either validation pass reports errors, otherwise 0.
    """
    schema = load_schema(args.schema)
    source_model, source_issues = _validate_model_file(args.model, schema)
    if has_errors(source_issues):
        _print_issues(source_issues)
        return 1

    if args.transform_command == "conceptual-to-logical":
        target_kind = "logical"
    else:
        target_kind = "physical"

    dialect = getattr(args, "dialect", "postgres")
    result = transform_model(source_model, target_kind=target_kind, dialect=dialect)

    result_issues = _combined_issues(result, schema)
    if has_errors(result_issues):
        _print_issues(result_issues)
        return 1

    destination = getattr(args, "out", "") or ""
    _print_or_write_yaml(result, destination)
    return 0
|
|
1812
|
+
|
|
1813
|
+
|
|
1814
|
+
def cmd_standards_check(args: argparse.Namespace) -> int:
    """Validate a model and report schema plus standards issues.

    Standards issues are appended to the validation issues, and the combined
    list is printed as JSON (``args.output_json``) or as text.

    Returns:
        1 when the combined issues contain errors, otherwise 0.
    """
    schema = load_schema(args.schema)
    model, issues = _validate_model_file(args.model, schema)
    issues.extend(standards_issues(model))

    if not args.output_json:
        _print_issues(issues)
    else:
        payload = {"issues": _issues_as_json(issues)}
        print(json.dumps(payload, indent=2))

    if has_errors(issues):
        return 1
    return 0
|
|
1824
|
+
|
|
1825
|
+
|
|
1826
|
+
def cmd_standards_fix(args: argparse.Namespace) -> int:
    """Apply the supported standards autofixes to a model.

    With ``args.write`` the fixed model replaces the input file. With
    ``args.out`` it is written there. With neither, the list of applied
    changes is printed as ``#`` comment lines before the YAML.

    Returns:
        Always 0.
    """
    model = load_yaml_model(args.model)
    fixed, changes = apply_standards_fixes(model)

    to_stdout = not (args.write or args.out)
    if to_stdout:
        print("# Applied supported standards autofixes")
        for change in changes:
            print(f"# - {change}")
        print("")

    if args.write:
        destination = args.model
    else:
        destination = args.out or ""
    _print_or_write_yaml(fixed, destination)
    return 0
|
|
1838
|
+
|
|
1839
|
+
|
|
1840
|
+
def cmd_sync_compare(args: argparse.Namespace) -> int:
    """Print the semantic diff between the current and candidate models as JSON.

    Returns:
        2 when the diff has breaking changes and ``args.allow_breaking`` is
        not set, otherwise 0.
    """
    diff = semantic_diff(
        load_yaml_model(args.current),
        load_yaml_model(args.candidate),
    )
    print(json.dumps(diff, indent=2))

    if diff["has_breaking_changes"] and not args.allow_breaking:
        return 2
    return 0
|
|
1846
|
+
|
|
1847
|
+
|
|
1848
|
+
def cmd_sync_merge(args: argparse.Namespace) -> int:
    """Merge a candidate model into the current one via ``merge_models_preserving_docs``.

    The merged model is printed, or written to ``args.out`` when it is set.

    Returns:
        Always 0.
    """
    merged = merge_models_preserving_docs(
        load_yaml_model(args.current),
        load_yaml_model(args.candidate),
    )
    destination = getattr(args, "out", "") or ""
    _print_or_write_yaml(merged, destination)
    return 0
|
|
1854
|
+
|
|
1855
|
+
|
|
1856
|
+
def cmd_sync_pull(args: argparse.Namespace) -> int:
    """Forward ``args`` unchanged to ``cmd_pull`` and return its exit code."""
    exit_code = cmd_pull(args)
    return exit_code
|
|
1858
|
+
|
|
1859
|
+
|
|
1860
|
+
def cmd_resolve_project(args: argparse.Namespace) -> int:
    """Resolve every model under a project directory and report the results.

    One record per resolved file (iterated in sorted key order) carries the
    model name, imports, entity names and resolution issues. Output is JSON
    when ``args.output_json`` is set, otherwise a text listing.

    Returns:
        1 when any model has at least one issue, otherwise 0.
    """
    search_dirs = args.search_dir or []
    resolved_by_path = resolve_project(args.directory, search_dirs=search_dirs)

    model_rows = []
    for path, resolved in sorted(resolved_by_path.items()):
        model_name = resolved.root_model.get("model", {}).get("name", "unknown")
        import_names = list(resolved.imported_models.keys())
        entity_names = [entity.get("name", "") for entity in resolved.unified_entities()]
        issue_rows = [
            {"severity": issue.severity, "code": issue.code, "message": issue.message}
            for issue in resolved.issues
        ]
        model_rows.append({
            "name": model_name,
            "file": path,
            "imports": import_names,
            "entity_count": len(entity_names),
            "entities": entity_names,
            "issue_count": len(resolved.issues),
            "issues": issue_rows,
        })

    total_issues = sum(row["issue_count"] for row in model_rows)

    if args.output_json:
        print(json.dumps({"models": model_rows, "total_issues": total_issues}, indent=2))
    else:
        print(f"Project: {args.directory}")
        print(f"Models found: {len(model_rows)}")
        for row in model_rows:
            imp_str = f" (imports: {', '.join(row['imports'])})" if row["imports"] else ""
            if row["issue_count"] == 0:
                status = "OK"
            else:
                status = f"{row['issue_count']} issues"
            print(f" {row['name']}: {row['entity_count']} entities{imp_str} [{status}]")
            for issue in row["issues"]:
                print(f" [{issue['severity'].upper()}] {issue['code']}: {issue['message']}")
        print(f"Total issues: {total_issues}")

    if total_issues > 0:
        return 1
    return 0
|
|
1901
|
+
|
|
1902
|
+
|
|
1903
|
+
def cmd_schema(args: argparse.Namespace) -> int:
    """Load the schema named by ``args.schema`` and print it as indented JSON.

    Returns:
        Always 0.
    """
    loaded = load_schema(args.schema)
    rendered = json.dumps(loaded, indent=2)
    print(rendered)
    return 0
|
|
1907
|
+
|
|
1908
|
+
|
|
1909
|
+
def cmd_policy_schema(args: argparse.Namespace) -> int:
    """Load the schema named by ``args.policy_schema`` and print it as indented JSON.

    Returns:
        Always 0.
    """
    loaded = load_schema(args.policy_schema)
    rendered = json.dumps(loaded, indent=2)
    print(rendered)
    return 0
|
|
1913
|
+
|
|
1914
|
+
|
|
1915
|
+
def cmd_doctor(args: argparse.Namespace) -> int:
    """Run project diagnostics and print the results.

    Diagnostics run against ``args.path`` (``"."`` when the attribute is
    absent) and are printed as JSON when ``args.output_json`` is truthy,
    otherwise as formatted text.

    Returns:
        1 when any result has status ``"error"``, otherwise 0.
    """
    results = run_diagnostics(getattr(args, "path", "."))

    want_json = getattr(args, "output_json", False)
    if want_json:
        rendered = json.dumps(diagnostics_as_json(results), indent=2)
    else:
        rendered = format_diagnostics(results)
    print(rendered)

    failures = [result for result in results if result.status == "error"]
    if failures:
        return 1
    return 0
|
|
1926
|
+
|
|
1927
|
+
|
|
1928
|
+
def cmd_migrate(args: argparse.Namespace) -> int:
    """Generate migration SQL that takes ``args.old`` to ``args.new``.

    The SQL dialect comes from ``args.dialect`` (``"postgres"`` when the
    attribute is absent). The SQL is written to ``args.out`` when set,
    otherwise printed.

    Returns:
        Always 0.
    """
    before = load_yaml_model(args.old)
    after = load_yaml_model(args.new)
    dialect = getattr(args, "dialect", "postgres")

    if not args.out:
        print(generate_migration(before, after, dialect=dialect))
        return 0

    write_migration(before, after, args.out, dialect=dialect)
    print(f"Wrote migration SQL: {args.out}")
    return 0
|
|
1941
|
+
|
|
1942
|
+
|
|
1943
|
+
def _split_sql_statements(sql_text: str) -> List[str]:
|
|
1944
|
+
statements: List[str] = []
|
|
1945
|
+
buf: List[str] = []
|
|
1946
|
+
in_single = False
|
|
1947
|
+
in_double = False
|
|
1948
|
+
in_line_comment = False
|
|
1949
|
+
in_block_comment = False
|
|
1950
|
+
i = 0
|
|
1951
|
+
|
|
1952
|
+
while i < len(sql_text):
|
|
1953
|
+
ch = sql_text[i]
|
|
1954
|
+
nxt = sql_text[i + 1] if i + 1 < len(sql_text) else ""
|
|
1955
|
+
|
|
1956
|
+
if in_line_comment:
|
|
1957
|
+
if ch == "\n":
|
|
1958
|
+
in_line_comment = False
|
|
1959
|
+
buf.append(ch)
|
|
1960
|
+
i += 1
|
|
1961
|
+
continue
|
|
1962
|
+
|
|
1963
|
+
if in_block_comment:
|
|
1964
|
+
if ch == "*" and nxt == "/":
|
|
1965
|
+
in_block_comment = False
|
|
1966
|
+
i += 2
|
|
1967
|
+
continue
|
|
1968
|
+
i += 1
|
|
1969
|
+
continue
|
|
1970
|
+
|
|
1971
|
+
if not in_single and not in_double and ch == "-" and nxt == "-":
|
|
1972
|
+
in_line_comment = True
|
|
1973
|
+
i += 2
|
|
1974
|
+
continue
|
|
1975
|
+
|
|
1976
|
+
if not in_single and not in_double and ch == "/" and nxt == "*":
|
|
1977
|
+
in_block_comment = True
|
|
1978
|
+
i += 2
|
|
1979
|
+
continue
|
|
1980
|
+
|
|
1981
|
+
if ch == "'" and not in_double:
|
|
1982
|
+
if in_single and nxt == "'":
|
|
1983
|
+
buf.append(ch)
|
|
1984
|
+
buf.append(nxt)
|
|
1985
|
+
i += 2
|
|
1986
|
+
continue
|
|
1987
|
+
in_single = not in_single
|
|
1988
|
+
buf.append(ch)
|
|
1989
|
+
i += 1
|
|
1990
|
+
continue
|
|
1991
|
+
|
|
1992
|
+
if ch == '"' and not in_single:
|
|
1993
|
+
in_double = not in_double
|
|
1994
|
+
buf.append(ch)
|
|
1995
|
+
i += 1
|
|
1996
|
+
continue
|
|
1997
|
+
|
|
1998
|
+
if ch == ";" and not in_single and not in_double:
|
|
1999
|
+
stmt = "".join(buf).strip()
|
|
2000
|
+
if stmt:
|
|
2001
|
+
statements.append(stmt)
|
|
2002
|
+
buf = []
|
|
2003
|
+
i += 1
|
|
2004
|
+
continue
|
|
2005
|
+
|
|
2006
|
+
buf.append(ch)
|
|
2007
|
+
i += 1
|
|
2008
|
+
|
|
2009
|
+
tail = "".join(buf).strip()
|
|
2010
|
+
if tail:
|
|
2011
|
+
statements.append(tail)
|
|
2012
|
+
return statements
|
|
2013
|
+
|
|
2014
|
+
|
|
2015
|
+
def _escape_sql_string(value: str) -> str:
|
|
2016
|
+
return value.replace("'", "''")
|
|
2017
|
+
|
|
2018
|
+
|
|
2019
|
+
def _sql_checksum(sql_text: str) -> str:
|
|
2020
|
+
return hashlib.sha256(sql_text.encode("utf-8")).hexdigest()
|
|
2021
|
+
|
|
2022
|
+
|
|
2023
|
+
def _default_migration_name() -> str:
|
|
2024
|
+
return f"migration_{time.strftime('%Y%m%d%H%M%S', time.gmtime())}"
|
|
2025
|
+
|
|
2026
|
+
|
|
2027
|
+
def _preview_sql(statement: str, max_len: int = 180) -> str:
|
|
2028
|
+
flat = " ".join(statement.strip().split())
|
|
2029
|
+
return flat if len(flat) <= max_len else f"{flat[: max_len - 3]}..."
|
|
2030
|
+
|
|
2031
|
+
|
|
2032
|
+
def _detect_destructive_statements(statements: List[str]) -> List[Dict[str, Any]]:
|
|
2033
|
+
checks = [
|
|
2034
|
+
("DROP TABLE", re.compile(r"\bDROP\s+TABLE\b", re.IGNORECASE)),
|
|
2035
|
+
("DROP VIEW", re.compile(r"\bDROP\s+VIEW\b", re.IGNORECASE)),
|
|
2036
|
+
("DROP SCHEMA", re.compile(r"\bDROP\s+SCHEMA\b", re.IGNORECASE)),
|
|
2037
|
+
("DROP DATABASE", re.compile(r"\bDROP\s+DATABASE\b", re.IGNORECASE)),
|
|
2038
|
+
("TRUNCATE TABLE", re.compile(r"\bTRUNCATE\s+TABLE\b", re.IGNORECASE)),
|
|
2039
|
+
("ALTER TABLE DROP COLUMN", re.compile(r"\bALTER\s+TABLE\b[\s\S]*\bDROP\s+COLUMN\b", re.IGNORECASE)),
|
|
2040
|
+
]
|
|
2041
|
+
findings: List[Dict[str, Any]] = []
|
|
2042
|
+
for idx, statement in enumerate(statements, start=1):
|
|
2043
|
+
for check_name, pattern in checks:
|
|
2044
|
+
if pattern.search(statement):
|
|
2045
|
+
findings.append({
|
|
2046
|
+
"statement_index": idx,
|
|
2047
|
+
"kind": check_name,
|
|
2048
|
+
"preview": _preview_sql(statement),
|
|
2049
|
+
})
|
|
2050
|
+
break
|
|
2051
|
+
return findings
|
|
2052
|
+
|
|
2053
|
+
|
|
2054
|
+
def _write_apply_report(path: str, payload: Dict[str, Any]) -> None:
|
|
2055
|
+
Path(path).write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
|
|
2056
|
+
|
|
2057
|
+
|
|
2058
|
+
class ApplyExecutionError(RuntimeError):
    """Raised when one statement of a migration fails during apply.

    Attributes:
        connector: Name of the connector the statement was run against.
        statement_index: Index of the failing statement as given by the caller.
        statement: Full text of the failing statement.
        error: The underlying exception.
    """

    def __init__(self, connector: str, statement_index: int, statement: str, error: Exception):
        # The message carries a shortened preview; the full SQL stays on `.statement`.
        summary = f"{connector} apply failed at statement #{statement_index}: {_preview_sql(statement)} ({error})"
        super().__init__(summary)
        self.connector = connector
        self.statement_index = statement_index
        self.statement = statement
        self.error = error
|
|
2069
|
+
|
|
2070
|
+
|
|
2071
|
+
def _apply_snowflake(config: ConnectorConfig, statements: List[str], migration_name: str, checksum: str, ledger_table: str, skip_ledger: bool) -> None:
    """Run migration statements on Snowflake and record them in the ledger.

    Connects with key-pair auth when ``config.private_key_path`` is set (the
    password is then used as the key passphrase), otherwise with the
    password. Statements run in order on one cursor. Unless ``skip_ledger``
    is true, the ledger table is created if missing and a ``success`` row is
    inserted afterwards.

    Raises:
        ApplyExecutionError: When a migration statement fails; wraps the
            driver exception with the 1-based statement index.
    """
    import snowflake.connector
    from datalex_core.connectors.snowflake import _load_private_key

    connect_kwargs: Dict[str, Any] = {
        "account": config.host,
        "user": config.user,
        "warehouse": config.warehouse,
        "database": config.database,
        "schema": config.schema or "PUBLIC",
    }
    if config.private_key_path:
        connect_kwargs["private_key"] = _load_private_key(
            config.private_key_path, config.password or None
        )
    else:
        connect_kwargs["password"] = config.password

    connection = snowflake.connector.connect(**connect_kwargs)
    try:
        cursor = connection.cursor()
        try:
            if config.warehouse:
                # Best effort: a failed resume is deliberately ignored.
                try:
                    cursor.execute(f"ALTER WAREHOUSE IF EXISTS {config.warehouse} RESUME IF SUSPENDED")
                except Exception:
                    pass

            for position, statement in enumerate(statements, start=1):
                try:
                    cursor.execute(statement)
                except Exception as exc:
                    raise ApplyExecutionError("snowflake", position, statement, exc) from exc

            if skip_ledger:
                return

            schema_name = (config.schema or "PUBLIC").upper()
            ledger_ref = f'"{schema_name}"."{ledger_table}"'
            cursor.execute(
                f"CREATE TABLE IF NOT EXISTS {ledger_ref} ("
                "migration_name VARCHAR, checksum VARCHAR, statement_count NUMBER, "
                "status VARCHAR, applied_at TIMESTAMP_NTZ)"
            )
            escaped_name = _escape_sql_string(migration_name)
            cursor.execute(
                f"INSERT INTO {ledger_ref} "
                "(migration_name, checksum, statement_count, status, applied_at) VALUES "
                f"('{escaped_name}', '{checksum}', {len(statements)}, 'success', CURRENT_TIMESTAMP())"
            )
        finally:
            cursor.close()
    finally:
        connection.close()
|
|
2123
|
+
|
|
2124
|
+
|
|
2125
|
+
def _apply_databricks(config: ConnectorConfig, statements: List[str], migration_name: str, checksum: str, ledger_table: str, skip_ledger: bool) -> None:
    """Run migration statements on Databricks and record them in the ledger.

    Connects with ``config.host``, the ``http_path`` entry of
    ``config.extra`` and ``config.token``. Statements run in order on one
    cursor. Unless ``skip_ledger`` is true, the ledger table (catalog
    defaulting to ``main``, schema to ``default``) is created if missing and
    a ``success`` row is inserted afterwards.

    Raises:
        ApplyExecutionError: When a migration statement fails; wraps the
            driver exception with the 1-based statement index.
    """
    from databricks import sql

    connection = sql.connect(
        server_hostname=config.host,
        http_path=config.extra.get("http_path", ""),
        access_token=config.token,
    )
    try:
        cursor = connection.cursor()
        try:
            for position, statement in enumerate(statements, start=1):
                try:
                    cursor.execute(statement)
                except Exception as exc:
                    raise ApplyExecutionError("databricks", position, statement, exc) from exc

            if skip_ledger:
                return

            catalog = config.catalog or "main"
            schema_name = config.schema or "default"
            ledger_ref = f"`{catalog}`.`{schema_name}`.`{ledger_table}`"
            create_sql = (
                f"CREATE TABLE IF NOT EXISTS {ledger_ref} ("
                "migration_name STRING, checksum STRING, statement_count INT, status STRING, applied_at TIMESTAMP)"
            )
            cursor.execute(create_sql)
            escaped_name = _escape_sql_string(migration_name)
            insert_sql = (
                f"INSERT INTO {ledger_ref} (migration_name, checksum, statement_count, status, applied_at) VALUES ("
                f"'{escaped_name}', '{checksum}', {len(statements)}, 'success', current_timestamp())"
            )
            cursor.execute(insert_sql)
        finally:
            cursor.close()
    finally:
        connection.close()
|
|
2158
|
+
|
|
2159
|
+
|
|
2160
|
+
def _apply_bigquery(config: ConnectorConfig, statements: List[str], migration_name: str, checksum: str, ledger_table: str, skip_ledger: bool) -> None:
|
|
2161
|
+
from google.cloud import bigquery
|
|
2162
|
+
|
|
2163
|
+
client = bigquery.Client(project=config.project)
|
|
2164
|
+
for idx, stmt in enumerate(statements, start=1):
|
|
2165
|
+
try:
|
|
2166
|
+
client.query(stmt).result()
|
|
2167
|
+
except Exception as e:
|
|
2168
|
+
raise ApplyExecutionError("bigquery", idx, stmt, e) from e
|
|
2169
|
+
|
|
2170
|
+
if not skip_ledger:
|
|
2171
|
+
dataset = config.dataset
|
|
2172
|
+
if not dataset:
|
|
2173
|
+
raise ValueError("--dataset is required for BigQuery migration ledger")
|
|
2174
|
+
qualified = f"`{config.project}.{dataset}.{ledger_table}`"
|
|
2175
|
+
client.query(
|
|
2176
|
+
f"CREATE TABLE IF NOT EXISTS {qualified} ("
|
|
2177
|
+
"migration_name STRING, checksum STRING, statement_count INT64, status STRING, applied_at TIMESTAMP)"
|
|
2178
|
+
).result()
|
|
2179
|
+
client.query(
|
|
2180
|
+
f"INSERT INTO {qualified} (migration_name, checksum, statement_count, status, applied_at) VALUES ("
|
|
2181
|
+
f"'{_escape_sql_string(migration_name)}', '{checksum}', {len(statements)}, 'success', CURRENT_TIMESTAMP())"
|
|
2182
|
+
).result()
|
|
2183
|
+
|
|
2184
|
+
|
|
2185
|
+
def cmd_apply(args: argparse.Namespace) -> int:
|
|
2186
|
+
connector_type = args.connector
|
|
2187
|
+
dialect = (getattr(args, "dialect", "") or connector_type).lower()
|
|
2188
|
+
started_ts = time.time()
|
|
2189
|
+
mode = "sql_file" if args.sql_file else "model_diff"
|
|
2190
|
+
policy_results: List[Dict[str, str]] = []
|
|
2191
|
+
|
|
2192
|
+
if connector_type not in {"snowflake", "databricks", "bigquery"}:
|
|
2193
|
+
print("Apply currently supports only snowflake, databricks, and bigquery.", file=sys.stderr)
|
|
2194
|
+
return 1
|
|
2195
|
+
|
|
2196
|
+
if dialect not in {"snowflake", "databricks", "bigquery"}:
|
|
2197
|
+
print(f"Unsupported apply dialect: {dialect}", file=sys.stderr)
|
|
2198
|
+
return 1
|
|
2199
|
+
|
|
2200
|
+
if args.sql_file and (args.old or args.new):
|
|
2201
|
+
print("Use either --sql-file or --old/--new, not both.", file=sys.stderr)
|
|
2202
|
+
return 1
|
|
2203
|
+
|
|
2204
|
+
if not args.sql_file and not (args.old and args.new):
|
|
2205
|
+
print("Provide --sql-file or both --old and --new.", file=sys.stderr)
|
|
2206
|
+
return 1
|
|
2207
|
+
|
|
2208
|
+
if (args.old and not args.new) or (args.new and not args.old):
|
|
2209
|
+
print("Both --old and --new are required together.", file=sys.stderr)
|
|
2210
|
+
return 1
|
|
2211
|
+
|
|
2212
|
+
if args.sql_file:
|
|
2213
|
+
sql_text = Path(args.sql_file).read_text(encoding="utf-8")
|
|
2214
|
+
else:
|
|
2215
|
+
schema = load_schema(args.model_schema)
|
|
2216
|
+
old_model, old_issues = _validate_model_file(args.old, schema)
|
|
2217
|
+
new_model, new_issues = _validate_model_file(args.new, schema)
|
|
2218
|
+
_print_issue_block(f"Old model ({args.old})", old_issues)
|
|
2219
|
+
_print_issue_block(f"New model ({args.new})", new_issues)
|
|
2220
|
+
combined_issues = list(old_issues) + list(new_issues)
|
|
2221
|
+
if has_errors(combined_issues):
|
|
2222
|
+
print("Apply failed: validation errors detected.", file=sys.stderr)
|
|
2223
|
+
return 1
|
|
2224
|
+
if not getattr(args, "skip_policy_check", False):
|
|
2225
|
+
policy_pack = load_policy_pack_with_inheritance(args.policy_pack)
|
|
2226
|
+
evaluated = policy_issues(new_model, policy_pack)
|
|
2227
|
+
_print_issue_block(f"Policy evaluation ({args.policy_pack})", evaluated)
|
|
2228
|
+
policy_results = _issues_as_json(evaluated)
|
|
2229
|
+
if has_errors(evaluated):
|
|
2230
|
+
print("Apply failed: policy check failed.", file=sys.stderr)
|
|
2231
|
+
return 1
|
|
2232
|
+
sql_text = generate_migration(old_model, new_model, dialect=dialect)
|
|
2233
|
+
|
|
2234
|
+
statements = _split_sql_statements(sql_text)
|
|
2235
|
+
if not statements:
|
|
2236
|
+
print("No executable SQL statements found.", file=sys.stderr)
|
|
2237
|
+
return 1
|
|
2238
|
+
|
|
2239
|
+
migration_name = args.migration_name or _default_migration_name()
|
|
2240
|
+
checksum = _sql_checksum(sql_text)
|
|
2241
|
+
destructive_findings = _detect_destructive_statements(statements)
|
|
2242
|
+
|
|
2243
|
+
if destructive_findings and not getattr(args, "allow_destructive", False):
|
|
2244
|
+
print(
|
|
2245
|
+
"Apply blocked: destructive SQL detected. Re-run with --allow-destructive if this is intentional.",
|
|
2246
|
+
file=sys.stderr,
|
|
2247
|
+
)
|
|
2248
|
+
for finding in destructive_findings[:5]:
|
|
2249
|
+
print(
|
|
2250
|
+
f" - #{finding['statement_index']} {finding['kind']}: {finding['preview']}",
|
|
2251
|
+
file=sys.stderr,
|
|
2252
|
+
)
|
|
2253
|
+
if len(destructive_findings) > 5:
|
|
2254
|
+
print(f" ... and {len(destructive_findings) - 5} more statement(s).", file=sys.stderr)
|
|
2255
|
+
return 1
|
|
2256
|
+
|
|
2257
|
+
if getattr(args, "write_sql", ""):
|
|
2258
|
+
Path(args.write_sql).write_text(sql_text.strip() + "\n", encoding="utf-8")
|
|
2259
|
+
|
|
2260
|
+
report: Dict[str, Any] = {
|
|
2261
|
+
"connector": connector_type,
|
|
2262
|
+
"dialect": dialect,
|
|
2263
|
+
"mode": mode,
|
|
2264
|
+
"status": "pending",
|
|
2265
|
+
"migration_name": migration_name,
|
|
2266
|
+
"checksum": checksum,
|
|
2267
|
+
"statement_count": len(statements),
|
|
2268
|
+
"destructive_statement_count": len(destructive_findings),
|
|
2269
|
+
"destructive_statements": destructive_findings,
|
|
2270
|
+
"policy_checked": mode == "model_diff" and not getattr(args, "skip_policy_check", False),
|
|
2271
|
+
"policy_results": policy_results,
|
|
2272
|
+
"skip_ledger": bool(args.skip_ledger),
|
|
2273
|
+
"ledger_table": args.ledger_table,
|
|
2274
|
+
"started_at_epoch": started_ts,
|
|
2275
|
+
"started_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(started_ts)),
|
|
2276
|
+
}
|
|
2277
|
+
|
|
2278
|
+
if getattr(args, "dry_run", False):
|
|
2279
|
+
finished_ts = time.time()
|
|
2280
|
+
report["status"] = "dry_run"
|
|
2281
|
+
report["finished_at_epoch"] = finished_ts
|
|
2282
|
+
report["finished_at_utc"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(finished_ts))
|
|
2283
|
+
report["duration_ms"] = int((finished_ts - started_ts) * 1000)
|
|
2284
|
+
if getattr(args, "report_json", ""):
|
|
2285
|
+
_write_apply_report(args.report_json, report)
|
|
2286
|
+
if getattr(args, "output_json", False):
|
|
2287
|
+
print(json.dumps(report, indent=2))
|
|
2288
|
+
else:
|
|
2289
|
+
print(f"DRY RUN: {len(statements)} statements for {connector_type}")
|
|
2290
|
+
print(f"Migration: {migration_name}")
|
|
2291
|
+
print(f"Checksum: {checksum}")
|
|
2292
|
+
if destructive_findings:
|
|
2293
|
+
print(f"Destructive statements: {len(destructive_findings)} (allowed)")
|
|
2294
|
+
print("\n" + sql_text.strip() + "\n")
|
|
2295
|
+
return 0
|
|
2296
|
+
|
|
2297
|
+
if connector_type == "snowflake" and (not getattr(args, "host", "") or not getattr(args, "user", "") or not getattr(args, "database", "")):
|
|
2298
|
+
print("Snowflake apply requires --host, --user, and --database.", file=sys.stderr)
|
|
2299
|
+
return 1
|
|
2300
|
+
if connector_type == "databricks" and (not getattr(args, "host", "") or not getattr(args, "token", "") or not getattr(args, "http_path", "")):
|
|
2301
|
+
print("Databricks apply requires --host, --token, and --http-path.", file=sys.stderr)
|
|
2302
|
+
return 1
|
|
2303
|
+
if connector_type == "bigquery" and (not getattr(args, "project", "") or not getattr(args, "dataset", "")):
|
|
2304
|
+
print("BigQuery apply requires --project and --dataset.", file=sys.stderr)
|
|
2305
|
+
return 1
|
|
2306
|
+
|
|
2307
|
+
connector = get_connector(connector_type)
|
|
2308
|
+
if connector is None:
|
|
2309
|
+
print(f"Unknown connector: {connector_type}", file=sys.stderr)
|
|
2310
|
+
return 1
|
|
2311
|
+
|
|
2312
|
+
ok, msg = connector.check_driver()
|
|
2313
|
+
if not ok:
|
|
2314
|
+
print(f"Driver check failed: {msg}", file=sys.stderr)
|
|
2315
|
+
return 1
|
|
2316
|
+
|
|
2317
|
+
config = _build_connector_config(args)
|
|
2318
|
+
try:
|
|
2319
|
+
if connector_type == "snowflake":
|
|
2320
|
+
_apply_snowflake(config, statements, migration_name, checksum, args.ledger_table, args.skip_ledger)
|
|
2321
|
+
elif connector_type == "databricks":
|
|
2322
|
+
_apply_databricks(config, statements, migration_name, checksum, args.ledger_table, args.skip_ledger)
|
|
2323
|
+
elif connector_type == "bigquery":
|
|
2324
|
+
_apply_bigquery(config, statements, migration_name, checksum, args.ledger_table, args.skip_ledger)
|
|
2325
|
+
except ApplyExecutionError as e:
|
|
2326
|
+
finished_ts = time.time()
|
|
2327
|
+
report["status"] = "failed"
|
|
2328
|
+
report["error"] = str(e)
|
|
2329
|
+
report["failed_statement_index"] = e.statement_index
|
|
2330
|
+
report["failed_statement_preview"] = _preview_sql(e.statement)
|
|
2331
|
+
report["finished_at_epoch"] = finished_ts
|
|
2332
|
+
report["finished_at_utc"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(finished_ts))
|
|
2333
|
+
report["duration_ms"] = int((finished_ts - started_ts) * 1000)
|
|
2334
|
+
if getattr(args, "report_json", ""):
|
|
2335
|
+
_write_apply_report(args.report_json, report)
|
|
2336
|
+
if getattr(args, "output_json", False):
|
|
2337
|
+
print(json.dumps(report, indent=2))
|
|
2338
|
+
else:
|
|
2339
|
+
print(str(e), file=sys.stderr)
|
|
2340
|
+
return 1
|
|
2341
|
+
except Exception as e:
|
|
2342
|
+
finished_ts = time.time()
|
|
2343
|
+
report["status"] = "failed"
|
|
2344
|
+
report["error"] = str(e)
|
|
2345
|
+
report["finished_at_epoch"] = finished_ts
|
|
2346
|
+
report["finished_at_utc"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(finished_ts))
|
|
2347
|
+
report["duration_ms"] = int((finished_ts - started_ts) * 1000)
|
|
2348
|
+
if getattr(args, "report_json", ""):
|
|
2349
|
+
_write_apply_report(args.report_json, report)
|
|
2350
|
+
if getattr(args, "output_json", False):
|
|
2351
|
+
print(json.dumps(report, indent=2))
|
|
2352
|
+
else:
|
|
2353
|
+
print(f"Apply failed: {e}", file=sys.stderr)
|
|
2354
|
+
return 1
|
|
2355
|
+
|
|
2356
|
+
finished_ts = time.time()
|
|
2357
|
+
report["status"] = "success"
|
|
2358
|
+
report["finished_at_epoch"] = finished_ts
|
|
2359
|
+
report["finished_at_utc"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(finished_ts))
|
|
2360
|
+
report["duration_ms"] = int((finished_ts - started_ts) * 1000)
|
|
2361
|
+
if getattr(args, "report_json", ""):
|
|
2362
|
+
_write_apply_report(args.report_json, report)
|
|
2363
|
+
|
|
2364
|
+
if getattr(args, "output_json", False):
|
|
2365
|
+
print(json.dumps(report, indent=2))
|
|
2366
|
+
else:
|
|
2367
|
+
print(f"Applied migration '{migration_name}' ({len(statements)} statements) to {connector_type}.")
|
|
2368
|
+
if not args.skip_ledger:
|
|
2369
|
+
print(f"Ledger table: {args.ledger_table}")
|
|
2370
|
+
return 0
|
|
2371
|
+
|
|
2372
|
+
|
|
2373
|
+
def cmd_completion(args: argparse.Namespace) -> int:
    """Print the shell completion script selected by ``args.shell``.

    Supported values are ``bash``, ``zsh`` and ``fish``; the matching
    generator's output is written to stdout.

    Returns:
        0 when a script was printed, 1 (with a message on stderr) when the
        requested shell is not one of the supported values.
    """
    requested = args.shell

    # Reject unknown shells first so the happy path below stays flat.
    if requested not in ("bash", "zsh", "fish"):
        print(f"Unsupported shell: {requested}", file=sys.stderr)
        return 1

    if requested == "bash":
        script = generate_bash_completion()
    elif requested == "zsh":
        script = generate_zsh_completion()
    else:
        script = generate_fish_completion()

    print(script)
    return 0
|
|
2385
|
+
|
|
2386
|
+
|
|
2387
|
+
def cmd_watch(args: argparse.Namespace) -> int:
    """Poll the working directory and re-validate model files that changed.

    On every poll the current directory is globbed with the watch pattern.
    Each file that is new, or whose mtime differs from the previous poll, is
    loaded and run through ``schema_issues`` and ``lint_issues``, and the
    findings are printed. The first poll starts from an empty snapshot, so
    every matching file is checked once at start-up.

    Files under a ``.git``, ``node_modules`` or ``.venv`` directory are
    skipped. The check is made per path component, in line with the
    ``validate-all`` default excludes, instead of by substring; a path such
    as ``exports.gitops/x.model.yaml`` is therefore still watched.

    Args:
        args: Parsed CLI namespace. Optional attributes read: ``schema``
            (falls back to ``_default_schema_path()``), ``glob`` (default
            ``**/*.model.yaml``) and ``interval`` (default 2, passed to
            ``time.sleep``).

    Returns:
        0 once the polling loop is interrupted with Ctrl+C.
    """
    schema_path = getattr(args, "schema", None) or _default_schema_path()
    schema = load_schema(schema_path)
    watch_glob = getattr(args, "glob", "**/*.model.yaml")
    interval = getattr(args, "interval", 2)
    root = Path(".").resolve()
    # Directory names whose contents are never watched.
    skipped_dirs = {".git", "node_modules", ".venv"}

    print(f"Watching for changes: {watch_glob} (every {interval}s)")
    print("Press Ctrl+C to stop.\n")

    # Snapshot of absolute path -> mtime from the previous poll.
    mtimes: Dict[str, float] = {}

    try:
        while True:
            current_files: Dict[str, float] = {}
            for path in sorted(root.glob(watch_glob)):
                # Compare whole path components, not substrings of the path.
                if skipped_dirs.intersection(path.relative_to(root).parts):
                    continue
                try:
                    current_files[str(path)] = path.stat().st_mtime
                except OSError:
                    # stat failed (e.g. the file went away after the glob);
                    # leave it out of this snapshot.
                    continue

            # New files have no previous mtime, so .get() yields None and
            # they compare as changed.
            changed: List[str] = [
                fpath
                for fpath, mtime in current_files.items()
                if mtimes.get(fpath) != mtime
            ]

            mtimes = current_files

            for fpath in changed:
                rel = str(Path(fpath).relative_to(root))
                print(f"\n--- Changed: {rel} ---")
                try:
                    model = load_yaml_model(fpath)
                    s_issues = schema_issues(model, schema)
                    l_issues = lint_issues(model)
                    all_issues = s_issues + l_issues

                    if all_issues:
                        for iss in all_issues:
                            sev = iss.severity.upper()
                            print(f" [{sev}] {iss.code}: {iss.message}")
                        error_count = sum(1 for i in all_issues if i.severity == "error")
                        warn_count = sum(1 for i in all_issues if i.severity == "warn")
                        print(f" Result: {error_count} error(s), {warn_count} warning(s)")
                    else:
                        print(" \u2713 Valid")
                except Exception as exc:
                    # One unreadable or invalid file must not stop the
                    # watcher; report it and move on to the next file.
                    print(f" [ERROR] {exc}")

            time.sleep(interval)
    except KeyboardInterrupt:
        print("\nWatch stopped.")
    return 0
|
|
2445
|
+
|
|
2446
|
+
|
|
2447
|
+
def build_parser() -> argparse.ArgumentParser:
|
|
2448
|
+
parser = argparse.ArgumentParser(prog="datalex", description="DataLex CLI")
|
|
2449
|
+
sub = parser.add_subparsers(dest="command", required=True)
|
|
2450
|
+
|
|
2451
|
+
init_parser = sub.add_parser("init", help="Initialize a new workspace")
|
|
2452
|
+
init_parser.add_argument("--path", default=".", help="Workspace path")
|
|
2453
|
+
init_parser.add_argument(
|
|
2454
|
+
"--template",
|
|
2455
|
+
choices=["single", "multi-model", "end-to-end"],
|
|
2456
|
+
default="single",
|
|
2457
|
+
help="Starter template to scaffold (default: single).",
|
|
2458
|
+
)
|
|
2459
|
+
init_parser.add_argument(
|
|
2460
|
+
"--multi-model",
|
|
2461
|
+
action="store_true",
|
|
2462
|
+
help="Deprecated alias for --template multi-model.",
|
|
2463
|
+
)
|
|
2464
|
+
init_parser.set_defaults(func=cmd_init)
|
|
2465
|
+
|
|
2466
|
+
validate_parser = sub.add_parser("validate", help="Validate model with schema + semantic rules")
|
|
2467
|
+
validate_parser.add_argument("model", help="Path to model YAML")
|
|
2468
|
+
validate_parser.add_argument("--schema", default=_default_schema_path(), help="Path to JSON schema")
|
|
2469
|
+
validate_parser.set_defaults(func=cmd_validate)
|
|
2470
|
+
|
|
2471
|
+
lint_parser = sub.add_parser("lint", help="Run semantic lint checks")
|
|
2472
|
+
lint_parser.add_argument("model", help="Path to model YAML")
|
|
2473
|
+
lint_parser.set_defaults(func=cmd_lint)
|
|
2474
|
+
|
|
2475
|
+
compile_parser = sub.add_parser("compile", help="Compile model to canonical JSON")
|
|
2476
|
+
compile_parser.add_argument("model", help="Path to model YAML")
|
|
2477
|
+
compile_parser.add_argument("--schema", default=_default_schema_path(), help="Path to JSON schema")
|
|
2478
|
+
compile_parser.add_argument("--out", help="Output file for canonical JSON")
|
|
2479
|
+
compile_parser.set_defaults(func=cmd_compile)
|
|
2480
|
+
|
|
2481
|
+
diff_parser = sub.add_parser("diff", help="Semantic diff between two model files")
|
|
2482
|
+
diff_parser.add_argument("old", help="Old model YAML path")
|
|
2483
|
+
diff_parser.add_argument("new", help="New model YAML path")
|
|
2484
|
+
diff_parser.set_defaults(func=cmd_diff)
|
|
2485
|
+
|
|
2486
|
+
validate_all_parser = sub.add_parser(
|
|
2487
|
+
"validate-all", help="Validate all model files matching a glob"
|
|
2488
|
+
)
|
|
2489
|
+
validate_all_parser.add_argument(
|
|
2490
|
+
"--glob", default="**/*.model.yaml", help="Glob pattern for model files"
|
|
2491
|
+
)
|
|
2492
|
+
validate_all_parser.add_argument(
|
|
2493
|
+
"--exclude",
|
|
2494
|
+
nargs="*",
|
|
2495
|
+
default=["**/node_modules/**", "**/.git/**", "**/.venv/**"],
|
|
2496
|
+
help="Glob-style path patterns to exclude",
|
|
2497
|
+
)
|
|
2498
|
+
validate_all_parser.add_argument(
|
|
2499
|
+
"--schema", default=_default_schema_path(), help="Path to JSON schema"
|
|
2500
|
+
)
|
|
2501
|
+
validate_all_parser.set_defaults(func=cmd_validate_all)
|
|
2502
|
+
|
|
2503
|
+
gate_parser = sub.add_parser(
|
|
2504
|
+
"gate",
|
|
2505
|
+
help="PR gate: validate old/new models and fail on breaking changes by default",
|
|
2506
|
+
)
|
|
2507
|
+
gate_parser.add_argument("old", help="Old model YAML path")
|
|
2508
|
+
gate_parser.add_argument("new", help="New model YAML path")
|
|
2509
|
+
gate_parser.add_argument(
|
|
2510
|
+
"--schema", default=_default_schema_path(), help="Path to JSON schema"
|
|
2511
|
+
)
|
|
2512
|
+
gate_parser.add_argument(
|
|
2513
|
+
"--allow-breaking",
|
|
2514
|
+
action="store_true",
|
|
2515
|
+
help="Allow breaking changes (still fails on validation errors)",
|
|
2516
|
+
)
|
|
2517
|
+
gate_parser.add_argument(
|
|
2518
|
+
"--output-json", action="store_true", help="Print semantic diff as JSON"
|
|
2519
|
+
)
|
|
2520
|
+
gate_parser.set_defaults(func=cmd_gate)
|
|
2521
|
+
|
|
2522
|
+
policy_parser = sub.add_parser("policy-check", help="Evaluate a model against a policy pack")
|
|
2523
|
+
policy_parser.add_argument("model", help="Path to model YAML")
|
|
2524
|
+
policy_parser.add_argument(
|
|
2525
|
+
"--policy", default=_default_policy_path(), help="Path to policy pack YAML"
|
|
2526
|
+
)
|
|
2527
|
+
policy_parser.add_argument(
|
|
2528
|
+
"--schema", default=_default_schema_path(), help="Path to model schema JSON"
|
|
2529
|
+
)
|
|
2530
|
+
policy_parser.add_argument(
|
|
2531
|
+
"--policy-schema",
|
|
2532
|
+
default=_default_policy_schema_path(),
|
|
2533
|
+
help="Path to policy schema JSON",
|
|
2534
|
+
)
|
|
2535
|
+
policy_parser.add_argument("--output-json", action="store_true", help="Print policy output as JSON")
|
|
2536
|
+
policy_parser.add_argument("--inherit", action="store_true", help="Resolve pack.extends inheritance chain before evaluation")
|
|
2537
|
+
policy_parser.set_defaults(func=cmd_policy_check)
|
|
2538
|
+
|
|
2539
|
+
generate_parser = sub.add_parser("generate", help="Generate artifacts from model YAML")
|
|
2540
|
+
generate_sub = generate_parser.add_subparsers(dest="generate_command", required=True)
|
|
2541
|
+
|
|
2542
|
+
gen_sql_parser = generate_sub.add_parser("sql", help="Generate SQL DDL")
|
|
2543
|
+
gen_sql_parser.add_argument("model", help="Path to model YAML")
|
|
2544
|
+
gen_sql_parser.add_argument("--dialect", default="postgres", choices=["postgres", "snowflake", "bigquery", "databricks"])
|
|
2545
|
+
gen_sql_parser.add_argument("--out", help="Output SQL file path")
|
|
2546
|
+
gen_sql_parser.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2547
|
+
gen_sql_parser.set_defaults(func=cmd_generate_sql)
|
|
2548
|
+
|
|
2549
|
+
gen_dbt_parser = generate_sub.add_parser("dbt", help="Generate dbt project scaffold")
|
|
2550
|
+
gen_dbt_parser.add_argument("model", help="Path to model YAML")
|
|
2551
|
+
gen_dbt_parser.add_argument("--out-dir", required=True, help="Target directory for scaffold files")
|
|
2552
|
+
gen_dbt_parser.add_argument("--source-name", default="raw", help="dbt source name")
|
|
2553
|
+
gen_dbt_parser.add_argument("--project-name", default="data_modeling_mvp", help="dbt project name")
|
|
2554
|
+
gen_dbt_parser.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2555
|
+
gen_dbt_parser.set_defaults(func=cmd_generate_dbt)
|
|
2556
|
+
|
|
2557
|
+
gen_metadata_parser = generate_sub.add_parser("metadata", help="Generate metadata JSON export")
|
|
2558
|
+
gen_metadata_parser.add_argument("model", help="Path to model YAML")
|
|
2559
|
+
gen_metadata_parser.add_argument("--out", help="Output metadata JSON path")
|
|
2560
|
+
gen_metadata_parser.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2561
|
+
gen_metadata_parser.set_defaults(func=cmd_generate_metadata)
|
|
2562
|
+
|
|
2563
|
+
gen_docs_parser = generate_sub.add_parser("docs", help="Generate data dictionary documentation")
|
|
2564
|
+
gen_docs_parser.add_argument("model", help="Path to model YAML")
|
|
2565
|
+
gen_docs_parser.add_argument("--format", default="html", choices=["html", "markdown"], help="Output format")
|
|
2566
|
+
gen_docs_parser.add_argument("--out", help="Output file path")
|
|
2567
|
+
gen_docs_parser.add_argument("--title", help="Custom page title")
|
|
2568
|
+
gen_docs_parser.set_defaults(func=cmd_generate_docs)
|
|
2569
|
+
|
|
2570
|
+
gen_changelog_parser = generate_sub.add_parser("changelog", help="Generate changelog from model diff")
|
|
2571
|
+
gen_changelog_parser.add_argument("old", help="Old model YAML path")
|
|
2572
|
+
gen_changelog_parser.add_argument("new", help="New model YAML path")
|
|
2573
|
+
gen_changelog_parser.add_argument("--out", help="Output changelog file path")
|
|
2574
|
+
gen_changelog_parser.set_defaults(func=cmd_generate_changelog)
|
|
2575
|
+
|
|
2576
|
+
import_parser = sub.add_parser("import", help="Import SQL/DBML/Spark/dbt schema into model YAML")
|
|
2577
|
+
import_sub = import_parser.add_subparsers(dest="import_command", required=True)
|
|
2578
|
+
|
|
2579
|
+
import_sql_parser = import_sub.add_parser("sql", help="Import SQL DDL file")
|
|
2580
|
+
import_sql_parser.add_argument("input", help="Path to SQL DDL file")
|
|
2581
|
+
import_sql_parser.add_argument("--out", help="Write output YAML model file")
|
|
2582
|
+
import_sql_parser.add_argument("--model-name", default="imported_sql_model", help="Model name")
|
|
2583
|
+
import_sql_parser.add_argument("--domain", default="imported", help="Domain value")
|
|
2584
|
+
import_sql_parser.add_argument(
|
|
2585
|
+
"--owner",
|
|
2586
|
+
action="append",
|
|
2587
|
+
default=[],
|
|
2588
|
+
help="Owner email (repeatable)",
|
|
2589
|
+
)
|
|
2590
|
+
import_sql_parser.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2591
|
+
import_sql_parser.set_defaults(func=cmd_import_sql)
|
|
2592
|
+
|
|
2593
|
+
import_dbml_parser = import_sub.add_parser("dbml", help="Import DBML file")
|
|
2594
|
+
import_dbml_parser.add_argument("input", help="Path to DBML file")
|
|
2595
|
+
import_dbml_parser.add_argument("--out", help="Write output YAML model file")
|
|
2596
|
+
import_dbml_parser.add_argument("--model-name", default="imported_dbml_model", help="Model name")
|
|
2597
|
+
import_dbml_parser.add_argument("--domain", default="imported", help="Domain value")
|
|
2598
|
+
import_dbml_parser.add_argument(
|
|
2599
|
+
"--owner",
|
|
2600
|
+
action="append",
|
|
2601
|
+
default=[],
|
|
2602
|
+
help="Owner email (repeatable)",
|
|
2603
|
+
)
|
|
2604
|
+
import_dbml_parser.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2605
|
+
import_dbml_parser.set_defaults(func=cmd_import_dbml)
|
|
2606
|
+
|
|
2607
|
+
import_spark_parser = import_sub.add_parser("spark-schema", help="Import Spark schema JSON file")
|
|
2608
|
+
import_spark_parser.add_argument("input", help="Path to Spark schema JSON file")
|
|
2609
|
+
import_spark_parser.add_argument("--out", help="Write output YAML model file")
|
|
2610
|
+
import_spark_parser.add_argument("--model-name", default="imported_spark_schema", help="Model name")
|
|
2611
|
+
import_spark_parser.add_argument("--table-name", help="Table name (for single StructType schemas)")
|
|
2612
|
+
import_spark_parser.add_argument("--domain", default="imported", help="Domain value")
|
|
2613
|
+
import_spark_parser.add_argument("--owner", action="append", default=[], help="Owner email (repeatable)")
|
|
2614
|
+
import_spark_parser.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2615
|
+
import_spark_parser.set_defaults(func=cmd_import_spark_schema)
|
|
2616
|
+
|
|
2617
|
+
import_dbt_parser = import_sub.add_parser("dbt", help="Import dbt schema.yml file")
|
|
2618
|
+
import_dbt_parser.add_argument("input", help="Path to dbt schema.yml file")
|
|
2619
|
+
import_dbt_parser.add_argument("--out", help="Write output YAML model file")
|
|
2620
|
+
import_dbt_parser.add_argument("--model-name", default="imported_dbt_model", help="Model name")
|
|
2621
|
+
import_dbt_parser.add_argument("--domain", default="imported", help="Domain value")
|
|
2622
|
+
import_dbt_parser.add_argument("--owner", action="append", default=[], help="Owner email (repeatable)")
|
|
2623
|
+
import_dbt_parser.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2624
|
+
import_dbt_parser.set_defaults(func=cmd_import_dbt)
|
|
2625
|
+
|
|
2626
|
+
# dbt round-trip subcommand group
|
|
2627
|
+
dbt_parser = sub.add_parser("dbt", help="dbt round-trip: sync DataLex metadata into dbt schema.yml files")
|
|
2628
|
+
dbt_sub = dbt_parser.add_subparsers(dest="dbt_command", required=True)
|
|
2629
|
+
|
|
2630
|
+
dbt_sync_parser = dbt_sub.add_parser("sync", help="Merge DataLex metadata into a single dbt schema.yml (non-destructive)")
|
|
2631
|
+
dbt_sync_parser.add_argument("model", help="Path to the DataLex .model.yaml file")
|
|
2632
|
+
dbt_sync_parser.add_argument("--dbt-schema", required=True, help="Path to the existing dbt schema.yml to update")
|
|
2633
|
+
dbt_sync_parser.add_argument("--out", default=None, help="Output path (default: overwrites --dbt-schema in-place)")
|
|
2634
|
+
dbt_sync_parser.set_defaults(func=cmd_dbt_sync)
|
|
2635
|
+
|
|
2636
|
+
dbt_push_parser = dbt_sub.add_parser("push", help="Push DataLex metadata into all schema.yml files found in a dbt project")
|
|
2637
|
+
dbt_push_parser.add_argument("model", help="Path to the DataLex .model.yaml file")
|
|
2638
|
+
dbt_push_parser.add_argument("--dbt-project", required=True, help="Root path of the dbt project to scan for schema.yml files")
|
|
2639
|
+
dbt_push_parser.set_defaults(func=cmd_dbt_push)
|
|
2640
|
+
|
|
2641
|
+
pull_parser = sub.add_parser("pull", help="Pull schema from a live database into a DataLex model")
|
|
2642
|
+
pull_parser.add_argument("connector", help="Connector type (postgres, mysql, snowflake, bigquery, databricks, sqlserver, azure_sql, azure_fabric, redshift)")
|
|
2643
|
+
pull_parser.add_argument("--host", help="Database host (or Snowflake account, Databricks server hostname)")
|
|
2644
|
+
pull_parser.add_argument("--port", type=int, help="Database port")
|
|
2645
|
+
pull_parser.add_argument("--database", help="Database name")
|
|
2646
|
+
pull_parser.add_argument("--db-schema", help="Schema name (default: public/PUBLIC/default)")
|
|
2647
|
+
pull_parser.add_argument("--user", help="Database user")
|
|
2648
|
+
pull_parser.add_argument("--password", help="Database password")
|
|
2649
|
+
pull_parser.add_argument("--warehouse", help="Snowflake warehouse")
|
|
2650
|
+
pull_parser.add_argument("--project", help="BigQuery project ID")
|
|
2651
|
+
pull_parser.add_argument("--dataset", help="BigQuery dataset")
|
|
2652
|
+
pull_parser.add_argument("--catalog", help="Databricks Unity Catalog name")
|
|
2653
|
+
pull_parser.add_argument("--token", help="Access token (Databricks)")
|
|
2654
|
+
pull_parser.add_argument("--http-path", help="Databricks SQL Warehouse/Cluster HTTP path")
|
|
2655
|
+
pull_parser.add_argument("--odbc-driver", help="ODBC driver for SQL Server-family connectors")
|
|
2656
|
+
pull_parser.add_argument("--encrypt", help="SQL Server encryption setting (yes/no)")
|
|
2657
|
+
pull_parser.add_argument("--trust-server-certificate", help="SQL Server TrustServerCertificate setting (yes/no)")
|
|
2658
|
+
pull_parser.add_argument("--private-key-path", help="Path to RSA private key PEM file (Snowflake key-pair auth)")
|
|
2659
|
+
pull_parser.add_argument("--tables", nargs="*", help="Only include these tables")
|
|
2660
|
+
pull_parser.add_argument("--exclude-tables", nargs="*", help="Exclude these tables")
|
|
2661
|
+
pull_parser.add_argument("--model-name", default="imported_model", help="Model name")
|
|
2662
|
+
pull_parser.add_argument("--domain", default="imported", help="Domain value")
|
|
2663
|
+
pull_parser.add_argument("--owner", help="Owner email")
|
|
2664
|
+
pull_parser.add_argument("--out", help="Output YAML model file path")
|
|
2665
|
+
pull_parser.add_argument("--project-dir", help="Project folder to write extracted model YAML")
|
|
2666
|
+
pull_parser.add_argument(
|
|
2667
|
+
"--create-project-dir",
|
|
2668
|
+
action="store_true",
|
|
2669
|
+
help="Create --project-dir if missing (otherwise prompt in interactive mode)",
|
|
2670
|
+
)
|
|
2671
|
+
pull_parser.add_argument("--test", action="store_true", help="Test connection only, do not pull schema")
|
|
2672
|
+
pull_parser.set_defaults(func=cmd_pull)
|
|
2673
|
+
|
|
2674
|
+
connectors_parser = sub.add_parser("connectors", help="List available database connectors and driver status")
|
|
2675
|
+
connectors_parser.add_argument("--output-json", action="store_true", help="Print as JSON")
|
|
2676
|
+
connectors_parser.set_defaults(func=cmd_connectors)
|
|
2677
|
+
|
|
2678
|
+
# Common connection args helper
|
|
2679
|
+
def _add_conn_args(p):
    """Attach the shared database-connection arguments to parser ``p``.

    Registers one positional ``connector`` argument followed by the
    optional connection flags and a trailing ``--output-json`` switch.
    Arguments are added in the order listed below, which is also the
    order they appear in ``--help`` output.
    """
    # (name, keyword arguments) pairs, kept in registration order.
    arg_specs = (
        ("connector", {"help": "Connector type (postgres, mysql, snowflake, bigquery, databricks, sqlserver, azure_sql, azure_fabric, redshift)"}),
        ("--host", {"help": "Database host"}),
        ("--port", {"type": int, "help": "Database port"}),
        ("--database", {"help": "Database name"}),
        ("--db-schema", {"help": "Schema name"}),
        ("--user", {"help": "Database user"}),
        ("--password", {"help": "Database password"}),
        ("--warehouse", {"help": "Snowflake warehouse"}),
        ("--project", {"help": "BigQuery project ID"}),
        ("--dataset", {"help": "BigQuery dataset"}),
        ("--catalog", {"help": "Databricks catalog"}),
        ("--token", {"help": "Access token"}),
        ("--http-path", {"help": "Databricks SQL Warehouse/Cluster HTTP path"}),
        ("--odbc-driver", {"help": "ODBC driver for SQL Server-family connectors"}),
        ("--encrypt", {"help": "SQL Server encryption setting (yes/no)"}),
        ("--trust-server-certificate", {"help": "SQL Server TrustServerCertificate setting (yes/no)"}),
        ("--private-key-path", {"help": "Path to RSA private key PEM file (Snowflake key-pair auth)"}),
        ("--output-json", {"action": "store_true", "help": "Print as JSON"}),
    )
    for arg_name, arg_options in arg_specs:
        p.add_argument(arg_name, **arg_options)
|
|
2698
|
+
|
|
2699
|
+
schemas_parser = sub.add_parser("schemas", help="List schemas/datasets in a database")
|
|
2700
|
+
_add_conn_args(schemas_parser)
|
|
2701
|
+
schemas_parser.set_defaults(func=cmd_schemas)
|
|
2702
|
+
|
|
2703
|
+
tables_parser = sub.add_parser("tables", help="List tables in a database schema")
|
|
2704
|
+
_add_conn_args(tables_parser)
|
|
2705
|
+
tables_parser.set_defaults(func=cmd_tables)
|
|
2706
|
+
|
|
2707
|
+
resolve_parser = sub.add_parser("resolve", help="Resolve cross-model imports and show unified graph")
|
|
2708
|
+
resolve_parser.add_argument("model", help="Path to root model YAML")
|
|
2709
|
+
resolve_parser.add_argument(
|
|
2710
|
+
"--search-dir",
|
|
2711
|
+
action="append",
|
|
2712
|
+
default=[],
|
|
2713
|
+
help="Additional directories to search for imported models (repeatable)",
|
|
2714
|
+
)
|
|
2715
|
+
resolve_parser.add_argument("--output-json", action="store_true", help="Print graph as JSON")
|
|
2716
|
+
resolve_parser.set_defaults(func=cmd_resolve)
|
|
2717
|
+
|
|
2718
|
+
resolve_project_parser = sub.add_parser("resolve-project", help="Resolve all models in a project directory")
|
|
2719
|
+
resolve_project_parser.add_argument("directory", help="Project directory path")
|
|
2720
|
+
resolve_project_parser.add_argument(
|
|
2721
|
+
"--search-dir",
|
|
2722
|
+
action="append",
|
|
2723
|
+
default=[],
|
|
2724
|
+
help="Additional search directories (repeatable)",
|
|
2725
|
+
)
|
|
2726
|
+
resolve_project_parser.add_argument("--output-json", action="store_true", help="Print results as JSON")
|
|
2727
|
+
resolve_project_parser.set_defaults(func=cmd_resolve_project)
|
|
2728
|
+
|
|
2729
|
+
diff_all_parser = sub.add_parser("diff-all", help="Semantic diff between two model directories")
|
|
2730
|
+
diff_all_parser.add_argument("old", help="Old model directory")
|
|
2731
|
+
diff_all_parser.add_argument("new", help="New model directory")
|
|
2732
|
+
diff_all_parser.add_argument("--output-json", action="store_true", help="Print diff as JSON")
|
|
2733
|
+
diff_all_parser.add_argument(
|
|
2734
|
+
"--allow-breaking",
|
|
2735
|
+
action="store_true",
|
|
2736
|
+
help="Allow breaking changes (exit 0 even with breaking changes)",
|
|
2737
|
+
)
|
|
2738
|
+
diff_all_parser.set_defaults(func=cmd_diff_all)
|
|
2739
|
+
|
|
2740
|
+
transform_parser = sub.add_parser("transform", help="Transform a model between conceptual, logical, and physical forms")
|
|
2741
|
+
transform_sub = transform_parser.add_subparsers(dest="transform_command", required=True)
|
|
2742
|
+
|
|
2743
|
+
transform_to_logical = transform_sub.add_parser("conceptual-to-logical", help="Transform a conceptual model into a logical model")
|
|
2744
|
+
transform_to_logical.add_argument("model", help="Path to source model YAML")
|
|
2745
|
+
transform_to_logical.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2746
|
+
transform_to_logical.add_argument("--out", help="Write transformed model YAML")
|
|
2747
|
+
transform_to_logical.set_defaults(func=cmd_transform)
|
|
2748
|
+
|
|
2749
|
+
transform_to_physical = transform_sub.add_parser("logical-to-physical", help="Transform a logical model into a physical model")
|
|
2750
|
+
transform_to_physical.add_argument("model", help="Path to source model YAML")
|
|
2751
|
+
transform_to_physical.add_argument("--dialect", default="postgres", choices=["postgres", "snowflake", "bigquery", "databricks"])
|
|
2752
|
+
transform_to_physical.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2753
|
+
transform_to_physical.add_argument("--out", help="Write transformed model YAML")
|
|
2754
|
+
transform_to_physical.set_defaults(func=cmd_transform)
|
|
2755
|
+
|
|
2756
|
+
standards_parser = sub.add_parser("standards", help="Check or autofix model standards, naming rules, and shared libraries")
|
|
2757
|
+
standards_sub = standards_parser.add_subparsers(dest="standards_command", required=True)
|
|
2758
|
+
|
|
2759
|
+
standards_check = standards_sub.add_parser("check", help="Evaluate standards and naming rules")
|
|
2760
|
+
standards_check.add_argument("model", help="Path to model YAML")
|
|
2761
|
+
standards_check.add_argument("--schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2762
|
+
standards_check.add_argument("--output-json", action="store_true", help="Print standards report as JSON")
|
|
2763
|
+
standards_check.set_defaults(func=cmd_standards_check)
|
|
2764
|
+
|
|
2765
|
+
standards_fix = standards_sub.add_parser("fix", help="Apply supported standards autofixes")
|
|
2766
|
+
standards_fix.add_argument("model", help="Path to model YAML")
|
|
2767
|
+
standards_fix.add_argument("--write", "-w", action="store_true", help="Overwrite the input model in-place")
|
|
2768
|
+
standards_fix.add_argument("--out", help="Write fixed YAML to a new path")
|
|
2769
|
+
standards_fix.set_defaults(func=cmd_standards_fix)
|
|
2770
|
+
|
|
2771
|
+
sync_parser = sub.add_parser("sync", help="Round-trip compare, merge, or pull workflows")
|
|
2772
|
+
sync_sub = sync_parser.add_subparsers(dest="sync_command", required=True)
|
|
2773
|
+
|
|
2774
|
+
sync_compare = sync_sub.add_parser("compare", help="Compare current and candidate models")
|
|
2775
|
+
sync_compare.add_argument("current", help="Current local model YAML")
|
|
2776
|
+
sync_compare.add_argument("candidate", help="Candidate/live model YAML")
|
|
2777
|
+
sync_compare.add_argument("--allow-breaking", action="store_true", help="Return 0 even when breaking changes are detected")
|
|
2778
|
+
sync_compare.set_defaults(func=cmd_sync_compare)
|
|
2779
|
+
|
|
2780
|
+
sync_merge = sync_sub.add_parser("merge", help="Merge documentation metadata from current into candidate model")
|
|
2781
|
+
sync_merge.add_argument("current", help="Current local model YAML")
|
|
2782
|
+
sync_merge.add_argument("candidate", help="Candidate/live model YAML")
|
|
2783
|
+
sync_merge.add_argument("--out", help="Write merged model YAML")
|
|
2784
|
+
sync_merge.set_defaults(func=cmd_sync_merge)
|
|
2785
|
+
|
|
2786
|
+
sync_pull = sync_sub.add_parser("pull", help="Alias of 'datalex pull' for round-trip workflows")
|
|
2787
|
+
sync_pull.add_argument("connector", help="Connector type (postgres, mysql, snowflake, bigquery, databricks, sqlserver, azure_sql, azure_fabric, redshift)")
|
|
2788
|
+
sync_pull.add_argument("--host", help="Database host (or Snowflake account, Databricks server hostname)")
|
|
2789
|
+
sync_pull.add_argument("--port", type=int, help="Database port")
|
|
2790
|
+
sync_pull.add_argument("--database", help="Database name")
|
|
2791
|
+
sync_pull.add_argument("--db-schema", help="Schema name (default: public/PUBLIC/default)")
|
|
2792
|
+
sync_pull.add_argument("--user", help="Database user")
|
|
2793
|
+
sync_pull.add_argument("--password", help="Database password")
|
|
2794
|
+
sync_pull.add_argument("--warehouse", help="Snowflake warehouse")
|
|
2795
|
+
sync_pull.add_argument("--project", help="BigQuery project ID")
|
|
2796
|
+
sync_pull.add_argument("--dataset", help="BigQuery dataset")
|
|
2797
|
+
sync_pull.add_argument("--catalog", help="Databricks Unity Catalog name")
|
|
2798
|
+
sync_pull.add_argument("--token", help="Access token (Databricks)")
|
|
2799
|
+
sync_pull.add_argument("--http-path", help="Databricks SQL Warehouse/Cluster HTTP path")
|
|
2800
|
+
sync_pull.add_argument("--odbc-driver", help="ODBC driver for SQL Server-family connectors")
|
|
2801
|
+
sync_pull.add_argument("--encrypt", help="SQL Server encryption setting (yes/no)")
|
|
2802
|
+
sync_pull.add_argument("--trust-server-certificate", help="SQL Server TrustServerCertificate setting (yes/no)")
|
|
2803
|
+
sync_pull.add_argument("--private-key-path", help="Path to RSA private key PEM file (Snowflake key-pair auth)")
|
|
2804
|
+
sync_pull.add_argument("--tables", nargs="*", help="Only include these tables")
|
|
2805
|
+
sync_pull.add_argument("--exclude-tables", nargs="*", help="Exclude these tables")
|
|
2806
|
+
sync_pull.add_argument("--model-name", default="imported_model", help="Model name")
|
|
2807
|
+
sync_pull.add_argument("--domain", default="imported", help="Domain value")
|
|
2808
|
+
sync_pull.add_argument("--owner", help="Owner email")
|
|
2809
|
+
sync_pull.add_argument("--out", help="Output YAML model file path")
|
|
2810
|
+
sync_pull.add_argument("--project-dir", help="Project folder to write extracted model YAML")
|
|
2811
|
+
sync_pull.add_argument("--create-project-dir", action="store_true", help="Create --project-dir if missing")
|
|
2812
|
+
sync_pull.add_argument("--test", action="store_true", help="Test connection only, do not pull schema")
|
|
2813
|
+
sync_pull.set_defaults(func=cmd_sync_pull)
|
|
2814
|
+
|
|
2815
|
+
fmt_parser = sub.add_parser("fmt", help="Auto-format YAML model to canonical style")
|
|
2816
|
+
fmt_parser.add_argument("model", help="Path to model YAML")
|
|
2817
|
+
fmt_parser.add_argument("--write", "-w", action="store_true", help="Overwrite the input file in-place")
|
|
2818
|
+
fmt_parser.add_argument("--out", help="Output file path (alternative to --write)")
|
|
2819
|
+
fmt_parser.set_defaults(func=cmd_fmt)
|
|
2820
|
+
|
|
2821
|
+
stats_parser = sub.add_parser("stats", help="Print model statistics")
|
|
2822
|
+
stats_parser.add_argument("model", help="Path to model YAML")
|
|
2823
|
+
stats_parser.add_argument("--output-json", action="store_true", help="Print stats as JSON")
|
|
2824
|
+
stats_parser.set_defaults(func=cmd_stats)
|
|
2825
|
+
|
|
2826
|
+
completeness_parser = sub.add_parser(
|
|
2827
|
+
"completeness",
|
|
2828
|
+
help="Score each entity against single-source-of-truth completeness dimensions",
|
|
2829
|
+
)
|
|
2830
|
+
completeness_parser.add_argument("model", help="Path to model YAML")
|
|
2831
|
+
completeness_parser.add_argument(
|
|
2832
|
+
"--output-json", action="store_true", help="Emit full report as JSON (for API/CI integration)"
|
|
2833
|
+
)
|
|
2834
|
+
completeness_parser.add_argument(
|
|
2835
|
+
"--summary", action="store_true", help="Show scores only, suppress per-entity missing detail"
|
|
2836
|
+
)
|
|
2837
|
+
completeness_parser.add_argument(
|
|
2838
|
+
"--min-score",
|
|
2839
|
+
type=int,
|
|
2840
|
+
default=None,
|
|
2841
|
+
metavar="N",
|
|
2842
|
+
help="Exit with code 1 if any entity scores below N%% (useful in CI gates)",
|
|
2843
|
+
)
|
|
2844
|
+
completeness_parser.set_defaults(func=cmd_completeness)
|
|
2845
|
+
|
|
2846
|
+
schema_parser = sub.add_parser("print-schema", help="Print active model schema JSON")
|
|
2847
|
+
schema_parser.add_argument("--schema", default=_default_schema_path(), help="Path to JSON schema")
|
|
2848
|
+
schema_parser.set_defaults(func=cmd_schema)
|
|
2849
|
+
|
|
2850
|
+
policy_schema_parser = sub.add_parser("print-policy-schema", help="Print policy schema JSON")
|
|
2851
|
+
policy_schema_parser.add_argument(
|
|
2852
|
+
"--policy-schema",
|
|
2853
|
+
default=_default_policy_schema_path(),
|
|
2854
|
+
help="Path to policy schema JSON",
|
|
2855
|
+
)
|
|
2856
|
+
policy_schema_parser.set_defaults(func=cmd_policy_schema)
|
|
2857
|
+
|
|
2858
|
+
doctor_parser = sub.add_parser("doctor", help="Diagnose project setup issues")
|
|
2859
|
+
doctor_parser.add_argument("--path", default=".", help="Project directory to diagnose")
|
|
2860
|
+
doctor_parser.add_argument("--output-json", action="store_true", help="Print diagnostics as JSON")
|
|
2861
|
+
doctor_parser.set_defaults(func=cmd_doctor)
|
|
2862
|
+
|
|
2863
|
+
migrate_parser = sub.add_parser("migrate", help="Generate SQL migration between two model versions")
|
|
2864
|
+
migrate_parser.add_argument("old", help="Old model YAML path")
|
|
2865
|
+
migrate_parser.add_argument("new", help="New model YAML path")
|
|
2866
|
+
migrate_parser.add_argument("--dialect", default="postgres", choices=["postgres", "snowflake", "bigquery", "databricks"])
|
|
2867
|
+
migrate_parser.add_argument("--out", help="Output SQL migration file path")
|
|
2868
|
+
migrate_parser.set_defaults(func=cmd_migrate)
|
|
2869
|
+
|
|
2870
|
+
apply_parser = sub.add_parser("apply", help="Apply SQL/migration to a live database")
|
|
2871
|
+
apply_parser.add_argument("connector", choices=["snowflake", "databricks", "bigquery"], help="Target connector")
|
|
2872
|
+
apply_parser.add_argument("--dialect", default=None, choices=["snowflake", "bigquery", "databricks"], help="SQL dialect (defaults to connector)")
|
|
2873
|
+
apply_parser.add_argument("--sql-file", help="Path to SQL file to apply")
|
|
2874
|
+
apply_parser.add_argument("--old", help="Old model YAML path (for generated migration)")
|
|
2875
|
+
apply_parser.add_argument("--new", help="New model YAML path (for generated migration)")
|
|
2876
|
+
apply_parser.add_argument("--model-schema", default=_default_schema_path(), help="Path to model schema JSON")
|
|
2877
|
+
apply_parser.add_argument("--host", help="Database host/account")
|
|
2878
|
+
apply_parser.add_argument("--port", type=int, help="Database port")
|
|
2879
|
+
apply_parser.add_argument("--database", help="Database name")
|
|
2880
|
+
apply_parser.add_argument("--db-schema", help="Schema name")
|
|
2881
|
+
apply_parser.add_argument("--user", help="Database user")
|
|
2882
|
+
apply_parser.add_argument("--password", help="Database password or key passphrase")
|
|
2883
|
+
apply_parser.add_argument("--warehouse", help="Snowflake warehouse")
|
|
2884
|
+
apply_parser.add_argument("--project", help="BigQuery project ID")
|
|
2885
|
+
apply_parser.add_argument("--dataset", help="BigQuery dataset")
|
|
2886
|
+
apply_parser.add_argument("--catalog", help="Databricks catalog")
|
|
2887
|
+
apply_parser.add_argument("--token", help="Databricks token")
|
|
2888
|
+
apply_parser.add_argument("--http-path", help="Databricks SQL Warehouse/Cluster HTTP path")
|
|
2889
|
+
apply_parser.add_argument("--private-key-path", help="Path to RSA private key PEM file (Snowflake key-pair auth)")
|
|
2890
|
+
apply_parser.add_argument("--migration-name", help="Migration name override")
|
|
2891
|
+
apply_parser.add_argument("--ledger-table", default="datalex_migrations", help="Migration ledger table name")
|
|
2892
|
+
apply_parser.add_argument("--skip-ledger", action="store_true", help="Skip writing migration ledger record")
|
|
2893
|
+
apply_parser.add_argument("--policy-pack", default=_default_policy_path(), help="Policy pack for model-diff preflight checks")
|
|
2894
|
+
apply_parser.add_argument("--skip-policy-check", action="store_true", help="Skip policy preflight checks for model-diff apply")
|
|
2895
|
+
apply_parser.add_argument("--allow-destructive", action="store_true", help="Allow destructive SQL statements (DROP/TRUNCATE)")
|
|
2896
|
+
apply_parser.add_argument("--write-sql", help="Write final SQL payload to file before execution")
|
|
2897
|
+
apply_parser.add_argument("--report-json", help="Write structured apply report JSON to file")
|
|
2898
|
+
apply_parser.add_argument("--output-json", action="store_true", help="Print structured apply report JSON")
|
|
2899
|
+
apply_parser.add_argument("--dry-run", action="store_true", help="Print SQL and exit without execution")
|
|
2900
|
+
apply_parser.set_defaults(func=cmd_apply)
|
|
2901
|
+
|
|
2902
|
+
completion_parser = sub.add_parser("completion", help="Generate shell completion script")
|
|
2903
|
+
completion_parser.add_argument("shell", choices=["bash", "zsh", "fish"], help="Shell type")
|
|
2904
|
+
completion_parser.set_defaults(func=cmd_completion)
|
|
2905
|
+
|
|
2906
|
+
watch_parser = sub.add_parser("watch", help="Watch model files and validate on change")
|
|
2907
|
+
watch_parser.add_argument("--glob", default="**/*.model.yaml", help="Glob pattern for model files")
|
|
2908
|
+
watch_parser.add_argument("--interval", type=int, default=2, help="Poll interval in seconds")
|
|
2909
|
+
watch_parser.add_argument("--schema", default=_default_schema_path(), help="Path to JSON schema")
|
|
2910
|
+
watch_parser.set_defaults(func=cmd_watch)
|
|
2911
|
+
|
|
2912
|
+
from datalex_cli.datalex_cli import register_datalex
|
|
2913
|
+
register_datalex(sub)
|
|
2914
|
+
|
|
2915
|
+
return parser
|
|
2916
|
+
|
|
2917
|
+
|
|
2918
|
+
def main(argv=None) -> int:
    """Parse command-line arguments and run the selected subcommand.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. ``None`` (the default) keeps the original
            behavior: argparse reads ``sys.argv[1:]``.

    Returns:
        Whatever the subcommand handler bound via ``set_defaults(func=...)``
        returns, or ``2`` when the parsed arguments carry no handler.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # NOTE(review): if any (sub)parser can be selected without binding
    # ``func`` -- e.g. a subparser group that is not marked required --
    # the original code failed with a bare AttributeError here. Show the
    # help text and return a usage-error status instead.
    handler = getattr(args, "func", None)
    if handler is None:
        parser.print_help()
        return 2

    return handler(args)
|
|
2922
|
+
|
|
2923
|
+
|
|
2924
|
+
if __name__ == "__main__":
    # Script entry point: run the CLI and hand its return value to the
    # interpreter as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|