sibi-flux 2026.1.2__py3-none-any.whl → 2026.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_flux/__init__.py +0 -1
- sibi_flux/cli.py +29 -6
- sibi_flux/config/settings.py +20 -1
- sibi_flux/datacube/_data_cube.py +1 -0
- sibi_flux/datacube/cli.py +1637 -415
- sibi_flux/datacube/config_engine.py +36 -18
- sibi_flux/datacube/field_factory.py +72 -28
- sibi_flux/datacube/field_mapper.py +93 -69
- sibi_flux/datacube/field_registry.py +1 -1
- sibi_flux/datacube/generator.py +255 -181
- sibi_flux/datacube/orchestrator.py +309 -37
- sibi_flux/datacube/router.py +5 -0
- sibi_flux/df_helper/backends/__init__.py +0 -1
- sibi_flux/df_validator/_df_validator.py +1 -0
- sibi_flux/init/core.py +93 -41
- sibi_flux/init/discovery_updater.py +80 -26
- sibi_flux/init/env.py +63 -36
- sibi_flux/init/env_engine.py +83 -42
- sibi_flux/init/env_generator.py +336 -183
- sibi_flux/init/rule_generator.py +171 -0
- sibi_flux/init/templates/discovery_params.yaml +9 -10
- sibi_flux/init/templates/gen_dc.py +74 -23
- sibi_flux/orchestration/__init__.py +0 -1
- sibi_flux/storage/_storage_manager.py +0 -1
- sibi_flux/utils/date_utils/__init__.py +0 -1
- sibi_flux/utils/date_utils/_business_days.py +0 -1
- {sibi_flux-2026.1.2.dist-info → sibi_flux-2026.1.4.dist-info}/METADATA +2 -4
- {sibi_flux-2026.1.2.dist-info → sibi_flux-2026.1.4.dist-info}/RECORD +30 -29
- {sibi_flux-2026.1.2.dist-info → sibi_flux-2026.1.4.dist-info}/WHEEL +0 -0
- {sibi_flux-2026.1.2.dist-info → sibi_flux-2026.1.4.dist-info}/entry_points.txt +0 -0
sibi_flux/datacube/cli.py
CHANGED
@@ -7,6 +7,7 @@ import typer
 import subprocess
 import importlib.util
 import importlib
+import shutil
 import sqlalchemy as sa
 from pathlib import Path
 from typing import Optional, Callable, Set, Dict, Any, Iterable, Mapping
@@ -29,15 +30,106 @@ from sibi_flux.datacube.generator import (
 )
 from sibi_flux.datacube.orchestrator import DiscoveryOrchestrator
 from sibi_flux.datacube.field_factory import FieldMapFactory
+from sibi_flux.init.rule_generator import RuleEngine

 import sibi_flux.datacube.generator

-
 app = typer.Typer(help="Sibi-Flux Data Cube Generator")
 console = Console()

 # --- Context Management ---

+
+def _load_and_resolve_config(config_path: Path) -> dict:
+    if not config_path.exists():
+        return {}
+    with open(config_path, "r") as f:
+        config_data = yaml.safe_load(f) or {}
+
+    # Heuristic: Config is in generators/datacubes/discovery_params.yaml
+    # Project Root is 3 levels up from FILE
+    try:
+        project_root = config_path.parent.parent.parent
+    except Exception:
+        project_root = Path.cwd()
+
+    if "paths" in config_data:
+        if "target" in config_data["paths"]:
+            target = config_data["paths"]["target"]
+            for key in ["datacubes_dir", "field_maps_dir"]:
+                if key in target:
+                    rel_path = target[key]
+                    if rel_path and not Path(rel_path).is_absolute():
+                        abs_path = (project_root / rel_path).resolve()
+                        target[key] = str(abs_path)
+
+    # Resolve Registry File (Dual Support)
+    repos = config_data.get("paths", {}).get("repositories", {})
+
+    # New location
+    if "global_datacube_registry_file" in repos:
+        reg_file = repos["global_datacube_registry_file"]
+        if not Path(reg_file).is_absolute():
+            repos["global_datacube_registry_file"] = str(
+                (project_root / reg_file).resolve()
+            )
+
+    # Old location (fallback)
+    elif "global_datacube_registry_file" in config_data.get("paths", {}):
+        reg_file = config_data.get("paths", {})["global_datacube_registry_file"]
+        if not Path(reg_file).is_absolute():
+            config_data["paths"]["global_datacube_registry_file"] = str(
+                (project_root / reg_file).resolve()
+            )
+
+    # Resolve Repositories
+    if "repositories" in config_data["paths"]:
+        repos = config_data["paths"]["repositories"]
+        for key in [
+            "global_field_repository_file",
+            "global_field_translations_file",
+        ]:
+            if key in repos:
+                rel = repos[key]
+                if rel and not Path(rel).is_absolute():
+                    config_data["paths"]["repositories"][key] = str(
+                        (project_root / rel).resolve()
+                    )
+
+    # Resolve Discovery Paths (Dual Support: root or paths.discovery)
+    discovery_block = None
+    if "paths" in config_data and "discovery" in config_data["paths"]:
+        discovery_block = config_data["paths"]["discovery"]
+    elif "discovery" in config_data:
+        discovery_block = config_data["discovery"]
+
+    if discovery_block:
+        for key in ["all_tables_file", "rules_file", "whitelist_file"]:
+            if key in discovery_block:
+                rel = discovery_block[key]
+                if rel and not Path(rel).is_absolute():
+                    discovery_block[key] = str((project_root / rel).resolve())
+
+    # Normalize databases (id -> name mapping) for CLI compatibility
+    # Ensure this matches logic in gen_dc.py wrapper
+    if "databases" in config_data:
+        for db in config_data["databases"]:
+            if "id" in db and "name" not in db:
+                db["name"] = db["id"]
+            if "connection_ref" in db and "connection_obj" not in db:
+                db["connection_obj"] = db["connection_ref"]
+
+            # Normalize import_spec to global_import string for resolve_db_url
+            if "import_spec" in db and "global_import" not in db:
+                spec = db["import_spec"]
+                if "module" in spec and "symbol" in spec:
+                    db["global_import"] = (
+                        f"from {spec['module']} import {spec['symbol']}"
+                    )
+
+    return config_data
+
+
 class CLIContext:
     def __init__(self):
         self.default_config: Optional[Path] = None
@@ -52,7 +144,7 @@ class CLIContext:
         field_translations_file: Path,
         valid_paths: list[str],
         valid_fieldmap_paths: list[str],
-        params: Optional[dict] = None
+        params: Optional[dict] = None,
     ):
         self.default_config = default_config
         self.field_translations_file = field_translations_file
@@ -60,23 +152,59 @@ class CLIContext:
         self.valid_fieldmap_paths = valid_fieldmap_paths
         self.params = params or {}

+    def auto_configure(self):
+        """Attempts to find defaults if not configured."""
+        if self.default_config:
+            return
+
+        # Heuristic check for standard project layout
+        # Case 1: Run from project root -> generators/datacubes/discovery_params.yaml
+        candidate = Path("generators/datacubes/discovery_params.yaml")
+        if candidate.exists():
+            # Use shared resolver to get normalization and project root paths
+            raw_params = _load_and_resolve_config(candidate)
+
+            self.configure(
+                default_config=candidate.resolve(),
+                field_translations_file=(
+                    candidate.parent.parent.parent
+                    / "dataobjects/globals/global_field_translations.yaml"
+                ).resolve(),
+                valid_paths=[],  # Would need params to populate
+                valid_fieldmap_paths=[],
+                params=raw_params,
+            )
+            console.print(f"[dim]Auto-configured context from {candidate}[/dim]")
+
+
 context = CLIContext()
+context.auto_configure()
+

 def set_context_defaults(
     default_config: Path,
     field_translations_file: Path,
     valid_paths: list[str],
     valid_fieldmap_paths: list[str],
-    params: Optional[dict] = None
+    params: Optional[dict] = None,
 ):
     """Configures the CLI context with project-specific defaults."""
-    context.configure(
-
+    context.configure(
+        default_config,
+        field_translations_file,
+        valid_paths,
+        valid_fieldmap_paths,
+        params,
+    )
+
     # Ensure directories exist based on configured params
     if params:
         ensure_directories_exist(params, logger=console.log)

-
+
+def _get_db_url_callback(
+    registry: DatacubeRegistry, db_url_map: Optional[str]
+) -> Callable[[str], str]:
     """Helper to create a callback that resolves DB URLs from CLI overrides or registry."""
     cli_urls = json.loads(db_url_map) if db_url_map else {}

@@ -88,19 +216,29 @@ def _get_db_url_callback(registry: DatacubeRegistry, db_url_map: Optional[str])
         url = resolve_db_url(conf_name, registry.global_imports)
         if url:
             return url
-        raise ValueError(
+        raise ValueError(
+            f"Could not resolve DB URL for '{conf_name}'. Provide via --db-urls or check imports."
+        )

     return get_url

+
 # --- Commands ---

+
 @app.command()
 def sync(
     config_file: Optional[Path] = typer.Option(None, "--config"),
-    db_url_map: Optional[str] = typer.Option(
+    db_url_map: Optional[str] = typer.Option(
+        None,
+        "--db-urls",
+        help="Optional JSON mapping. If omitted, tries to resolve from code.",
+    ),
     force: bool = typer.Option(False, "--force", "-f"),
-    env_file: Optional[Path] = typer.Option(
-
+    env_file: Optional[Path] = typer.Option(
+        None, "--env-file", "-e", help="Path to environment file"
+    ),
+    dry_run: bool = typer.Option(False, "--dry-run"),
 ) -> None:
     """Generates all Datacube classes based on the whitelists and field maps."""
     config_path = config_file or context.default_config
@@ -119,64 +257,188 @@ def sync(
     load_environment(env_path, logger=console.print)

     # Start with empty/default registry
-
-    config_data = yaml.safe_load(f)
+    config_data = _load_and_resolve_config(config_path)
     registry = DatacubeRegistry(config_data, params=context.params)
-
+
+    # --- Aggregation Phase ---
     # --- Aggregation Phase ---
     params = context.params
     databases = params.get("databases", [])
-
+
+    # JIT DISCOVERY CHECK
+    # If using simplified whitelist workflow, registry might be empty.
+    # Auto-discover from whitelist in-memory.
+    if not registry.tables:
+        console.print(
+            "[dim]Registry empty. Attempting JIT Discovery from Whitelists...[/dim]"
+        )
+
+        # Prepare URL resolver for orchestrator usage if needed
+        import json
+
+        cli_urls = json.loads(db_url_map) if db_url_map else {}
+
+        for db in databases:
+            conn_obj = db.get("connection_ref") or db.get("connection_obj")
+            nm = db.get("id") or db.get("name")
+
+            # Resolve whitelist/rules paths (reusing logic from discover command or simplifying?)
+            # Orchestrator handles defaults if paths passed are relative/simple strings.
+            # We need to resolve full paths to be safe, or trust Orchestrator logic.
+            # Let's rely on params provided to orchestrator logic via context.params
+
+            try:
+                # Resolve DB URL
+                import_spec = db.get("import_spec")
+                if import_spec and isinstance(import_spec, dict):
+                    imp = import_spec.get("module")
+                else:
+                    imp = db.get("global_import")
+                db_imports = [imp] if imp else registry.global_imports
+
+                # Helper to resolve
+                if conn_obj in cli_urls:
+                    db_conn_str = cli_urls[conn_obj]
+                else:
+                    db_conn_str = resolve_db_url(conn_obj, db_imports)
+
+                if not db_conn_str:
+                    console.print(
+                        f"[yellow]Skipping JIT discovery for {nm}: No DB URL.[/yellow]"
+                    )
+                    continue
+
+                # Initialize Orchestrator
+                # We need to construct paths similar to 'discover' command logic
+                # Or let Orchestrator defaults handle it.
+                # Better to pass explicit defaults from params if available.
+
+                disc_paths = params.get("paths", {}).get("discovery", {}) or params.get(
+                    "discovery", {}
+                )
+
+                whitelist_file = (
+                    db.get("whitelist_file")
+                    or disc_paths.get("whitelist_file")
+                    or params.get("whitelist_file")
+                    or f"discovery_whitelist_{conn_obj}.yaml"
+                )
+                rules_file = (
+                    db.get("rules_file")
+                    or disc_paths.get("rules_file")
+                    or params.get("rules_file")
+                    or f"discovery_rules_{conn_obj}.yaml"
+                )
+
+                # Anchoring
+                try:
+                    prj_root = config_path.parent.parent.parent
+                except Exception:
+                    prj_root = Path.cwd()
+
+                # Resolve Whitelist
+                if Path(whitelist_file).is_absolute():
+                    wl_path = whitelist_file
+                else:
+                    wl_path = str(prj_root / whitelist_file)
+
+                # Resolve Rules
+                if Path(rules_file).is_absolute():
+                    r_path = rules_file
+                else:
+                    r_path = str(prj_root / rules_file)
+
+                # console.print(f"DEBUG: {nm} -> WL Path: {wl_path} (Exists: {Path(wl_path).exists()})")
+
+                orchestrator = DiscoveryOrchestrator(
+                    params=context.params,
+                    rules_path=r_path,
+                    whitelist_path=wl_path,
+                    registry_path=str(config_path),  # Not saving, but needed for init?
+                    db_connection_str=db_conn_str,
+                    db_config=db,
+                )
+
+                entries = orchestrator.discover()
+                registry.merge_discovered(entries)
+
+            except Exception as e:
+                console.print(f"[red]JIT Discovery failed for {nm}: {e}[/red]")
+
     # 0. Generate Field Maps (if enabled)
     # Check generation.enable_field_maps (defaults to True)
     if params.get("generation", {}).get("enable_field_maps", True):
         import json
+
         cli_urls = json.loads(db_url_map) if db_url_map else {}
-
+
         def get_url_safe(conf_name, db_imp):
-
-
-
+            if conf_name in cli_urls:
+                return cli_urls[conf_name]
+            imp = [db_imp] if db_imp else registry.global_imports
+            return resolve_db_url(conf_name, imp)

-        _run_field_map_generation(
+        _run_field_map_generation(
+            context, config_path, databases, get_url_safe, force=force
+        )
         # Ensure new modules are picked up
         importlib.invalidate_caches()

     # Inject valid paths for security from context
-
-

     # Inject valid paths for security from context
-
+    # Also inject the resolved datacubes_dir since params determines it.
+
+    # Resolving datacubes_dir locally just in case context.valid_paths misses it
+    # (Context might rely on static registry or defaults, but params can contain overrides)
+    dc_dir = params.get("paths", {}).get("target", {}).get("datacubes_dir")
+
+    valid_paths = set(context.valid_paths)  # Use set for deduplication
+    if dc_dir:
+        # Resolve against project root if relative
+        if Path(dc_dir).is_absolute():
+            valid_paths.add(str(dc_dir))
+        else:
+            try:
+                # Heuristic re-resolution
+                prj_root = config_path.parent.parent.parent
+                valid_paths.add(str(prj_root / dc_dir))
+            except Exception:
+                valid_paths.add(str(Path.cwd() / dc_dir))
+
+    # Debug: Check if registry uses valid_paths correctly
+    registry.valid_paths = list(valid_paths)
     registry.valid_fieldmap_paths = context.valid_fieldmap_paths
-
+
     get_url = _get_db_url_callback(registry, db_url_map)

     # Group tables by target file
     file_groups = registry.group_tables_by_file()

     summary_table = Table(title="Sync Results")
-
-

     summary_table.add_column("File", style="magenta")
     summary_table.add_column("Classes", style="cyan")
     summary_table.add_column("Status")

+    generated_registry = {}
+
     for file_path_str, items in file_groups.items():
         if not is_secure_path(file_path_str, registry.valid_paths):
-            console.print(
+            console.print(
+                f"[bold red]Blocked:[/bold red] {file_path_str} is outside allowed paths."
+            )
             continue

         file_path = Path(file_path_str)
-
+
         is_append = False
         existing_content = ""
-
+
         if file_path.exists() and not force:
-            with open(file_path,
+            with open(file_path, "r") as f:
                 existing_content = f.read()
-
+
         missing_items = []
         for item in items:
             # item is (table_name, conf_obj, base_cls, base_imp, cls_name)
@@ -184,44 +446,59 @@ def sync(
             cls_name = item[4]
             if f"class {cls_name}" not in existing_content:
                 missing_items.append(item)
-
+
         if not missing_items:
-            summary_table.add_row(
+            summary_table.add_row(
+                file_path_str,
+                str(len(items)),
+                "[yellow]Skipped (All Exist)[/yellow]",
+            )
             continue
-
+
         items = missing_items
         is_append = True
-
+
         if dry_run:
-            status =
+            status = (
+                "[blue]Dry Run (Append)[/blue]" if is_append else "[blue]Dry Run[/blue]"
+            )
             summary_table.add_row(file_path_str, str(len(items)), status)
             continue

         # Prepare File Content
         imports_list, classes_code = generate_datacube_module_code(
-
-
-
-
+            items=items,
+            registry=registry,
+            get_db_url_callback=get_url,
+            logger=console.print,
         )
         imports = set(imports_list)

         # Collect used config objects for this file to filter imports
-        used_configs = set(item[1] for item in items if item[1])
-        filtered_global_imports = filter_global_imports(
-
+        used_configs = set(item[1] for item in items if item[1])  # item[1] is conf_obj
+        filtered_global_imports = filter_global_imports(
+            registry.global_imports, used_configs, ignored_prefixes=["solutions.conf"]
+        )
+
         if not classes_code:
-
-
-
-
-
+            if not is_append:
+                summary_table.add_row(
+                    file_path_str, "0", "[red]Failed (No Classes Generated)[/red]"
+                )
+            else:
+                summary_table.add_row(file_path_str, "0", "[red]Failed to Append[/red]")
+            continue

         if not is_append:
             # We are generating the field map with Mapping type hint, so we should allow it in the generator
             # but this file writes the datacube class.
-
-            full_content =
+
+            full_content = (
+                sorted(list(imports))
+                + filtered_global_imports
+                + ["\n# --- Generated ---"]
+                + classes_code
+            )
             file_path.parent.mkdir(parents=True, exist_ok=True)
             with open(file_path, "w") as f:
                 f.write("\n".join(full_content))
@@ -233,24 +510,106 @@ def sync(
             status_msg = f"[green]Appended {len(classes_code)} Classes[/green]"

         # Format using Ruff
-        subprocess.run(
+        subprocess.run(
+            ["uv", "run", "ruff", "format", str(file_path)], capture_output=True
+        )
         summary_table.add_row(file_path_str, str(len(items)), status_msg)

+        # --- Registry Collection ---
+        # Collect metadata for generated datacubes
+        # Structure: {conf_obj: {table_name: {class_name: ..., path: ...}}}
+        for item in items:
+            t_name = item[0]
+            conf_obj = item[1]
+            cls_n = item[4]
+            # Calculate path relative to project root
+            try:
+                if "project_root" not in locals():
+                    project_root = config_path.parent.parent.parent
+                rel_path = file_path.relative_to(project_root)
+            except Exception:
+                rel_path = file_path
+
+            if conf_obj not in generated_registry:
+                generated_registry[conf_obj] = {}
+
+            generated_registry[conf_obj][t_name] = {
+                "class_name": cls_n,
+                "path": str(rel_path),
+            }
+
     console.print(summary_table)

+    # --- Write Datacube Registry ---
+    reg_rel_path = params.get("paths", {}).get("repositories", {}).get(
+        "global_datacube_registry_file"
+    ) or params.get("global_datacube_registry_file")
+
+    if reg_rel_path and generated_registry:
+        try:
+            if Path(reg_rel_path).is_absolute():
+                reg_file = Path(reg_rel_path)
+            else:
+                if "project_root" not in locals():
+                    project_root = config_path.parent.parent.parent
+                reg_file = project_root / reg_rel_path
+
+            reg_file.parent.mkdir(parents=True, exist_ok=True)
+
+            # Group Logic Applied above.
+            # Sort keys for stability
+            reg_data = {
+                k: dict(sorted(v.items()))
+                for k, v in sorted(generated_registry.items())
+            }
+
+            with open(reg_file, "w") as f:
+                yaml.dump(reg_data, f, sort_keys=False)
+
+            console.print(
+                f"[green]Updated Datacube Registry at {reg_rel_path} ({len(generated_registry)} entries)[/green]"
+            )
+        except Exception as e:
+            console.print(f"[red]Failed to write Datacube Registry: {e}[/red]")
+
+
 @app.command()
 def discover(
     config_file: Optional[Path] = typer.Option(None, "--config"),
-    db_conf: str = typer.Option(
+    db_conf: str = typer.Option(
+        "replica_db_conf", help="Config object to use for discovery introspection"
+    ),
     db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
-    env_file: Optional[Path] = typer.Option(
-
-
-
-
-
-
-
+    env_file: Optional[Path] = typer.Option(
+        None, "--env-file", "-e", help="Path to environment file"
+    ),
+    update: bool = typer.Option(
+        False, "--update", help="Update the registry file in place"
+    ),
+    prune: bool = typer.Option(
+        False,
+        "--prune",
+        help="Remove tables from registry if they are not in the discovery result",
+    ),
+    run_sync: bool = typer.Option(
+        False, "--sync", help="Run sync immediately after update"
+    ),
+    dry_run: bool = typer.Option(
+        False, "--dry-run", help="Preview changes without saving (overrides --update)"
+    ),
+    generate_fields: bool = typer.Option(
+        False,
+        "--generate-fields",
+        help="Generate field_map files for discovered tables",
+    ),
+    force: bool = typer.Option(
+        False, "--force", "-f", help="Force overwrite of existing field maps"
+    ),
+    fields_root: str = typer.Option(
+        "solutions.conf.transforms.fields",
+        "--fields-root",
+        help="Python path root for field maps",
+    ),
 ) -> None:
     config_path = config_file or context.default_config
     if not config_path:
@@ -258,7 +617,7 @@ def discover(
         raise typer.Exit(code=1)

     gen_config_path = config_path.parent / "generator_config.yaml"
-
+
     # Resolve env_file: CLI > Params > Default
     if env_file:
         env_path = env_file
@@ -266,24 +625,49 @@ def discover(
         env_path = Path(context.params.get("defaults", {})["env_file"])
     else:
         env_path = Path(".env.linux")
-
+
     load_environment(env_path, logger=console.print)

-
-
-    config_data =
+    # Load Registry Config (Bootstrap if missing)
+    if config_path.exists():
+        config_data = _load_and_resolve_config(config_path)
+    else:
+        # If registry file doesn't exist (e.g. first run), initialize with minimal settings
+        # We need "cubes_root_path" from params usually.
+        # But wait, config_path IS the registry file path.
+        console.print(
+            f"[yellow]Registry file {config_path} not found. Initializing empty registry.[/yellow]"
+        )
+        config_path.parent.mkdir(parents=True, exist_ok=True)
+        # Try to infer cubes_root_path from context params if available
+        cubes_root = "dataobjects/gencubes"  # Default fallback
+        if context.params and "paths" in context.params:
+            cubes_root = (
+                context.params.get("paths", {})
+                .get("target", {})
+                .get("datacubes_dir", cubes_root)
+            )
+
+        config_data = {"settings": {"cubes_root_path": cubes_root}}
+        # We don't save it yet? Or should we?
+        # DatacubeRegistry will use this data. If we save later, it's fine.
+
     registry = DatacubeRegistry(config_data)

     import json
+
     # Resolve DB URL
     cli_urls = json.loads(db_url_map) if db_url_map else {}
+
     def get_url(conf_name):
         if conf_name in cli_urls:
             return cli_urls[conf_name]
         url = resolve_db_url(conf_name, registry.global_imports)
         if url:
             return url
-        raise ValueError(
+        raise ValueError(
+            f"Could not resolve DB URL for '{conf_name}'. Provide via --db-urls or check imports."
+        )

     # --- Initialize Global Field Registry (DEPRECATED) ---
     field_registry = None
@@ -297,34 +681,46 @@ def discover(

     params = context.params
     databases = params.get("databases", [])
-
+
     # Fallback to single DB mode if no databases defined (Backwards Compatibility)
     if not databases:
-        databases = [
-
-
-
-
-
-
+        databases = [
+            {
+                "name": db_conf,
+                "connection_obj": db_conf,
+                # Use standard name if not defined
+                "whitelist_file": "discovery_whitelist.yaml",
+                "rules_file": "discovery_rules.yaml",
+            }
+        ]

     # Filter if user requested specific DB via CLI (using db_conf arg as filter name)
-    # The `db_conf` argument defaults to "replica_db_conf".
+    # The `db_conf` argument defaults to "replica_db_conf".
     target_db_name = None
-
+
     aggregated_entries = {}
     last_orchestrator = None

+    # Load existing all_tables data if accumulating
+    global_tables_file = params.get("all_tables_file") or "all_tables.yaml"
+    global_tables_path = config_path.parent / global_tables_file
+    all_tables_data = {}
+    if global_tables_path.exists():
+        with open(global_tables_path, "r") as f:
+            all_tables_data = yaml.safe_load(f) or {}
+
     for db_config in databases:
         db_name = db_config.get("id") or db_config.get("name", "unknown")
         conn_obj = db_config.get("connection_ref") or db_config.get("connection_obj")
-
+
         # Determine whitelist path
         wl_filename = db_config.get("whitelist_file")
         if not wl_filename:
-
-
-
+            # Try global param fallback
+            wl_filename = params.get("discovery", {}).get(
+                "whitelist_file"
+            ) or params.get("whitelist_file")
+
         if not wl_filename:
             # Default convention: discovery_whitelist_<db_name>.yaml
             wl_filename = f"discovery_whitelist_{conn_obj}.yaml"
@@ -333,14 +729,18 @@ def discover(
         # Determine rules path
         rules_filename = db_config.get("rules_file")
         if not rules_filename:
-            rules_filename = params.get("discovery", {}).get(
-
+            rules_filename = params.get("discovery", {}).get(
+                "rules_file"
+            ) or params.get("rules_file")
+
         if not rules_filename:
-
+            rules_filename = f"discovery_rules_{conn_obj}.yaml"
         rules_path = config_path.parent / rules_filename

         # Determine blacklist path
-        bl_filename = db_config.get(
+        bl_filename = db_config.get(
+            "blacklist_file", f"discovery_blacklist_{conn_obj}.yaml"
+        )
         blacklist_path = config_path.parent / bl_filename

         console.print(f"[bold cyan]Discovering: {db_name} ({conn_obj})[/]")
@@ -351,16 +751,18 @@ def discover(
             # Support proper import_spec from new config or legacy global_import
             import_spec = db_config.get("import_spec")
             if import_spec and isinstance(import_spec, dict):
-
+                imp = import_spec.get("module")
             else:
-
+                imp = db_config.get("global_import")
             db_imports = [imp] if imp else registry.global_imports
             if not db_imports and registry.global_imports:
-
-
+                db_imports = registry.global_imports
+
             db_conn_str = resolve_db_url(conn_obj, db_imports)
         except Exception:
-            console.print(
+            console.print(
+                f"[red]Could not resolve connection {conn_obj}. Skipping.[/red]"
+            )
             continue

         orchestrator = DiscoveryOrchestrator(
@@ -370,43 +772,62 @@ def discover(
             whitelist_path=str(whitelist_path),
             registry_path=str(config_path),
             db_connection_str=db_conn_str,
-            db_config=db_config
+            db_config=db_config,
         )
-
+
         try:
             entries = orchestrator.discover()
             aggregated_entries.update(entries)
             last_orchestrator = orchestrator
+
+            # --- Capture Raw Tables for all_tables.yaml ---
+            if hasattr(orchestrator, "raw_tables") and orchestrator.raw_tables:
+                # Sort for consistency
+                all_tables_data[conn_obj] = sorted(list(orchestrator.raw_tables))
+                console.print(
+                    f"[green]Captured {len(orchestrator.raw_tables)} raw tables for {conn_obj}[/green]"
+                )
         except Exception as e:
-
-
-
-
-
-
+            console.print(f"[red]Discovery failed for {db_name}: {e}[/red]")
+            if not dry_run:
+                raise  # Fail hard if not dry run? Or continue? Let's buffer errors?
+            # For now, log and continue might result in partial registry which is bad (prune would wipe missing).
+            # Fail safe:
+            return

     # Aggregate global imports from ALL databases to ensure registry has them
     aggregated_global_imports = set(params.get("global_imports", []))
     for db in databases:
         if "global_import" in db:
             aggregated_global_imports.add(db["global_import"])
-
+
     # Save Aggregated Registry
     if last_orchestrator:
         console.print("")
         # Inject aggregated imports into the last orchestrator's update logic?
         # The orchestrator's save_registry loads existing, updates tables, and saves.
         # It DOES NOT currently update global_imports. We need to add that cap.
-
+
         # Helper manual update for now, or update Orchestrator to support it?
-        # Let's update Orchestrator.save_registry to accept global_imports update.
         last_orchestrator.save_registry(
-            aggregated_entries,
-            dry_run=dry_run,
-            prune=prune,
-            global_imports=list(aggregated_global_imports)
+            aggregated_entries,
+            dry_run=dry_run,
+            prune=prune,
+            global_imports=list(aggregated_global_imports),
        )
-
+
+    # Save all_tables.yaml
+    if not dry_run and all_tables_data:
+        with open(global_tables_path, "w") as f:
+            yaml.dump(all_tables_data, f, sort_keys=False)
+        console.print(
+            f"[bold green]Updated {global_tables_file} with raw tables from providers.[/bold green]"
+        )
+    elif dry_run:
+        console.print(
+            f"[yellow]DRY RUN: Would update {global_tables_file} with {len(all_tables_data)} providers.[/yellow]"
+        )
+
     # Save Registry changes (collected during discovery)
     if not dry_run and field_registry:
         field_registry.save()
@@ -415,29 +836,29 @@ def discover(
     if generate_fields and not dry_run:
         console.print("")
         console.rule("[bold blue]Generating Field Maps[/]")
-
+
         # Reload registry to ensure we have latest discovered tables
-        with open(config_path,
+        with open(config_path, "r") as f:
             updated_config_data = yaml.safe_load(f)
         updated_registry = DatacubeRegistry(updated_config_data, params=context.params)

         # Convert python path to physical path
-        phys_root = Path(fields_root.replace(
-
+        phys_root = Path(fields_root.replace(".", "/"))
+
         # Group tables by connection to use correct inspector
         tables_by_conn = {}
         for t_name, t_data in updated_registry.tables.items():
-            conn = t_data.get(
+            conn = t_data.get("connection_obj", updated_registry.default_connection_obj)
             if conn not in tables_by_conn:
                 tables_by_conn[conn] = {}
             tables_by_conn[conn][t_name] = t_data
-
+
         for conn_obj, table_group in tables_by_conn.items():
             try:
                 db_url = get_url(conn_obj)
                 engine = sa.create_engine(db_url)
                 inspector = inspect(engine)
-
+
                 # Use the promoted Generator
                 generate_field_map_files(
                     discovered_entries=table_group,
@@ -445,35 +866,46 @@ def discover(
                     root_path=phys_root,
                     force=force,
                     logger=console.print,
-                    allowed_paths=
+                    allowed_paths=(
+                        updated_registry.valid_fieldmap_paths
+                        if hasattr(updated_registry, "valid_fieldmap_paths")
+                        else None
+                    ),
                 )
             except Exception as e:
-                console.print(
-
+                console.print(
+                    f"[red]Error generating fields for connection {conn_obj}: {e}[/red]"
+                )
+
     # Chained Sync
     if run_sync:
         if dry_run:
             console.print("[yellow]Skipping sync in dry-run mode.[/yellow]")
         elif not update:
-
+            console.print(
+                "[yellow]Sync skipped: Registry not updated (use --update to enable chaining).[/yellow]"
+            )
         else:
-
-
-
-
-
-
-
-
-
-
+            console.print("")
+            console.rule("[bold blue]Auto-Syncing Datacubes[/]")
+            # Call sync command directly with current context options
+            sync(
+                config_file=config_path,
+                db_url_map=db_url_map,
+                force=True,
+                env_file=env_file,
+                dry_run=False,
+            )
+

 @app.command()
 def scan(
     config_file: Optional[Path] = typer.Option(None, "--config"),
     db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
     env_file: Optional[Path] = typer.Option(None, "--env-file", "-e"),
-    db_name: Optional[str] = typer.Option(
+    db_name: Optional[str] = typer.Option(
+        None, "--db", help="Target specific database from params"
+    ),
 ) -> None:
     """
     Introspects configured databases and dumps table lists to YAML.
@@ -487,41 +919,41 @@ def scan(
     # Resolve env_file: CLI > Params > Default
     if env_file:
         env_path = env_file
-    elif context.params and "
-        env_path = Path(context.params["env_file"])
+    elif context.params and context.params.get("defaults", {}).get("env_file"):
+        env_path = Path(context.params.get("defaults", {})["env_file"])
     else:
         env_path = Path(".env.linux")
     load_environment(env_path, logger=console.print)

-
-        config_data = yaml.safe_load(f)
+    config_data = _load_and_resolve_config(config_path)
     registry = DatacubeRegistry(config_data, params=context.params)

     import json
+
     cli_urls = json.loads(db_url_map) if db_url_map else {}
-
+
     # Helper Resolution
     def get_url_safe(conf_name, db_imp):
-
-
-
+        if conf_name in cli_urls:
+            return cli_urls[conf_name]
+        imp = [db_imp] if db_imp else registry.global_imports
+        return resolve_db_url(conf_name, imp)

     params = context.params
     databases = params.get("databases", [])
-
+
     # Filter targets
     target_dbs = databases
     if db_name:
         target_dbs = [d for d in databases if d.get("name") == db_name]
         if not target_dbs:
-
-
+            console.print(f"[red]Database '{db_name}' not found.[/red]")
+            raise typer.Exit(code=1)

-
     # Resolve global output file
     global_tables_file = params.get("all_tables_file") or "all_tables.yaml"
     global_tables_path = config_path.parent / global_tables_file
-
+
     # Load existing data to preserve config for DBs not being scanned
     all_tables_data = {}
     if global_tables_path.exists():
@@ -531,37 +963,47 @@ def scan(
     for db in target_dbs:
         name = db.get("name")
         conn_obj = db.get("connection_obj")
-
+
         console.print(f"[bold cyan]Scanning: {name} ...[/bold cyan]")
         try:
             db_url = get_url_safe(conn_obj, db.get("global_import"))
             if not db_url:
                 console.print(f"[red]Could not resolve URL for {conn_obj}[/red]")
                 continue
-
+
             engine = sa.create_engine(db_url)
             inspector = inspect(engine)
             tables = sorted(inspector.get_table_names())
-
+
             # Update shared dictionary
             all_tables_data[conn_obj] = tables
             console.print(f"[green]Found {len(tables)} tables for {conn_obj}[/green]")
-
+
         except Exception as e:
             console.print(f"[red]Scan failed for {name}: {e}[/red]")
-            if params.get("debug"):
+            if params.get("debug"):
+                raise e

     # Persist aggregated result
     with open(global_tables_path, "w") as f:
         yaml.dump(all_tables_data, f, sort_keys=False)
-
-    console.print(
+
+    console.print(
+        f"[bold green]Updated table list at {global_tables_path}[/bold green]"
+    )
+

 @app.command()
 def drift(
     config_file: Optional[Path] = typer.Option(None, "--config"),
-    db_url_map: Optional[str] = typer.Option(
-
+    db_url_map: Optional[str] = typer.Option(
+        None,
+        "--db-urls",
+        help="Optional JSON mapping. If omitted, tries to resolve from code.",
+    ),
+    env_file: Optional[Path] = typer.Option(
+        None, "--env-file", "-e", help="Path to environment file"
+    ),
 ) -> None:
     """
     Checks for 'drift' between the generated Python classes and the DB schema.
@@ -574,21 +1016,19 @@ def drift(
     # Resolve env_file: CLI > Params > Default
     if env_file:
         env_path = env_file
-    elif context.params and "
-        env_path = Path(context.params["env_file"])
+    elif context.params and context.params.get("defaults", {}).get("env_file"):
+        env_path = Path(context.params.get("defaults", {})["env_file"])
     else:
         env_path = Path(".env.linux")

     load_environment(env_path, logger=console.print)

+    config_data = _load_and_resolve_config(config_path)

-    with open(config_path, 'r') as f:
-        config_data = yaml.safe_load(f)
-
     registry = DatacubeRegistry(config_data)
     get_url = _get_db_url_callback(registry, db_url_map)
     cli_urls = json.loads(db_url_map) if db_url_map else {}
-
+
     drift_table = Table(title="Schema Drift Analysis")
     drift_table.add_column("Class", style="cyan")
     drift_table.add_column("Status", style="bold")
@@ -598,22 +1038,32 @@ def drift(
     attribute_names = list(registry.processed_mappings.keys())

     for table_name, details in registry.tables.items():
-        target = details.get(
+        target = details.get("save_to_path", details.get("path"))
         if not target:
-
-
+            drift_table.add_row(
+                table_name, "[red]Config Error[/red]", "Missing save_to_path"
+            )
+            continue
         path = Path(target)
         if not path.exists():
-            console.print(
+            console.print(
+                f"[yellow]Skipping {table_name}: File {path} not found.[/yellow]"
+            )
             continue

         # 1. Determine Class Name
-        provided_class_name = details.get(
-        class_name =
+        provided_class_name = details.get("class_name")
+        class_name = (
+            provided_class_name
+            if provided_class_name
+            else "".join(w.capitalize() for w in table_name.split("_")) + "Dc"
+        )

         # 2. Dynamically load the generated class from the file
         try:
-            spec = importlib.util.spec_from_file_location(
+            spec = importlib.util.spec_from_file_location(
+                f"dynamic_mod_{table_name}", path
+            )
             mod = importlib.util.module_from_spec(spec)
             spec.loader.exec_module(mod)
             dc_class = getattr(mod, class_name)
@@ -624,35 +1074,41 @@ def drift(
         # 3. Determine DB URL
         # Priority: CLI Override > Class Attribute > Registry Config
         db_url = None
-        conf_obj = details.get(
+        conf_obj = details.get(
+            "connection_obj", details.get("config_obj", registry.default_connection_obj)
+        )

         if conf_obj in cli_urls:
             db_url = cli_urls[conf_obj]
-        elif hasattr(dc_class,
-            db_url = getattr(dc_class,
-        elif hasattr(dc_class,
-            db_url = dc_class.config.get(
-
+        elif hasattr(dc_class, "connection_url"):
+            db_url = getattr(dc_class, "connection_url")
+        elif hasattr(dc_class, "config") and isinstance(dc_class.config, dict):
+            db_url = dc_class.config.get("connection_url")
+
         # Fallback to registry resolution
         if not db_url:
             try:
                 db_url = get_url(conf_obj)
             except Exception:
-                pass
+                pass

         # 4. Introspect DB
         try:
             engine = sa.create_engine(db_url)
             inspector = inspect(engine)
-            db_cols = {c[
+            db_cols = {c["name"] for c in inspector.get_columns(table_name)}
         except Exception as e:
-
-
+            drift_table.add_row(class_name, "[red]DB Error[/red]", repr(e))
+            continue

         # 5. Extract Field Map (if any)
-        field_map = getattr(dc_class,
-        if
-
+        field_map = getattr(dc_class, "field_map", None)
+        if (
+            not field_map
+            and hasattr(dc_class, "config")
+            and isinstance(dc_class.config, dict)
+        ):
+            field_map = dc_class.config.get("field_map")

         # 6. Check Drift
         issues = check_drift(dc_class, db_cols, attribute_names, field_map=field_map)
@@ -665,12 +1121,80 @@ def drift(
     console.print(drift_table)


+@app.command()
+def propose_rules(
+    config_file: Optional[Path] = typer.Option(None, "--config"),
+    dry_run: bool = typer.Option(
+        False, "--dry-run", help="Preview rules without saving"
+    ),
+):
+    """
+    Analyzes all_tables.yaml and proposes new discovery rules.
+    """
+    config_path = config_file or context.default_config
+    if not config_path:
+        console.print("[red]No config file specified and no default configured.[/red]")
+        raise typer.Exit(code=1)
+
+    # Resolve paths via helper to ensure Project Root logic is applied
+    resolved_config = _load_and_resolve_config(config_path)
+
+    # We rely on all_tables.yaml being generated by scan/discover
+    # The resolved config will have absolute paths for these if the helper worked.
+    params = context.params
+
+    # Prefer resolved values if available
+    if (
+        "discovery" in resolved_config
+        and "all_tables_file" in resolved_config["discovery"]
+    ):
+        all_tables_path = Path(resolved_config["discovery"]["all_tables_file"])
+    else:
+        # Fallback to manual resolution (legacy or if not in discovery block)
+        raw_val = (
+            params.get("discovery", {}).get("all_tables_file")
+            or params.get("all_tables_file")
+            or "all_tables.yaml"
+        )
+        all_tables_path = config_path.parent / raw_val
+
+    # Rules File
+    if "discovery" in resolved_config and "rules_file" in resolved_config["discovery"]:
+        rules_path = Path(resolved_config["discovery"]["rules_file"])
+    else:
+        raw_rules = (
+            params.get("discovery", {}).get("rules_file")
+            or params.get("rules_file")
+            or "discovery_rules.yaml"
+        )
+        rules_path = config_path.parent / raw_rules
+
+    if not all_tables_path.exists():
+        console.print(
+            f"[red]Error: {all_tables_path} not found. Run 'dc-scan' first.[/red]"
+        )
+        raise typer.Exit(code=1)
+
+    engine = RuleEngine(all_tables_path, rules_path)
+    engine.load()
+    updates = engine.propose_rules()
+
+    if dry_run:
+        console.print("[bold yellow]Proposed Updates:[/]")
+        for conn, rules in updates.items():
+            console.print(f"[cyan]{conn}:[/]")
+            for r in rules:
+                console.print(f"  - {r}")
+    else:
+        engine.save_proposal(updates)


 @app.command()
 def match(
     config_file: Optional[Path] = typer.Option(None, "--config"),
-    db_name: Optional[str] = typer.Option(
+    db_name: Optional[str] = typer.Option(
+        None, "--db", help="Target specific database from params"
+    ),
 ) -> None:
     """
     Applies discovery rules to scanned tables and generates whitelists (registry).
@@ -682,23 +1206,23 @@ def match(
         raise typer.Exit(code=1)

     import yaml
-
+
     # Load Params
-    params_path =
+    params_path = (
+        config_path.parent / "discovery_params.yaml"
+    )  # Assuming relative location or loaded via context
     # Context should already have params if set_context_defaults ran, but to be safe/standalone:
     params = context.params
     databases = params.get("databases", [])
     folder_prefix = params.get("folder_prefix", "solutions/dataobjects/gencubes/")
     fields_suffix = params.get("fields_module_root", "fields")

-
-
     target_dbs = databases
     if db_name:
         target_dbs = [d for d in databases if d.get("name") == db_name]
         if not target_dbs:
-
-
+            console.print(f"[red]Database '{db_name}' not found.[/red]")
+            raise typer.Exit(code=1)

     for db in target_dbs:
         name = db.get("name")
@@ -706,36 +1230,40 @@ def match(
         rules_file = db.get("rules_file")
         whitelist_file = db.get("whitelist_file")
         conn_obj = db.get("connection_obj")
-
+
         # Path Composition
         db_domain = db.get("db_domain")
         import_base = Path(folder_prefix)
         if db_domain:
             import_base = import_base / db_domain
         import_base = import_base / fields_suffix
-
+
         try:
             import_base = import_base.relative_to(Path.cwd())
         except ValueError:
             pass
         fields_module_base = str(import_base).replace("/", ".")
-
+
         if not (all_tables_file and rules_file and whitelist_file):
-
-
+            console.print(f"[yellow]Skipping {name}: Missing file config.[/yellow]")
+            continue

         tables_path = config_path.parent / all_tables_file
         rules_path = config_path.parent / rules_file
         out_path = config_path.parent / whitelist_file
-
+
         if not tables_path.exists():
-            console.print(
+            console.print(
+                f"[red]Skipping {name}: {all_tables_file} not found. Run 'scan' first.[/red]"
+            )
             continue
-
+
         if not rules_path.exists():
-
-
-
+            console.print(
+                f"[yellow]Skipping {name}: Rules file {rules_file} not found.[/yellow]"
+            )
+            continue
+
         # Load existing whitelist to preserve customizations
         existing_whitelist = {}
         if out_path.exists():
@@ -745,71 +1273,79 @@ def match(

         with open(tables_path, "r") as f:
             all_tables = yaml.safe_load(f) or []
-
+
         with open(rules_path, "r") as f:
             rules_data = yaml.safe_load(f) or []
-
+
         # Match Logic
-        console.print(
-
+        console.print(
+            f"[bold cyan]Matching: {name} ({len(all_tables)} tables)[/bold cyan]"
+        )
+
         matches = {}
         matched_count = 0
-
+
         for table in sorted(all_tables):
             # Find first matching rule
             matched_rule = None
             for r in rules_data:
                 pattern = r.get("pattern")
                 mtype = r.get("match_type", "exact")
-
+
                 is_match = False
                 if mtype == "exact" and table == pattern:
                     is_match = True
                 elif mtype == "prefix" and table.startswith(pattern):
                     is_match = True
                 elif mtype == "regex":
-
-
-
-
+                    import re
+
+                    if re.search(pattern, table):
+                        is_match = True
+
                 if is_match:
                     matched_rule = r
                     break
-
+
             if matched_rule:
                 # Construct Registry Entry
                 # Resolve path using folder_prefix
                 template = matched_rule.get("output_template", f"{table}_cubes.py")
                 domain = matched_rule.get("domain", "common")
-
+
                 # Path Construction: folder_prefix + db_domain + domain + output_template
                 db_domain = db.get("db_domain", "")
-
+
                 # Careful not to double slash if db_domain is empty, but Path handles it.
                 # template should now be just filename per rule updates.
                 full_path_obj = Path(folder_prefix)
                 if db_domain:
                     full_path_obj = full_path_obj / db_domain
-
+
                 full_path_obj = full_path_obj / domain / template
                 full_path = str(full_path_obj)
-
+
                 # Class Name Generation
                 # Check if exists in old whitelist
                 existing_entry = existing_whitelist.get(table, {})
-
+
                 custom_name = existing_entry.get("custom_name")
-
+
                 if custom_name:
                     class_name = custom_name
                 elif existing_entry.get("class_name"):
                     class_name = existing_entry.get("class_name")
                 else:
                     class_suffix = params.get("class_suffix", "Dc")
-                    class_name =
+                    class_name = (
+                        "".join(w.capitalize() for w in table.split("_")) + class_suffix
+                    )
+
+                field_map_template = matched_rule.get(
+                    "field_map_template",
+                    f"{fields_module_base}.{{domain}}.{{table}}.field_map",
+                )

-                field_map_template = matched_rule.get("field_map_template", f"{fields_module_base}.{{domain}}.{{table}}.field_map")
-
                 # Construct defaults
                 entry = {
                     "path": full_path,
@@ -817,17 +1353,17 @@ def match(
 "domain": domain,
 "class_name": class_name,
 # Ensure field_map is assigned
- "field_map": field_map_template.format(domain=domain, table=table)
+ "field_map": field_map_template.format(domain=domain, table=table),
 }
-
+
 # Preserve custom_name if present, else default to None
 entry["custom_name"] = custom_name if custom_name else None
-
- # Preserve other fields if needed?
+
+ # Preserve other fields if needed?
 # User asked specifically for keys on class_name preservation.
- # But generally we might want to respect other overrides?
+ # But generally we might want to respect other overrides?
 # For now, strict to class_name per request + generation logic.
-
+
 matches[table] = entry
 matched_count += 1
 
@@ -835,15 +1371,20 @@ def match(
 output_data = {"tables": matches}
 with open(out_path, "w") as f:
 yaml.dump(output_data, f, sort_keys=False)
-
- console.print(
+
+ console.print(
+ f"[green]Matched {matched_count} tables. Written to {out_path}[/green]"
+ )
+
 
 @app.command()
 def map(
 config_file: Optional[Path] = typer.Option(None, "--config"),
 db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
 env_file: Optional[Path] = typer.Option(None, "--env-file", "-e"),
- db_name: Optional[str] = typer.Option(
+ db_name: Optional[str] = typer.Option(
+ None, "--db", help="Target specific database from params"
+ ),
 force: bool = typer.Option(False, "--force", "-f"),
 ) -> None:
 """
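For orientation between hunks: the `match` command above resolves each table against the first rule whose pattern matches it, comparing by exact name, prefix, or regular expression. The following is an illustrative, self-contained sketch of that first-match resolution; the rule keys ("pattern", "match_type", "domain", "output_template") come from the diff, while the concrete patterns and table names are invented for the example.

# Illustrative only: first-match rule resolution as used by the `match` command.
import re

rules_data = [
    {"pattern": "asm_", "match_type": "prefix", "domain": "assembly",
     "output_template": "asm_cubes.py"},
    {"pattern": r".*_audit$", "match_type": "regex", "domain": "audit"},
]

def first_matching_rule(table, rules):
    # Mirrors the exact / prefix / regex checks in the loop above.
    for r in rules:
        pattern = r.get("pattern")
        mtype = r.get("match_type", "exact")
        if mtype == "exact" and table == pattern:
            return r
        if mtype == "prefix" and table.startswith(pattern):
            return r
        if mtype == "regex" and re.search(pattern, table):
            return r
    return None

print(first_matching_rule("asm_orders", rules_data))    # matched by the prefix rule
print(first_matching_rule("billing_audit", rules_data)) # matched by the regex rule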
@@ -855,50 +1396,66 @@ def map(
 console.print("[red]No config file specified.[/red]")
 raise typer.Exit(code=1)
 
+ # Env Load
 # Env Load
 if env_file:
 env_path = env_file
- elif context.params and "
- env_path = Path(context.params["env_file"])
+ elif context.params and context.params.get("defaults", {}).get("env_file"):
+ env_path = Path(context.params.get("defaults", {})["env_file"])
 else:
 env_path = Path(".env.linux")
 load_environment(env_path, logger=console.print)
-
+
 import json
+
 cli_urls = json.loads(db_url_map) if db_url_map else {}
- registry = DatacubeRegistry({}, params=context.params)
+ registry = DatacubeRegistry({}, params=context.params)  # Dummy reg for imports
 
 def get_url_safe(conf_name, db_imp):
-
-
-
+ if conf_name in cli_urls:
+ return cli_urls[conf_name]
+ imp = [db_imp] if db_imp else registry.global_imports
+ return resolve_db_url(conf_name, imp)
 
 params = context.params
 databases = params.get("databases", [])
-
+
 target_dbs = databases
 if db_name:
 target_dbs = [d for d in databases if d.get("name") == db_name]
-
- _run_field_map_generation(
+
+ _run_field_map_generation(
+ context, config_path, target_dbs, get_url_safe, force=force
+ )
 return
+
+
 @app.command()
 def init(
 config_file: Optional[Path] = typer.Option(None, "--config"),
- db_conf: Optional[str] = typer.Option(
-
+ db_conf: Optional[str] = typer.Option(
+ None, help="Config object to use for introspection"
+ ),
+ db_name: Optional[str] = typer.Option(
+ None, "--db", help="Target specific database from params"
+ ),
 db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
 env_file: Optional[Path] = typer.Option(None, "--env-file", "-e"),
- dump_schema: Optional[Path] = typer.Option(
-
-
-
+ dump_schema: Optional[Path] = typer.Option(
+ None, "--dump-schema", help="Dump database schema"
+ ),
+ init_rules: Optional[Path] = typer.Option(
+ None, "--init-rules", help="Initialize discovery rules from DB tables"
+ ),
+ reset: bool = typer.Option(
+ False, "--reset", help="Reset registry and config to defaults"
+ ),
 ) -> None:
 """
- Initializes configuration
+ Initializes configuration and schema dumps.
 """
 params = context.params
-
+
 # Determine Targets
 databases = params.get("databases", [])
 target_dbs = []
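The hunk above adds several options to `init` (--db, --dump-schema, --init-rules, --reset). An illustrative sketch of exercising them through typer's test runner follows; the option names and the `app` object come from the diff, while the file paths are invented and a working project configuration is assumed to be in place.

# Illustrative only: invoking the extended `init` command via typer's CliRunner.
from typer.testing import CliRunner

from sibi_flux.datacube.cli import app

runner = CliRunner()
result = runner.invoke(
    app,
    [
        "init",
        "--config", "generators/datacubes/datacube_registry.yaml",  # invented path
        "--db", "replica_db",                                        # invented DB name
        "--init-rules", "generators/datacubes/discovery_rules.yaml",  # invented path
    ],
)
print(result.output)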
@@ -907,203 +1464,657 @@ def init(
 # Filter specific DB
 target_dbs = [d for d in databases if d.get("name") == db_name]
 if not target_dbs:
-
-
+ console.print(f"[red]Database '{db_name}' not found in params.[/red]")
+ raise typer.Exit(code=1)
 elif databases:
 # All DBs
 target_dbs = databases
 else:
 # Legacy Fallback
- target_db_conf = db_conf or params.get(
-
+ target_db_conf = db_conf or params.get(
+ "default_connection_obj", "replica_db_conf"
+ )
+ target_dbs = [
+ {
+ "name": target_db_conf,
+ "connection_obj": target_db_conf,
+ "whitelist_file": "discovery_whitelist.yaml",
+ }
+ ]
 
 # Validate db_conf override if provided (only if single target or legacy)
 if db_conf and not db_name and not databases:
-
+ target_dbs[0]["connection_obj"] = db_conf
 
 # Resolve env_file: CLI > Params > Default
 if env_file:
 env_path = env_file
- elif context.params and "
- env_path = Path(context.params["env_file"])
+ elif context.params and context.params.get("defaults", {}).get("env_file"):
+ env_path = Path(context.params.get("defaults", {})["env_file"])
 else:
 env_path = Path(".env.linux")
 load_environment(env_path, logger=console.print)
 
-
 # Resolve Context (Registry/Config Paths)
 # Be robust if config_file doesn't exist yet
 config_path = config_file or context.default_config
-
+
 if not config_path:
 console.print("[red]No config file target specified.[/red]")
 raise typer.Exit(code=1)
 
 import json
+
 cli_urls = json.loads(db_url_map) if db_url_map else {}
 
 # Helper to resolve URL without a full registry instance if file missing
 def resolve_url_safe(conf_name):
- if conf_name in cli_urls:
-
+ if conf_name in cli_urls:
+ return cli_urls[conf_name]
+
 # Check params first
 for db in databases:
-
-
-
-
-
-
+ if db.get("connection_obj") == conf_name:
+ imp = db.get("global_import")
+ if imp:
+ url = resolve_db_url(conf_name, [imp])
+ if url:
+ return url
+
 # Try loading defaults if registry file exists
 if config_path.exists():
- with open(config_path,
+ with open(config_path, "r") as f:
 data = yaml.safe_load(f)
 # Minimal registry just to get imports/resolution
 reg = DatacubeRegistry(data, params=context.params)
 url = resolve_db_url(conf_name, reg.global_imports)
- if url:
-
+ if url:
+ return url
+
 # Fallback: try raw resolve (might fail if imports missing)
- url = resolve_db_url(conf_name, [])
- if url:
-
+ url = resolve_db_url(conf_name, [])
+ if url:
+ return url
+ raise ValueError(
+ "Cannot resolve DB URL. Please ensure registry exists or use --db-urls."
+ )
 
 # 1. Reset / Initialize Files (Global)
 if reset:
 if typer.confirm("Are you sure you want to reset the registry?"):
-
-
-
-
-
-
-
-
-
-
-
-
+ # We rely on params being provided by the wrapper/context now.
+ if not params:
+ console.print(
+ "[yellow]Warning: No params loaded from context. Defaults may be minimal.[/yellow]"
+ )
+
+ # Registry Default
+ default_registry = {
+ "global_imports": params.get("global_imports", []),
+ "tables": {},
+ }
+ with open(config_path, "w") as f:
+ yaml.dump(default_registry, f, sort_keys=False)
+ console.print(f"[green]Reset {config_path}[/green]")
 
 # Loop over targets for DB-specific actions
 for db in target_dbs:
 db_name = db.get("name")
 conn_obj = db.get("connection_obj")
-
+
 try:
 db_url = resolve_url_safe(conn_obj)
 engine = sa.create_engine(db_url)
 except Exception as e:
- console.print(
+ console.print(
+ f"[red]Skipping {db_name}: Cannot check DB connection ({e})[/red]"
+ )
 continue
 
 # 2. Dump Schema
 if dump_schema:
 from sibi_flux.datacube.generator import dump_db_schema
+
 console.print(f"[bold]Dumping schema for {db_name}...[/bold]")
 dump_db_schema(
 engine=engine,
 db_name=db_name,
 output_dir=dump_schema,
- logger=console.print
+ logger=console.print,
 )
-
- # 3. Initialize
- if
+
+ # 3. Initialize Rules (Global/Merged?)
+ if init_rules:
 insp = inspect(engine)
 tables = insp.get_table_names()
-
-
- if
- #
-
-
-
-
-
-
-
+
+ target_path = init_rules
+ if not target_path.is_absolute():
+ # Default to Project Root anchoring for consistency
+ try:
+ project_root = config_path.parent.parent.parent
+ except Exception:
+ project_root = Path.cwd()
+
+ target_path = project_root / target_path
+
+ # Ensure parent dir exists
+ if not target_path.parent.exists():
+ target_path.parent.mkdir(parents=True, exist_ok=True)
+
+ # Load existing to append?
+ existing_rules = []
+ if target_path.exists():
+ with open(target_path, "r") as f:
+ existing_rules = yaml.safe_load(f) or []
+
+ console.print(f"Appending rules for {db_name} tables...")
+
+ new_rules = []
+ for table in sorted(tables):
+ # Check existence
+ if any(r["pattern"] == table for r in existing_rules):
+ continue
+
+ new_rules.append(
+ {
+ "pattern": table,
+ "match_type": "exact",
+ "domain": "common",
+ "output_template": f"common/{table}_cubes.py",
+ "db_conn_override": conn_obj,
+ }
+ )
+
+ all_rules = existing_rules + new_rules
+
+ with open(target_path, "w") as f:
+ yaml.dump(all_rules, f, sort_keys=False)
+ console.print(f"[green]Rules updated for {db_name}.[/green]")
+
+
+ @app.command()
+ def whitelist(
+ config_file: Optional[Path] = typer.Option(None, "--config"),
+ db_name: Optional[str] = typer.Option(
+ None, "--db", help="Target specific database from params"
+ ),
+ db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
+ env_file: Optional[Path] = typer.Option(None, "--env-file", "-e"),
+ force: bool = typer.Option(False, "--force", "-f"),
+ ) -> None:
+ """
+ Generates whitelist files based on discovery rules and database schema.
+ """
+ params = context.params
+ databases = params.get("databases", [])
+
+ # 1. Determine Targets
+ target_dbs = []
+ if db_name:
+ target_dbs = [d for d in databases if d.get("name") == db_name]
+ if not target_dbs:
+ console.print(f"[red]Database '{db_name}' not found in params.[/red]")
+ raise typer.Exit(code=1)
+ elif databases:
+ target_dbs = databases
+ if not target_dbs:
+ # Legacy
+ target_db_conf = params.get("default_connection_obj", "replica_db_conf")
+ target_dbs = [{"name": target_db_conf, "connection_obj": target_db_conf}]
+
+ # 2. Env Load
+ if env_file:
+ env_path = env_file
+ elif context.params and context.params.get("defaults", {}).get("env_file"):
+ env_path = Path(context.params.get("defaults", {})["env_file"])
+ else:
+ env_path = Path(".env.linux")
+ load_environment(env_path, logger=console.print)
+
+ # 3. Config Path
+ config_path = config_file or context.default_config
+ if not config_path:
+ console.print("[red]No config file target specified.[/red]")
+ raise typer.Exit(code=1)
+
+ import json
+
+ cli_urls = json.loads(db_url_map) if db_url_map else {}
+
+ config_data = _load_and_resolve_config(
+ config_path
+ )  # Since we need rules/paths resolved
+
+ registry = DatacubeRegistry(config_data, params=context.params)
+
+ def resolve_url_safe(conf_name, db_imp):
+ if conf_name in cli_urls:
+ return cli_urls[conf_name]
+ imp = [db_imp] if db_imp else registry.global_imports
+ return resolve_db_url(conf_name, imp)
+
+ # 4. Iterate and Generate
+ for db in target_dbs:
+ db_name = db.get("name")
+ conn_obj = db.get("connection_obj")
+
+ try:
+ db_url = resolve_url_safe(conn_obj, db.get("global_import"))
+ engine = sa.create_engine(db_url)
+ except Exception as e:
+ console.print(
+ f"[red]Skipping {db_name}: Cannot check DB connection ({e})[/red]"
+ )
+ continue
+
+ insp = inspect(engine)
+ tables = insp.get_table_names()
+
+ # Determine path (Config Driven)
+ discovery_cfg = params.get("paths", {}).get("discovery") or params.get(
+ "discovery", {}
+ )
+ wl_file = (
+ discovery_cfg.get("whitelist_file")
+ or params.get("whitelist_file")
+ or "whitelist.yaml"
+ )
+ target_path = Path(wl_file)
+ if not target_path.is_absolute():
+ try:
+ project_root = config_path.parent.parent.parent
+ except Exception:
+ project_root = Path.cwd()
+ target_path = project_root / target_path
+
+ # Load Rules
+ discovery_cfg = params.get("paths", {}).get("discovery") or params.get(
+ "discovery", {}
+ )
+ rules_file = (
+ discovery_cfg.get("rules_file")
+ or params.get("rules_file")
+ or "discovery_rules.yaml"
+ )
+ if Path(rules_file).is_absolute():
+ rules_path = Path(rules_file)
+ else:
+ try:
+ prj_root = config_path.parent.parent.parent
+ except Exception:
+ prj_root = Path.cwd()
+ rules_path = prj_root / rules_file
+
+ filtered_tables = {}  # Default to ALL if no rules? No, tables is list.
+ # If no rules, we might want to default to empty dicts for all tables?
+ # Let's keep logic: if rules exist, filter.
+
+ # 4. Use ConfigurationEngine for logic
+ # Initialize engine with the resolved rules file
+ from sibi_flux.datacube.config_engine import ConfigurationEngine
+
+ # We need to construct a lightweight 'params' dict or use existing context.params
+ # But we need to ensure 'engine' uses the correct 'rules_path' for THIS connection.
+
+ # NOTE: ConfigEngine takes 'params' and 'rules_path'.
+ # It handles scoped dictionary rules (via 'context_key') correctly.
+
+ eng = ConfigurationEngine(
+ context.params, rules_path=str(rules_path), context_key=conn_obj
+ )
+
+ filtered_tables = {}
+
+ if rules_path.exists():
+ for t in tables:
+ # Resolve using engine (handles prefix, regex, template logic)
+ # Pass mocked db_config for domain resolution logic inside engine
+ mock_db_config = {
+ "db_domain": db.get("db_domain", "common"),
+ "connection_obj": conn_obj,
+ }
+
+ res = eng.resolve_table(t, db_config=mock_db_config)
+
+ if res:
+ # Calculate relative paths
+ db_dom = db.get("db_domain")
+
+ # 1. Datacube Path Explicit
+ # Structure: datacubes_dir / [db_domain] / domain / template
+
+ # datacubes_dir resolved from params
+ dc_root_dir = (
+ context.params.get("paths", {})
+ .get("target", {})
+ .get("datacubes_dir", "dataobjects/gencubes")
+ )
+
+ dc_base = Path(dc_root_dir)
+ if db_dom:
+ dc_base = dc_base / db_dom
+
+ # Rule might provide 'output_template' (e.g. 'asm_cubes.py')
+ # We assume template is just the filename now per robust rules
+ template_name = res.get("output_template", Path(res["path"]).name)
+
+ full_dc_path = dc_base / res["domain"] / template_name
+
+ # 2. Field Map Path Explicit
+ # Structure: field_maps_dir / [db_domain] / domain / table.py
+
+ fields_root_dir = (
+ context.params.get("paths", {})
+ .get("target", {})
+ .get("field_maps_dir", "dataobjects/fields")
+ )
+
+ fm_base = Path(fields_root_dir)
+ if db_dom:
+ fm_base = fm_base / db_dom
+
+ fm_path_full = fm_base / res["domain"] / f"{t}.py"
+
+ # Relativize logic
+ def _safe_rel(p):
+ try:
+ pp = Path(p)
+ # Heuristic for project root if not clear
+ root = (
+ config_path.parent.parent.parent
+ if config_path.parent.name == "datacubes"
+ else Path.cwd()
+ )
+ if pp.is_absolute():
+ try:
+ return pp.relative_to(root)
+ except ValueError:
+ return pp.resolve().relative_to(root.resolve())
+ return pp
+ except Exception:
+ return Path(p)
+
+ rel_dc_path = _safe_rel(full_dc_path)
+ rel_fm_path = _safe_rel(fm_path_full)
+
+ filtered_tables[t] = {
+ "domain": res["domain"],
+ "output_template": (
+ Path(res["path"]).name
+ if "output_template" not in res
+ else res.get("output_template", Path(res["path"]).name)
+ ),
+ "datacube_path": str(rel_dc_path),
+ "field_map_path": str(rel_fm_path),
+ }
+ # Recover template logic: if rule-based, template was used.
+ # But since we have the path, template is secondary.
+ # We'll stick to what we have.
+
+ else:
+ console.print(
+ f"[yellow]No rules found for {conn_obj}. Whitelisting ALL tables (No Paths Calculated).[/yellow]"
+ )
+ filtered_tables = {t: {} for t in tables}
+
+ # 5. Load Current Whitelist
+ current_wl = {}
+ if target_path.exists():
+ try:
+ with open(target_path, "r") as rf:
+ current_wl = yaml.safe_load(rf) or {}
+ except:
+ pass
+
+ # Existing whitelist for this connection
+ existing_tables_map = {}
+ if conn_obj in current_wl:
+ raw_wl = current_wl[conn_obj]
+ if isinstance(raw_wl, list):
+ # Upgrade legacy list to dict
+ existing_tables_map = {t: {} for t in raw_wl}
 else:
-
-
-
-
-
-
-
-
-
-
-
- #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ existing_tables_map = raw_wl.get("tables", {})
+
+ # Calculate sets
+ current_table_names = set(existing_tables_map.keys())
+ new_table_names = set(filtered_tables.keys())
+
+ # Sync Logic:
+ # 1. Start with intersection (preserve config, but update rule-based defaults if missing?)
+ # We want to keep manual overrides in existing map, but maybe refresh defaults?
+ retained = current_table_names.intersection(new_table_names)
+
+ # 2. Add new filtered tables (additions)
+ added = new_table_names - current_table_names
+
+ # 3. Removed (in current but not in filtered)
+ removed = current_table_names - new_table_names
+
+ # Construct new table map
+ new_table_map = {}
+
+ # 1. Retained: Merge existing with new rule metadata
+ # Priority: Existing (Manual) > New (Rule)
+ # BUT: Enforce calculated paths to avoid stale absolute paths
+ for t in retained:
+ existing_meta = existing_tables_map[t]
+ rule_meta = filtered_tables[t]
+ # Merge: update rule defaults only if not set in existing
+ merged = rule_meta.copy()
+ merged.update(existing_meta)  # Existing overwrites rule
+
+ # Restore calculated paths (Enforce Relative)
+ if "datacube_path" in rule_meta:
+ merged["datacube_path"] = rule_meta["datacube_path"]
+ if "field_map_path" in rule_meta:
+ merged["field_map_path"] = rule_meta["field_map_path"]
+
+ new_table_map[t] = merged
+
+ # 2. Add new
+ for t in added:
+ new_table_map[t] = filtered_tables[t]
+
+ # 3. Removed are omitted
+
+ # Update structure
+ # Inject Global Paths
+ paths_cfg = params.get("paths", {}).get("target", {})
+
+ # Helper to ensure relative paths
+ def _to_rel(p):
+ if not p:
+ return p
+ try:
+ pp = Path(p)
+ # Heuristic for project root if not clear
+ root = (
+ config_path.parent.parent.parent
+ if config_path.parent.name == "datacubes"
+ else Path.cwd()
+ )
+ if pp.is_absolute():
+ try:
+ return str(pp.relative_to(root))
+ except ValueError:
+ return str(pp.resolve().relative_to(root.resolve()))
+ return p
+ except Exception:
+ pass
+ return p
+
+ dc_dir = _to_rel(paths_cfg.get("datacubes_dir", "dataobjects/gencubes/"))
+ fm_dir = _to_rel(
+ paths_cfg.get("field_maps_dir", "dataobjects/gencubes/fields/")
+ )
+
+ # Get db_domain from config (fallback to db_name if missing)
+ db_domain = db.get("db_domain") or db.get("name")
+
+ rich_structure = {
+ "db_domain": db_domain,
+ "datacubes_dir": dc_dir,
+ "field_maps_dir": fm_dir,
+ "tables": dict(sorted(new_table_map.items())),  # Sort keys for stability
+ }
+
+ current_wl[conn_obj] = rich_structure
+
+ with open(target_path, "w") as f:
+ yaml.dump(current_wl, f, sort_keys=False)
+
+ # Report
+ console.print(f"[bold underline]Sync Report for {db_name}[/bold underline]")
+ if added:
+ console.print(
+ f"[green] + Added {len(added)} tables:[/green] {', '.join(sorted(list(added))[:5])}{'...' if len(added)>5 else ''}"
+ )
+ if removed:
+ console.print(
+ f"[red] - Removed {len(removed)} tables:[/red] {', '.join(sorted(list(removed))[:5])}{'...' if len(removed)>5 else ''}"
+ )
+ if not added and not removed:
+ console.print("[dim] No changes.[/dim]")
+
+ # Count keys directly
+ final_count = len(rich_structure["tables"])
+ console.print(f"[blue] Total Whitelisted: {final_count}[/blue]")
+
+
+ def _run_field_map_generation(
+ context, config_path, target_dbs, url_resolver, force=False
+ ):
 """Shared logic for generating field map files."""
 params = context.params
-
-
-
+ # Logic: modern nested > legacy flat > default
+ folder_prefix = (
+ params.get("paths", {}).get("target", {}).get("datacubes_dir")
+ or params.get("folder_prefix")
+ or "solutions/dataobjects/gencubes/"
+ )
+
+ # Priority: explicit 'field_maps_dir' > legacy suffix construction
+ configured_fm_dir = params.get("paths", {}).get("target", {}).get("field_maps_dir")
+
+ if configured_fm_dir:
+ # If fm_dir is provided, it is the root for fields.
+ # We don't append suffix to it unless it's just a root base?
+ # Usually 'field_maps_dir' is the full relative path e.g. 'dataobjects/fields'
+ fields_root_path_base = Path(configured_fm_dir)
+ use_legacy_construction = False
+ else:
+ fields_suffix = (
+ params.get("generation", {}).get("fields_subpackage")
+ or params.get("fields_module_root")
+ or "fields"
+ )
+ fields_root_path_base = Path(folder_prefix) / fields_suffix
+ use_legacy_construction = True
+
 from sibi_flux.datacube.field_mapper import FieldTranslationManager
-
+
+ # Initialize Manager & Load Global Repo
 # Initialize Manager & Load Global Repo
- repo_rel_path =
-
-
+ repo_rel_path = (
+ params.get("paths", {})
+ .get("repositories", {})
+ .get("global_field_repository_file")
+ or params.get("global_repo_path")
+ or "solutions/conf/global_field_repository.yaml"
+ )
+
+ # Resolve relative to project root if needed
+ if Path(repo_rel_path).is_absolute():
+ global_repo_path = Path(repo_rel_path)
+ else:
+ try:
+ # Heuristic: config is in generators/datacubes, project root is 3 levels up
+ # But better to check if config_path is passed
+ project_root = config_path.parent.parent.parent
+ except Exception:
+ project_root = Path.cwd()
+ global_repo_path = (project_root / repo_rel_path).resolve()
+
 manager = FieldTranslationManager()
-
+
+ # 1. Load Repository (Definitions)
 if global_repo_path.exists():
 with open(global_repo_path, "r") as f:
 repo_data = yaml.safe_load(f) or []
 manager.load_from_list(repo_data)
- console.print(
-
-
+ console.print(
+ f"[green]Loaded {len(manager.fields)} fields from Global Repository.[/green]"
+ )
+
+ # 2. Load Translations (Overrides)
+ trans_rel_path = params.get("paths", {}).get("repositories", {}).get(
+ "global_field_translations_file"
+ ) or params.get("global_field_translations_file")
+ if trans_rel_path:
+ if Path(trans_rel_path).is_absolute():
+ trans_path = Path(trans_rel_path)
+ else:
+ trans_path = (project_root / trans_rel_path).resolve()
+
+ if trans_path.exists():
+ with open(trans_path, "r") as f:
+ trans_data = yaml.safe_load(f) or []
+ manager.load_from_list(trans_data)
+ console.print(
+ f"[green]Loaded translations from {trans_rel_path}.[/green]"
+ )
+
+ # --- GLOBAL CLEAN BUILD ---
+ if force:
+ # Determine global field maps root to wipe
+ # Logic matches default resolution used later
+ tgt = params.get("paths", {}).get("target", {})
+ fm_dir = tgt.get("field_maps_dir")
+
+ # If not set in params, check if we can infer from default
+ # But wait, whitelist overrides this per DB.
+ # User said "fields folder is exclusive".
+ # So we should wipe the configured 'field_maps_dir' from global params.
+
+ if fm_dir:
+ if Path(fm_dir).is_absolute():
+ abs_fm_dir = Path(fm_dir)
+ else:
+ abs_fm_dir = (project_root / fm_dir).resolve()
+
+ if abs_fm_dir.exists():
+ console.print(
+ f"[bold red]Global Clean: Removing entire fields directory {abs_fm_dir}[/bold red]"
+ )
+ try:
+ shutil.rmtree(abs_fm_dir)
+ abs_fm_dir.mkdir(parents=True, exist_ok=True)
+ (abs_fm_dir / "__init__.py").touch()
+ except Exception as e:
+ console.print(f"[red]Failed to clean global fields dir: {e}[/red]")
+ else:
+ console.print(
+ "[yellow]Warning: Could not determine global field_maps_dir for clean build.[/yellow]"
+ )
 
 for db in target_dbs:
+ # console.print(f"DEBUG: Processing DB entry: {db} (Type: {type(db)})")
+ if isinstance(db, str):
+ console.print(
+ f"[red]Error: Database entry is a string '{db}', expected dict. Check config.[/red]"
+ )
+ continue
+
 name = db.get("id") or db.get("name")
 conn_obj = db.get("connection_ref") or db.get("connection_obj")
-
- if not db.get("enable_field_map_generation", True) and not params.get(
-
-
+
+ if not db.get("enable_field_map_generation", True) and not params.get(
+ "generation", {}
+ ).get("enable_field_maps", True):
+ console.print(f"[dim]Skipping {name}: Field map generation disabled.[/dim]")
+ continue
 
 # Language Settings
 source_lang = db.get("db_source_lang", "es")
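For reference, the new `whitelist` command above writes one entry per connection into the whitelist YAML, combining `db_domain`, the resolved output directories, and per-table metadata. An illustrative sketch of that structure follows; the keys ("db_domain", "datacubes_dir", "field_maps_dir", "tables", "domain", "output_template", "datacube_path", "field_map_path") come from the diff, while the connection name, table names, and paths are invented.

# Illustrative only: the per-connection whitelist structure assembled above.
import yaml

example_whitelist = {
    "replica_db_conf": {  # invented connection name
        "db_domain": "erp",
        "datacubes_dir": "dataobjects/gencubes",
        "field_maps_dir": "dataobjects/fields",
        "tables": {
            "asm_orders": {  # invented table
                "domain": "assembly",
                "output_template": "asm_orders_cubes.py",
                "datacube_path": "dataobjects/gencubes/erp/assembly/asm_orders_cubes.py",
                "field_map_path": "dataobjects/fields/erp/assembly/asm_orders.py",
            }
        },
    }
}

print(yaml.dump(example_whitelist, sort_keys=False))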
@@ -1111,96 +2122,298 @@ def _run_field_map_generation(context, config_path, target_dbs, url_resolver, fo
 
 # Path Composition
 db_domain = db.get("db_domain")
-
- if
- fields_root_path =
-
-
-
+
+ if use_legacy_construction:
+ fields_root_path = Path(folder_prefix)
+ if db_domain:
+ fields_root_path = fields_root_path / db_domain
+ fields_root_path = fields_root_path / fields_suffix
+ else:
+ # Modern Logic: configured_fm_dir is root
+ fields_root_path = fields_root_path_base
+ if db_domain:
+ fields_root_path = fields_root_path / db_domain
+
+ # Resolve global whitelist path
 # Resolve global whitelist path
 # Try nested discovery param first, then legacy flat
+ discovery_cfg = params.get("paths", {}).get("discovery") or params.get(
+ "discovery", {}
+ )
 global_whitelist_file = (
-
- params.get("whitelist_file")
- "whitelist.yaml"
+ discovery_cfg.get("whitelist_file")
+ or params.get("whitelist_file")
+ or "whitelist.yaml"
 )
-
+
+ # If absolute, use directly (handled by gen_dc.py resolution), else resolve
 # If absolute, use directly (handled by gen_dc.py resolution), else resolve
 if Path(global_whitelist_file).is_absolute():
 wl_path = Path(global_whitelist_file)
 else:
-
-
+ # Use Project Root anchoring
+ try:
+ # Heuristic: config is in generators/datacubes
+ project_root = config_path.parent.parent.parent
+ except Exception:
+ project_root = Path.cwd()
+ wl_path = project_root / global_whitelist_file
+
 if not wl_path.exists():
- console.print(
+ console.print(
+ f"[yellow]Skipping {name}: Whitelist {global_whitelist_file} not found.[/yellow]"
+ )
 continue
-
+
 with open(wl_path, "r") as f:
 registry_data = yaml.safe_load(f) or {}
 
- #
+ # Load Rules for Domain Inference
+ rules_filename = db.get("rules_file")
+ if not rules_filename:
+ # Check deep keys
+ disc = params.get("paths", {}).get("discovery", {})
+ # console.print(f"DEBUG: Discovery Block: {disc}")
+ rules_filename = disc.get("rules_file") or params.get("discovery", {}).get(
+ "rules_file"
+ )
+
+ if not rules_filename:
+ console.print(
+ f"DEBUG: Fallback for {name}. Params keys: {disc.keys() if 'disc' in locals() else 'N/A'}"
+ )
+ rules_filename = f"discovery_rules_{conn_obj}.yaml"
+
+ # Resolve path relative to project root properly
+ if Path(rules_filename).is_absolute():
+ rules_path = Path(rules_filename)
+ else:
+ try:
+ # Heuristic: config is in generators/datacubes
+ # Reuse project_root calculation from above
+ if "project_root" not in locals():
+ try:
+ project_root = config_path.parent.parent.parent
+ except Exception:
+ project_root = Path.cwd()
+
+ console.print(
+ f"DEBUG: ConfigRoot={config_path.parent}, ProjRoot={project_root}, RulesFile={rules_filename}"
+ )
+ rules_path = project_root / rules_filename
+ except Exception:
+ rules_path = config_path.parent / rules_filename
+
+ rules = []
+ # console.print(f"DEBUG: Checking rules path: {rules_path}")
+ if rules_path.exists():
+ with open(rules_path, "r") as f:
+ rules = yaml.safe_load(f) or []
+
+ # PROPERLY HANDLE DICT vs LIST RULES
+ if isinstance(rules, dict):
+ # Try to find specific rules for this connection
+ candidates = [conn_obj, name]
+ found = False
+ for key in candidates:
+ if key in rules:
+ rules = rules[key]
+ found = True
+ break
+ if not found:
+ rules = []
+
+ # console.print(f"DEBUG: Loaded {len(rules)} rules from {rules_path} for {conn_obj}")
+
+ # console.print(f"DEBUG: Loaded {len(rules)} rules from {rules_path}")
+
+ # Support List or Dict Format
 scoped_data = registry_data.get(conn_obj, {})
-
+ if isinstance(scoped_data, list):
+ tables = {t: {} for t in scoped_data}
+ else:
+ tables = scoped_data.get("tables", {})
+
 if not tables:
-
-
+ console.print(f"[dim]No tables in whitelist for {name}.[/dim]")
+ continue
+
+ # Override paths from whitelist if present
+ # This allows whitelist to drive output location efficiently
+ # Retrieve db_domain from whitelist entry (preferred) or registry config
+ # Hoisted from loop for Clean Build capability
+ wl_entry = registry_data.get(conn_obj, {})
+ db_domain = wl_entry.get("db_domain") or db.get("db_domain") or db.get("name")
+
+ wl_fields_dir = wl_entry.get("field_maps_dir")
+ if wl_fields_dir:
+ # Resolve relative to project root
+ if Path(wl_fields_dir).is_absolute():
+ fields_root_path = Path(wl_fields_dir)
+ else:
+ # Use cached or recalculated project_root
+ if "project_root" not in locals():
+ try:
+ project_root = config_path.parent.parent.parent
+ except Exception:
+ project_root = Path.cwd()
+ fields_root_path = project_root / wl_fields_dir
+ else:
+ # Fallback to global setting (calculated earlier, but we need to re-calc if not in scope or just reuse?)
+
+ # Logic: modern nested > legacy flat > default
+ folder_prefix = (
+ params.get("paths", {}).get("target", {}).get("datacubes_dir")
+ or params.get("folder_prefix")
+ or "solutions/dataobjects/gencubes/"
+ )
+ fields_suffix = (
+ params.get("generation", {}).get("fields_subpackage")
+ or params.get("fields_module_root")
+ or "fields"
+ )
+ # Resolve
+ if not Path(folder_prefix).is_absolute():
+ try:
+ prj_root = config_path.parent.parent.parent
+ except Exception:
+ prj_root = Path.cwd()
+ folder_prefix = str(prj_root / folder_prefix)
+
+ # Construct
+ fields_root_path = fields_root_path_base
+
+ # Ensure Parent Chain has __init__.py (Backtrack to root)
+ try:
+ # Make sure fields_root_path itself exists
+ fields_root_path.mkdir(parents=True, exist_ok=True)
+ if not (fields_root_path / "__init__.py").exists():
+ (fields_root_path / "__init__.py").touch()
+
+ curr = fields_root_path.parent
+ # Stop at project root (.) or root (/)
+ while str(curr) != "." and str(curr) != "/" and len(curr.parts) > 0:
+ if not (curr / "__init__.py").exists():
+ (curr / "__init__.py").touch()
+ curr = curr.parent
+ except Exception:
+ pass
+
+ # This is safe because db_domain is specific to this connection.
+ db_target_root = fields_root_path / db_domain
+
+ # Ensure Root Exists
+ db_target_root.mkdir(parents=True, exist_ok=True)
+ if not (db_target_root / "__init__.py").exists():
+ (db_target_root / "__init__.py").touch()
+
+ console.print(
+ f"[bold cyan]Mapping fields for {name} ({len(tables)} tables) [Src:{source_lang} -> Tgt:{target_lang}]...[/bold cyan]"
+ )
 
- console.print(f"[bold cyan]Mapping fields for {name} ({len(tables)} tables) [Src:{source_lang} -> Tgt:{target_lang}]...[/bold cyan]")
-
 try:
 db_url = url_resolver(conn_obj, db.get("global_import"))
 engine = sa.create_engine(db_url)
 inspector = inspect(engine)
-
+
 for table_name, details in tables.items():
-
-
+ domain = details.get("domain")
+
+ # If domain is missing (e.g. manual entry without rule), try fallback inference
+ if not domain:
+ # Infer from rules
+ for r in rules:
+ pat = r.get("pattern")
+ mtype = r.get("match_type", "exact")
+ is_match = False
+ if mtype == "exact" and table_name == pat:
+ is_match = True
+ elif mtype == "prefix" and table_name.startswith(pat):
+ is_match = True
+ elif mtype == "regex":
+ import re
+
+ if re.search(pat, table_name):
+ is_match = True
+
+ if is_match:
+ domain = r.get("domain")
+ # console.print(f"DEBUG: {table_name} matched rule {pat} -> {domain}")
+ break
+
+ if not domain:
+ # console.print(f"DEBUG: No match for {table_name}, defaulting to common")
+ domain = "common"
+
+ # Target Resolution
+ explicit_fm_path = details.get("field_map_path")
+ if explicit_fm_path:
+ # Use Explicit Path from Whitelist
+ if Path(explicit_fm_path).is_absolute():
+ target_file = Path(explicit_fm_path)
+ else:
+ # Ensure project_root is available or fallback to cwd
+ if "project_root" not in locals():
+ try:
+ project_root = config_path.parent.parent.parent
+ except:
+ project_root = Path.cwd()
+ target_file = project_root / explicit_fm_path
+
+ target_dir = target_file.parent
+ else:
+ # Fallback Logic
+ # Target Dir: fields/{db_domain}/{domain}
+ target_dir = db_target_root / domain
+ target_file = target_dir / f"{table_name}.py"
 
-
-
- # Target Dir
- target_dir = fields_root_path / domain
+ # Output Initialization
 target_dir.mkdir(parents=True, exist_ok=True)
 if not (target_dir / "__init__.py").exists():
 (target_dir / "__init__.py").touch()
-
- # Python Target File
- target_file = target_dir / f"{table_name}.py"
-
+
 # Skip if exists and not force
 if target_file.exists() and not force:
 # console.print(f"DEBUG: Skipping existing {table_name}")
 continue
-
+
 try:
-
+ # Fix for SA 2.0 / Clickhouse: inspect connection, not engine
+ with engine.connect() as conn:
+ cols = inspect(conn).get_columns(table_name)
 # console.print(f"DEBUG: Generating {table_name} with {len(cols)} columns")
 field_map = {}
-
- full_table_name = f"{domain}.{table_name}"
-
+
+ full_table_name = f"{db_domain}.{domain}.{table_name}"
+
 for c in cols:
 col_name = c["name"]
 col_type = str(c["type"])
-
+
 # 1. Register / Get Canonical Field
- trans_msg = manager.register_field(
+ trans_msg = manager.register_field(
+ col_name, col_type, full_table_name
+ )
 if trans_msg:
 console.print(f" [dim]{trans_msg}[/dim]")
-
+
 # 2. Get Field Definition
 fid = manager._generate_id(col_name, col_type)
 field_def = manager.fields.get(fid)
-
+
 if field_def:
 # 3. Translate if needed
 if target_lang not in field_def.aliases:
 manager.translate_alias(fid, target_lang)
-
+
 # 4. Determine Target Column
- target_alias =
+ target_alias = (
+ field_def.aliases.get(target_lang)
+ or field_def.aliases.get("en")
+ or col_name
+ )
 target_col = manager.generate_target_column(target_alias)
-
+
 if not field_def.target_column:
 field_def.target_column = target_col
 else:
@@ -1220,27 +2433,36 @@ def _run_field_map_generation(context, config_path, target_dbs, url_resolver, fo
 for col_name in field_map.keys():
 lines.append(f' "{col_name}",')
 lines.append("]")
-
+
 lines.append("")
- lines.append(
-
+ lines.append(
+ f'field_map: Mapping[str, str] = FieldMapFactory.create("{full_table_name}", COLUMNS)'
+ )
+
 # Generate metadata call
 lines.append("")
- lines.append(
-
+ lines.append(
+ f'metadata: Mapping[str, Any] = FieldMapFactory.create_metadata("{full_table_name}", COLUMNS)'
+ )
+
 with open(target_file, "w") as f:
 f.write("\n".join(lines))
-
+
 except Exception as e:
- console.print(f"[red]Error
-
-
+ console.print(f"[red]Error processing {table_name}: {e}[/red]")
+ continue
+
 except Exception as e:
-
+ console.print(f"[red]Error connecting/inspecting DB {name}: {e}[/red]")
 
- # Save Global
-
-
+ # Save Updates to Global Repository
+ try:
+ manager.save_to_yaml(global_repo_path)
+ console.print(
+ f"[green]Updated Global Field Repository at {global_repo_path} ({len(manager.fields)} fields)[/green]"
+ )
+ except Exception as e:
+ console.print(f"[red]Failed to save Global Field Repository: {e}[/red]")
 
 
 if __name__ == "__main__":
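Based on the `lines` assembled in the last hunk (a COLUMNS list followed by FieldMapFactory.create and create_metadata calls), a generated field-map module might look like the sketch below. The two factory calls and the COLUMNS layout are taken from the diff; the table name, column names, and import lines are invented placeholders, since the imports written at the top of the generated file are not visible in this hunk.

# Illustrative only: a possible generated field-map module.
from typing import Any, Mapping

from sibi_flux.datacube.field_factory import FieldMapFactory  # assumed import

COLUMNS = [
    "id",
    "descripcion",
    "fecha_creacion",
]

field_map: Mapping[str, str] = FieldMapFactory.create("erp.assembly.asm_orders", COLUMNS)

metadata: Mapping[str, Any] = FieldMapFactory.create_metadata("erp.assembly.asm_orders", COLUMNS)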