sibi-flux 2026.1.9__py3-none-any.whl → 2026.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_flux/datacube/cli.py +96 -44
- sibi_flux/datacube/config_engine.py +9 -1
- {sibi_flux-2026.1.9.dist-info → sibi_flux-2026.1.10.dist-info}/METADATA +1 -1
- {sibi_flux-2026.1.9.dist-info → sibi_flux-2026.1.10.dist-info}/RECORD +6 -6
- {sibi_flux-2026.1.9.dist-info → sibi_flux-2026.1.10.dist-info}/WHEEL +0 -0
- {sibi_flux-2026.1.9.dist-info → sibi_flux-2026.1.10.dist-info}/entry_points.txt +0 -0
sibi_flux/datacube/cli.py
CHANGED
|
@@ -1057,20 +1057,31 @@ def scan(
|
|
|
1057
1057
|
|
|
1058
1058
|
# Resolve global output file
|
|
1059
1059
|
# First check nested discovery block (standard), then root (fallback)
|
|
1060
|
-
|
|
1060
|
+
# Resolve global output file
|
|
1061
|
+
# First check nested discovery block (standard), then root (fallback)
|
|
1062
|
+
discovery_conf = params.get("paths", {}).get("discovery") or params.get("discovery", {})
|
|
1061
1063
|
global_tables_file = (
|
|
1062
1064
|
discovery_conf.get("all_tables_file")
|
|
1063
1065
|
or params.get("all_tables_file")
|
|
1064
|
-
or "all_tables.yaml"
|
|
1066
|
+
or "dataobjects/globals/all_tables.yaml"
|
|
1065
1067
|
)
|
|
1066
1068
|
|
|
1067
1069
|
# If using absolute path (resolved from context), use it directly
|
|
1068
|
-
# Otherwise treat as relative to config file location
|
|
1070
|
+
# Otherwise treat as relative to PROJECT ROOT (not config file location)
|
|
1069
1071
|
path_obj = Path(global_tables_file)
|
|
1070
1072
|
if path_obj.is_absolute():
|
|
1071
1073
|
global_tables_path = path_obj
|
|
1072
1074
|
else:
|
|
1073
|
-
|
|
1075
|
+
try:
|
|
1076
|
+
# Heuristic: config is in generators/datacubes
|
|
1077
|
+
project_root = config_path.parent.parent.parent
|
|
1078
|
+
except Exception:
|
|
1079
|
+
project_root = Path.cwd()
|
|
1080
|
+
global_tables_path = project_root / global_tables_file
|
|
1081
|
+
|
|
1082
|
+
# Ensure dir exists
|
|
1083
|
+
if not global_tables_path.parent.exists():
|
|
1084
|
+
global_tables_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1074
1085
|
|
|
1075
1086
|
# Load existing data to preserve config for DBs not being scanned
|
|
1076
1087
|
all_tables_data = {}
|
|
@@ -1245,6 +1256,8 @@ def propose_rules(
|
|
|
1245
1256
|
dry_run: bool = typer.Option(
|
|
1246
1257
|
False, "--dry-run", help="Preview rules without saving"
|
|
1247
1258
|
),
|
|
1259
|
+
conf_override: Optional[str] = typer.Option(None, "--rules", help="Override rules file"),
|
|
1260
|
+
force: bool = typer.Option(False, "--force", "-f", help="Force overwrite existing rules"),
|
|
1248
1261
|
):
|
|
1249
1262
|
"""
|
|
1250
1263
|
Analyzes all_tables.yaml and proposes new discovery rules.
|
|
@@ -1261,23 +1274,35 @@ def propose_rules(
|
|
|
1261
1274
|
# The resolved config will have absolute paths for these if the helper worked.
|
|
1262
1275
|
params = context.params
|
|
1263
1276
|
|
|
1264
|
-
# Prefer resolved values if available
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1277
|
+
# Prefer resolved values if available, but check nested 'paths' structure first
|
|
1278
|
+
# Structure: paths -> discovery -> all_tables_file
|
|
1279
|
+
discovery_conf = params.get("paths", {}).get("discovery") or params.get("discovery", {})
|
|
1280
|
+
all_tables_file = (
|
|
1281
|
+
discovery_conf.get("all_tables_file")
|
|
1282
|
+
or params.get("all_tables_file")
|
|
1283
|
+
or "dataobjects/globals/all_tables.yaml"
|
|
1284
|
+
)
|
|
1285
|
+
|
|
1286
|
+
path_obj = Path(all_tables_file)
|
|
1287
|
+
if path_obj.is_absolute():
|
|
1288
|
+
all_tables_path = path_obj
|
|
1270
1289
|
else:
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1290
|
+
try:
|
|
1291
|
+
# Heuristic: config is in generators/datacubes
|
|
1292
|
+
project_root = config_path.parent.parent.parent
|
|
1293
|
+
except Exception:
|
|
1294
|
+
project_root = Path.cwd()
|
|
1295
|
+
all_tables_path = project_root / all_tables_file
|
|
1296
|
+
|
|
1297
|
+
# Verify Existence
|
|
1298
|
+
if not all_tables_path.exists():
|
|
1299
|
+
console.print(f"[red]Error: {all_tables_path} not found. Run 'dc scan' first.[/red]")
|
|
1300
|
+
raise typer.Exit(code=1)
|
|
1278
1301
|
|
|
1279
1302
|
# Rules File
|
|
1280
|
-
if
|
|
1303
|
+
if conf_override:
|
|
1304
|
+
rules_path = Path(conf_override)
|
|
1305
|
+
elif "discovery" in resolved_config and "rules_file" in resolved_config["discovery"]:
|
|
1281
1306
|
rules_path = Path(resolved_config["discovery"]["rules_file"])
|
|
1282
1307
|
else:
|
|
1283
1308
|
raw_rules = (
|
|
@@ -1287,11 +1312,12 @@ def propose_rules(
|
|
|
1287
1312
|
)
|
|
1288
1313
|
rules_path = config_path.parent / raw_rules
|
|
1289
1314
|
|
|
1290
|
-
if
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
)
|
|
1294
|
-
|
|
1315
|
+
# Guard: Require Force if Rules Exist
|
|
1316
|
+
if rules_path.exists() and not force and not dry_run:
|
|
1317
|
+
console.print(f"[yellow]Rules file exists at {rules_path}.[/yellow]")
|
|
1318
|
+
console.print("[yellow]Skipping proposal generation to protect manual edits.[/yellow]")
|
|
1319
|
+
console.print("[dim]Use --force to overwrite/update rules.[/dim]")
|
|
1320
|
+
return
|
|
1295
1321
|
|
|
1296
1322
|
engine = RuleEngine(all_tables_path, rules_path)
|
|
1297
1323
|
engine.load()
|
|
@@ -1596,7 +1622,7 @@ def workflow(
|
|
|
1596
1622
|
# Step 0: Scan & Propose Rules (Critical for new projects/drift)
|
|
1597
1623
|
console.print(f"\n[bold cyan]Step 0a: Scan (Introspection)[/]")
|
|
1598
1624
|
scan(
|
|
1599
|
-
config_file=config_file,
|
|
1625
|
+
config_file=config_file if config_file else None,
|
|
1600
1626
|
db_url_map=db_url_map,
|
|
1601
1627
|
env_file=env_file,
|
|
1602
1628
|
db_name=db_name,
|
|
@@ -1604,8 +1630,10 @@ def workflow(
|
|
|
1604
1630
|
|
|
1605
1631
|
console.print(f"\n[bold cyan]Step 0b: Propose Rules[/]")
|
|
1606
1632
|
propose_rules(
|
|
1607
|
-
config_file=config_file,
|
|
1608
|
-
dry_run=dry_run,
|
|
1633
|
+
config_file=config_file if config_file else None,
|
|
1634
|
+
dry_run=dry_run,
|
|
1635
|
+
conf_override=None,
|
|
1636
|
+
force=force,
|
|
1609
1637
|
)
|
|
1610
1638
|
|
|
1611
1639
|
console.print(f"\n[bold cyan]Step 0c: Generate Whitelist[/]")
|
|
@@ -1936,8 +1964,38 @@ def whitelist(
|
|
|
1936
1964
|
)
|
|
1937
1965
|
continue
|
|
1938
1966
|
|
|
1939
|
-
|
|
1940
|
-
tables =
|
|
1967
|
+
# 0. Try to load tables from all_tables.yaml (Scan Output)
|
|
1968
|
+
tables = []
|
|
1969
|
+
all_tables_file = (
|
|
1970
|
+
params.get("paths", {}).get("discovery", {}).get("all_tables_file")
|
|
1971
|
+
or params.get("all_tables_file")
|
|
1972
|
+
or "dataobjects/globals/all_tables.yaml"
|
|
1973
|
+
)
|
|
1974
|
+
# Resolve path
|
|
1975
|
+
if not Path(all_tables_file).is_absolute():
|
|
1976
|
+
try:
|
|
1977
|
+
prj_root = config_path.parent.parent.parent
|
|
1978
|
+
except Exception:
|
|
1979
|
+
prj_root = Path.cwd()
|
|
1980
|
+
all_tables_path = prj_root / all_tables_file
|
|
1981
|
+
else:
|
|
1982
|
+
all_tables_path = Path(all_tables_file)
|
|
1983
|
+
|
|
1984
|
+
if all_tables_path.exists():
|
|
1985
|
+
try:
|
|
1986
|
+
with open(all_tables_path, "r") as f:
|
|
1987
|
+
all_tables_data = yaml.safe_load(f) or {}
|
|
1988
|
+
tables = all_tables_data.get(conn_obj) or []
|
|
1989
|
+
except Exception:
|
|
1990
|
+
pass
|
|
1991
|
+
|
|
1992
|
+
if tables:
|
|
1993
|
+
console.print(f"[green]Loaded {len(tables)} tables from {all_tables_file} for {conn_obj}.[/green]")
|
|
1994
|
+
else:
|
|
1995
|
+
# Fallback to Live Introspection
|
|
1996
|
+
console.print(f"[yellow]Warning: {conn_obj} not found in all_tables.yaml (or file missing). Falling back to live DB introspection.[/yellow]")
|
|
1997
|
+
insp = inspect(engine)
|
|
1998
|
+
tables = insp.get_table_names()
|
|
1941
1999
|
|
|
1942
2000
|
# Determine path (Config Driven)
|
|
1943
2001
|
discovery_cfg = params.get("paths", {}).get("discovery") or params.get(
|
|
@@ -2123,25 +2181,19 @@ def whitelist(
|
|
|
2123
2181
|
# Construct new table map
|
|
2124
2182
|
new_table_map = {}
|
|
2125
2183
|
|
|
2126
|
-
# 1. Retained: Merge existing with new rule metadata
|
|
2127
|
-
# Priority: Existing (Manual) > New (Rule)
|
|
2128
|
-
# BUT: Enforce calculated paths to avoid stale absolute paths
|
|
2129
2184
|
for t in retained:
|
|
2130
|
-
existing_meta = existing_tables_map
|
|
2185
|
+
existing_meta = existing_tables_map.get(t, {})
|
|
2131
2186
|
rule_meta = filtered_tables[t]
|
|
2132
|
-
# Merge: update rule defaults only if not set in existing
|
|
2133
|
-
merged = rule_meta.copy()
|
|
2134
|
-
merged.update(existing_meta) # Existing overwrites rule
|
|
2135
2187
|
|
|
2136
|
-
#
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
if
|
|
2143
|
-
merged["
|
|
2144
|
-
|
|
2188
|
+
# Strict Rewrite Logic (User Request)
|
|
2189
|
+
# We ONLY preserve 'custom_name' from existing entries.
|
|
2190
|
+
# All other metadata (domain, template, paths) must come strictly from rules/calculation.
|
|
2191
|
+
custom_name = existing_meta.get("custom_name")
|
|
2192
|
+
|
|
2193
|
+
merged = rule_meta.copy()
|
|
2194
|
+
if custom_name:
|
|
2195
|
+
merged["custom_name"] = custom_name
|
|
2196
|
+
|
|
2145
2197
|
new_table_map[t] = merged
|
|
2146
2198
|
|
|
2147
2199
|
# 2. Add new
|
|
@@ -89,10 +89,16 @@ class ConfigurationEngine:
|
|
|
89
89
|
# Context not found and no default key
|
|
90
90
|
rules = []
|
|
91
91
|
|
|
92
|
+
# Sort Rules by Specificity (Length Descending)
|
|
93
|
+
# This prevents short prefixes (e.g. 'data_') from shadowing longer ones or exact matches if they overlap
|
|
94
|
+
# though usually exact matches don't overlap with delimiter-based prefixes.
|
|
95
|
+
# But for 'user_' vs 'use_', 'user_' (len 5) should come before 'use_' (len 4).
|
|
96
|
+
sorted_rules = sorted(rules, key=lambda x: len(x.get("pattern", "")), reverse=True)
|
|
97
|
+
|
|
92
98
|
return GeneratorConfig(
|
|
93
99
|
version=1.2,
|
|
94
100
|
settings=GlobalSettings(**settings_data),
|
|
95
|
-
discovery_rules=[DiscoveryRule(**r) for r in
|
|
101
|
+
discovery_rules=[DiscoveryRule(**r) for r in sorted_rules],
|
|
96
102
|
)
|
|
97
103
|
|
|
98
104
|
def _resolve_cubes_root(self) -> str:
|
|
@@ -213,6 +219,8 @@ class ConfigurationEngine:
|
|
|
213
219
|
"field_map": field_map_module, # 'field_map' template logic relies on discovery result often
|
|
214
220
|
"connection_obj": conn_obj,
|
|
215
221
|
"class_name": class_name,
|
|
222
|
+
# Pass template through for downstream consumers (e.g. whitelist generator)
|
|
223
|
+
"output_template": rule.output_template,
|
|
216
224
|
}
|
|
217
225
|
|
|
218
226
|
|
|
@@ -23,8 +23,8 @@ sibi_flux/dask_cluster/exceptions.py,sha256=apQZaUMgac8k2ZTTsvUd-VlWdo9-Nrh5b6St
|
|
|
23
23
|
sibi_flux/dask_cluster/utils.py,sha256=Pr2qaow6GVyvM0hqKSM0ZQpe2Ot5ayfGYQiNhNpYA8Y,1342
|
|
24
24
|
sibi_flux/datacube/__init__.py,sha256=ODEoa4r5RtzynIp-PdVDaJ-4BcPBj1L9VkLIF7RYSPE,91
|
|
25
25
|
sibi_flux/datacube/_data_cube.py,sha256=Ofgy3JlR7N0eKijpUzI-ixFlISUd3CFsxnKd6a4fguE,12629
|
|
26
|
-
sibi_flux/datacube/cli.py,sha256=
|
|
27
|
-
sibi_flux/datacube/config_engine.py,sha256=
|
|
26
|
+
sibi_flux/datacube/cli.py,sha256=YGaJQBnGQMbrZ82MKF2FbQIpAiA0SCKt-y-SXvP-Aqs,105823
|
|
27
|
+
sibi_flux/datacube/config_engine.py,sha256=VZnEFs7JziWCZ-UyZadENbFNcRT1LDG-pFJ0hEe5hvc,8923
|
|
28
28
|
sibi_flux/datacube/field_factory.py,sha256=Z3Yp6tGzrZ13rvKSjMFr9jvW7fazeNi4K1fAalxLujM,6376
|
|
29
29
|
sibi_flux/datacube/field_mapper.py,sha256=V6aFYunl28DI7gSvrF7tcidPNX9QtOYymVxbumzQqPs,9334
|
|
30
30
|
sibi_flux/datacube/field_registry.py,sha256=VBTqxNIn2-eWMiZi1oQK-719oyJfn3yhfV7Bz_0iNyU,4355
|
|
@@ -123,7 +123,7 @@ sibi_flux/utils/file_utils.py,sha256=7OHUW65OTe6HlQ6wkDagDd7d0SCQ_-NEGmHlOJguKYw
|
|
|
123
123
|
sibi_flux/utils/filepath_generator/__init__.py,sha256=YVFJhIewjwksb9E2t43ojNC-W_AqUDhkKxQVBIBMkY8,91
|
|
124
124
|
sibi_flux/utils/filepath_generator/_filepath_generator.py,sha256=4HG-Ubvjtv6luL0z-A-8B6_r3o9YqBwATFXhOXiTbKc,6789
|
|
125
125
|
sibi_flux/utils/retry.py,sha256=45t0MF2IoMayN9xkn5_FtakMq4HwZlGvHVd6qv8x1AY,1227
|
|
126
|
-
sibi_flux-2026.1.
|
|
127
|
-
sibi_flux-2026.1.
|
|
128
|
-
sibi_flux-2026.1.
|
|
129
|
-
sibi_flux-2026.1.
|
|
126
|
+
sibi_flux-2026.1.10.dist-info/WHEEL,sha256=ZyFSCYkV2BrxH6-HRVRg3R9Fo7MALzer9KiPYqNxSbo,79
|
|
127
|
+
sibi_flux-2026.1.10.dist-info/entry_points.txt,sha256=6xrq5zuz_8wodJj4s49raopnuC3Owy_leZRkWtcXpTk,49
|
|
128
|
+
sibi_flux-2026.1.10.dist-info/METADATA,sha256=jhq2c4QSQkZfHh5-NOtlz5o9btdYM6FOTXqvWLK45os,9887
|
|
129
|
+
sibi_flux-2026.1.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|