DKOps 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DKOps/__init__.py +0 -0
- DKOps/environment_config.py +385 -0
- DKOps/launcher.py +368 -0
- DKOps/logger_config.py +299 -0
- DKOps/table_governance/__init__.py +34 -0
- DKOps/table_governance/contracts/__init__.py +0 -0
- DKOps/table_governance/contracts/loader.py +669 -0
- DKOps/table_governance/contracts/validator.py +269 -0
- DKOps/table_governance/migrations/__init__.py +0 -0
- DKOps/table_governance/migrations/safe_migrator.py +270 -0
- DKOps/table_governance/readers/__init__.py +3 -0
- DKOps/table_governance/readers/table_reader.py +297 -0
- DKOps/table_governance/writers/__init__.py +15 -0
- DKOps/table_governance/writers/append_writer.py +42 -0
- DKOps/table_governance/writers/base_writer.py +384 -0
- DKOps/table_governance/writers/create_writer.py +91 -0
- DKOps/table_governance/writers/delete_writer.py +111 -0
- DKOps/table_governance/writers/partition_writer.py +75 -0
- DKOps/table_governance/writers/table_writer.py +162 -0
- DKOps/table_governance/writers/upsert_writer.py +110 -0
- dkops-0.1.0.dist-info/METADATA +406 -0
- dkops-0.1.0.dist-info/RECORD +25 -0
- dkops-0.1.0.dist-info/WHEEL +5 -0
- dkops-0.1.0.dist-info/licenses/LICENSE +21 -0
- dkops-0.1.0.dist-info/top_level.txt +1 -0
DKOps/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
"""
|
|
2
|
+
environment_config.py
|
|
3
|
+
=====================
|
|
4
|
+
Configuración de ambiente leída directamente desde el dict del config.json
|
|
5
|
+
que ya carga Launcher. No lee ningún archivo externo.
|
|
6
|
+
|
|
7
|
+
Estructura esperada en config.json
|
|
8
|
+
------------------------------------
|
|
9
|
+
{
|
|
10
|
+
"EXECUTION_ENVIRONMENT": "local",
|
|
11
|
+
...
|
|
12
|
+
|
|
13
|
+
"environments": {
|
|
14
|
+
"2370424844216896": { ← workspace_id real (Databricks)
|
|
15
|
+
"env": "dev",
|
|
16
|
+
"env_short": "d",
|
|
17
|
+
"workspace_host": "https://adb-2370424844216896.azuredatabricks.net",
|
|
18
|
+
"catalogs": {
|
|
19
|
+
"bronze": "ct_bronze_dlsuraanaliticadev",
|
|
20
|
+
"silver": "ct_silver_dlsuraanaliticadev",
|
|
21
|
+
"gold": "ct_gold_dlsuraanaliticadev"
|
|
22
|
+
},
|
|
23
|
+
"storage_accounts": {
|
|
24
|
+
"default": "dlsuraanaliticadev",
|
|
25
|
+
"raw": "dlsuraanaliticadevraw"
|
|
26
|
+
},
|
|
27
|
+
"paths": {
|
|
28
|
+
"raw": "abfss://raw@dlsuraanaliticadev.dfs.core.windows.net",
|
|
29
|
+
"curated": "abfss://curated@dlsuraanaliticadev.dfs.core.windows.net",
|
|
30
|
+
"archive": "abfss://archive@dlsuraanaliticadev.dfs.core.windows.net"
|
|
31
|
+
},
|
|
32
|
+
"secrets": { "scope": "kv-dev" },
|
|
33
|
+
"tags": {
|
|
34
|
+
"environment": "dev",
|
|
35
|
+
"cost_center": "CC-1001",
|
|
36
|
+
"team": "data-engineering"
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
"7042033821150253": { ... } ← workspace_id prod
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
Detección del ambiente (cascada)
|
|
44
|
+
---------------------------------
|
|
45
|
+
En Databricks → get_context().workspaceId → busca en environments por ID
|
|
46
|
+
En local → DATABRICKS_TARGET=dev → busca por valor de "env" en cada entrada
|
|
47
|
+
|
|
48
|
+
Secrets
|
|
49
|
+
-------
|
|
50
|
+
En Databricks → dbutils.secrets.get(scope, key)
|
|
51
|
+
En local → SECRET_<KEY> (nombre de la clave en mayúsculas) en .env.<env> o .env (nunca commitear)
|
|
52
|
+
|
|
53
|
+
Uso desde Launcher (no instanciar directamente)
|
|
54
|
+
------------------------------------------------
|
|
55
|
+
launcher = Launcher("config.json")
|
|
56
|
+
env = launcher.env
|
|
57
|
+
|
|
58
|
+
env.get_catalog("bronze") → "ct_bronze_dlsuraanaliticadev"
|
|
59
|
+
env.get_path("raw") → "abfss://raw@..."
|
|
60
|
+
env.get_secret("jdbc_password") → dbutils o .env según runtime
|
|
61
|
+
env.get_var("tags.cost_center") → "CC-1001"
|
|
62
|
+
env.summary() → dict completo del ambiente activo
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
import os
|
|
66
|
+
from pathlib import Path
|
|
67
|
+
from typing import Any
|
|
68
|
+
|
|
69
|
+
from DKOps.logger_config import LoggableMixin
|
|
70
|
+
|
|
71
|
+
_ENV_VAR_TARGET = "DATABRICKS_TARGET"
|
|
72
|
+
_SECRET_PREFIX = "SECRET_"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class EnvironmentConfig(LoggableMixin):
|
|
76
|
+
"""
|
|
77
|
+
Configuración del ambiente activo.
|
|
78
|
+
Recibe el dict completo de config.json desde Launcher.
|
|
79
|
+
|
|
80
|
+
Parámetros
|
|
81
|
+
----------
|
|
82
|
+
config : dict completo cargado desde config.json (self.config en Launcher)
|
|
83
|
+
is_databricks: True si estamos corriendo dentro de un cluster Databricks
|
|
84
|
+
env_file : ruta explícita al .env local (opcional)
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
def __init__(
    self,
    config: dict,
    is_databricks: bool = False,
    env_file: str | None = None,
) -> None:
    """Build the active-environment view from an already-loaded config dict.

    Args:
        config: Full dict loaded from config.json. It must contain a
            non-empty ``"environments"`` mapping.
        is_databricks: Whether the code runs inside Databricks; selects the
            resolution strategy and the secret backend.
        env_file: Optional explicit path to a local ``.env`` file.

    Raises:
        ValueError: If ``"environments"`` is missing or empty, or if no
            environment entry can be resolved.
    """
    self._is_databricks = is_databricks
    environments = config.get("environments", {})
    self._environments = environments
    self._config = config

    if not environments:
        raise ValueError(
            "No se encontró la sección 'environments' en config.json.\n"
            "Agrega al menos un ambiente con su workspace_id como clave."
        )

    # Decide which entry of ``environments`` applies to this run.
    resolved = self._resolve_environment()
    self._workspace_id, self._vars = resolved

    # Local secrets come from a .env file; this runs after resolution
    # because the candidate file name depends on the environment name.
    self._env_secrets = self._load_env_file(env_file)

    self.log.info(
        f"EnvironmentConfig listo | "
        f"workspace_id='{self._workspace_id}' | "
        f"env='{self.env}' | "
        f"runtime={'databricks' if is_databricks else 'local'}"
    )
    catalog_names = list(self._vars.get("catalogs", {}).keys())
    self.log.debug(f"Catálogos: {catalog_names}")
    path_names = list(self._vars.get("paths", {}).keys())
    self.log.debug(f"Paths : {path_names}")
|
|
117
|
+
|
|
118
|
+
# ── Resolución del ambiente ───────────────────────────────────────────
|
|
119
|
+
|
|
120
|
+
def _resolve_environment(self) -> tuple[str, dict]:
|
|
121
|
+
"""
|
|
122
|
+
Cascada de resolución:
|
|
123
|
+
1. En Databricks → workspace_id real (get_context().workspaceId)
|
|
124
|
+
2. En local → DATABRICKS_TARGET (nombre del env: dev, qa, prod)
|
|
125
|
+
3. Error descriptivo
|
|
126
|
+
"""
|
|
127
|
+
if self._is_databricks:
|
|
128
|
+
result = self._resolve_by_workspace_id()
|
|
129
|
+
if result:
|
|
130
|
+
return result
|
|
131
|
+
|
|
132
|
+
result = self._resolve_by_env_var()
|
|
133
|
+
if result:
|
|
134
|
+
return result
|
|
135
|
+
|
|
136
|
+
# Error con ayuda contextual
|
|
137
|
+
env_names = [v.get("env", k) for k, v in self._environments.items()]
|
|
138
|
+
raise ValueError(
|
|
139
|
+
"No se pudo determinar el ambiente de ejecución.\n\n"
|
|
140
|
+
f" Ambientes disponibles (por 'env'): {env_names}\n\n"
|
|
141
|
+
" Opciones:\n"
|
|
142
|
+
f" A) Variable de entorno: export {_ENV_VAR_TARGET}=dev\n"
|
|
143
|
+
" B) En Databricks: el workspace_id se detecta automáticamente\n"
|
|
144
|
+
" Verifica que el workspace_id esté en la sección 'environments' del config.json"
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
def _resolve_by_workspace_id(self) -> tuple[str, dict] | None:
|
|
148
|
+
"""En Databricks: lee el workspaceId del contexto y lo busca en environments."""
|
|
149
|
+
try:
|
|
150
|
+
from dbruntime.databricks_repl_context import get_context
|
|
151
|
+
workspace_id = str(get_context().workspaceId)
|
|
152
|
+
self.log.debug(f"Workspace ID detectado: '{workspace_id}'")
|
|
153
|
+
|
|
154
|
+
if workspace_id in self._environments:
|
|
155
|
+
self.log.debug(f"Ambiente encontrado por workspace_id: '{workspace_id}'")
|
|
156
|
+
return workspace_id, self._environments[workspace_id]
|
|
157
|
+
|
|
158
|
+
self.log.warning(
|
|
159
|
+
"resolve_environment",
|
|
160
|
+
f"Workspace ID '{workspace_id}' no está en environments del config.json",
|
|
161
|
+
disponibles=list(self._environments.keys()),
|
|
162
|
+
)
|
|
163
|
+
except Exception as exc:
|
|
164
|
+
self.log.debug(f"No se pudo leer workspace_id del contexto: {exc}")
|
|
165
|
+
|
|
166
|
+
return None
|
|
167
|
+
|
|
168
|
+
def _resolve_by_env_var(self) -> tuple[str, dict] | None:
    """Resolve the environment by name (e.g. ``"dev"``).

    The target name is taken from, in order:
      1. the ``DATABRICKS_TARGET`` process environment variable;
      2. the ``"DATABRICKS_TARGET"`` key of config.json.

    The name is compared with the ``"env"`` and ``"env_short"`` fields of
    each entry in ``environments``; the first match wins.

    Returns:
        ``(workspace_id, vars)`` for the matching entry, or ``None`` when
        no target name is set anywhere.

    Raises:
        ValueError: If a target name is set but matches no environment.
    """
    target = os.environ.get(_ENV_VAR_TARGET, "").strip()
    if not target:
        # config.json may hold null or a non-string value for this key;
        # normalise it instead of failing on .strip() with AttributeError.
        configured = self._config.get(_ENV_VAR_TARGET)
        target = "" if configured is None else str(configured).strip()
    if not target:
        return None

    self.log.debug(f"Buscando ambiente por {_ENV_VAR_TARGET}='{target}'")

    for workspace_id, vars_ in self._environments.items():
        if vars_.get("env") == target or vars_.get("env_short") == target:
            self.log.debug(
                f"Ambiente '{target}' encontrado → workspace_id='{workspace_id}'"
            )
            return workspace_id, vars_

    available = [v.get("env", k) for k, v in self._environments.items()]
    raise ValueError(
        f"El valor '{target}' de {_ENV_VAR_TARGET} no coincide con ningún ambiente.\n"
        f"Ambientes disponibles: {available}"
    )
|
|
199
|
+
|
|
200
|
+
# ── API pública ───────────────────────────────────────────────────────
|
|
201
|
+
|
|
202
|
+
@property
def env(self) -> str:
    """Full environment name (e.g. dev, qa, prod).

    Falls back to the workspace id when the resolved entry has no
    ``"env"`` key.
    """
    fallback = self._workspace_id
    return self._vars.get("env", fallback)
|
|
206
|
+
|
|
207
|
+
@property
def env_short(self) -> str:
    """Short environment code (e.g. d, q, p).

    Returns the configured ``"env_short"`` value when the key is present.
    Otherwise returns the first character of ``env``, or an empty string
    if ``env`` is empty.
    """
    # Look the key up first: computing the fallback eagerly would raise
    # IndexError on an empty ``env`` even when "env_short" is configured.
    if "env_short" in self._vars:
        return self._vars["env_short"]
    # Slicing (not indexing) so an empty name yields "" instead of raising.
    return self.env[:1]
|
|
211
|
+
|
|
212
|
+
@property
def workspace_id(self) -> str:
    """Key of the ``environments`` entry that was resolved for this run."""
    resolved_id = self._workspace_id
    return resolved_id
|
|
216
|
+
|
|
217
|
+
@property
def workspace_host(self) -> str:
    """Configured ``"workspace_host"`` value, or ``""`` when not set."""
    host = self._vars.get("workspace_host", "")
    return host
|
|
221
|
+
|
|
222
|
+
@property
def tags(self) -> dict:
    """Configured ``"tags"`` mapping of the environment, or ``{}`` when not set."""
    env_tags = self._vars.get("tags", {})
    return env_tags
|
|
226
|
+
|
|
227
|
+
def get_catalog(self, name: str) -> str:
    """Return the catalog configured under ``name`` for this environment.

    Example:
        env.get_catalog("bronze") -> "ct_bronze_dlsuraanaliticadev"

    Raises:
        KeyError: If ``name`` is not a key of the ``"catalogs"`` mapping.
    """
    catalogs = self._vars.get("catalogs", {})
    if name in catalogs:
        return catalogs[name]
    raise KeyError(
        f"Catálogo '{name}' no definido para env='{self.env}'.\n"
        f"Catálogos disponibles: {list(catalogs)}"
    )
|
|
241
|
+
|
|
242
|
+
def has_catalog(self, name: str) -> bool:
    """Tell whether ``name`` is a key of this environment's ``"catalogs"`` mapping."""
    catalogs = self._vars.get("catalogs", {})
    return name in catalogs
|
|
244
|
+
|
|
245
|
+
def get_storage_account(self, name: str = "default") -> str:
    """Return the storage account configured under ``name``.

    Example:
        env.get_storage_account()      -> "dlsuraanaliticadev"
        env.get_storage_account("raw") -> "dlsuraanaliticadevraw"

    Raises:
        KeyError: If ``name`` is not a key of ``"storage_accounts"``.
    """
    accounts = self._vars.get("storage_accounts", {})
    if name in accounts:
        return accounts[name]
    raise KeyError(
        f"Storage account '{name}' no definida para env='{self.env}'.\n"
        f"Disponibles: {list(accounts)}"
    )
|
|
260
|
+
|
|
261
|
+
def get_path(self, name: str) -> str:
    """Return the base path configured under ``name`` for this environment.

    Example:
        env.get_path("raw")     -> "abfss://raw@dlsuraanaliticadev..."
        env.get_path("curated") -> "abfss://curated@dlsuraanaliticadev..."

    Raises:
        KeyError: If ``name`` is not a key of the ``"paths"`` mapping.
    """
    paths = self._vars.get("paths", {})
    if name in paths:
        return paths[name]
    raise KeyError(
        f"Path '{name}' no definido para env='{self.env}'.\n"
        f"Paths disponibles: {list(paths)}"
    )
|
|
276
|
+
|
|
277
|
+
def get_secret(self, key: str) -> str:
    """Fetch a secret from the backend that matches the runtime.

    On Databricks the lookup is delegated to ``_get_secret_databricks``;
    otherwise to ``_get_secret_local``.
    """
    fetch = (
        self._get_secret_databricks
        if self._is_databricks
        else self._get_secret_local
    )
    return fetch(key)
|
|
286
|
+
|
|
287
|
+
def get_var(self, path: str, default: Any = None) -> Any:
    """Read any variable of the active environment using dot notation.

    Example:
        env.get_var("tags.cost_center")  -> "CC-1001"
        env.get_var("secrets.scope")     -> "kv-dev"
        env.get_var("no.existe", "N/A")  -> "N/A"

    Returns:
        The value found at ``path``, or ``default`` as soon as a segment
        is missing or the current value is not a dict.
    """
    current = self._vars
    for segment in path.split("."):
        if not isinstance(current, dict) or segment not in current:
            return default
        current = current[segment]
    return current
|
|
303
|
+
|
|
304
|
+
def summary(self) -> dict:
    """Return a dict describing the active environment.

    Sections missing from the environment entry are reported as empty
    dicts (or ``""`` for the secrets scope).
    """
    env_vars = self._vars
    report = {"workspace_id": self._workspace_id}
    report["env"] = self.env
    report["env_short"] = self.env_short
    report["workspace_host"] = self.workspace_host
    report["catalogs"] = env_vars.get("catalogs", {})
    report["storage_accounts"] = env_vars.get("storage_accounts", {})
    report["paths"] = env_vars.get("paths", {})
    report["secrets_scope"] = env_vars.get("secrets", {}).get("scope", "")
    report["tags"] = self.tags
    report["runtime"] = "databricks" if self._is_databricks else "local"
    return report
|
|
318
|
+
|
|
319
|
+
# ── Secrets internos ──────────────────────────────────────────────────
|
|
320
|
+
|
|
321
|
+
def _get_secret_databricks(self, key: str) -> str:
|
|
322
|
+
scope = self._vars.get("secrets", {}).get("scope")
|
|
323
|
+
if not scope:
|
|
324
|
+
raise ValueError(
|
|
325
|
+
f"'secrets.scope' no configurado para env='{self.env}' en config.json."
|
|
326
|
+
)
|
|
327
|
+
try:
|
|
328
|
+
from pyspark.dbutils import DBUtils
|
|
329
|
+
dbutils = DBUtils(None)
|
|
330
|
+
value = dbutils.secrets.get(scope=scope, key=key)
|
|
331
|
+
self.log.debug(f"Secret '{key}' leído desde scope='{scope}' ✔")
|
|
332
|
+
return value
|
|
333
|
+
except Exception as exc:
|
|
334
|
+
raise RuntimeError(
|
|
335
|
+
f"No se pudo obtener el secret '{key}' del scope '{scope}': {exc}"
|
|
336
|
+
) from exc
|
|
337
|
+
|
|
338
|
+
def _get_secret_local(self, key: str) -> str:
    """Read a secret from the entries loaded from the local ``.env`` file.

    The lookup name is the secret prefix followed by ``key``, upper-cased.

    Raises:
        KeyError: If no entry with that name was loaded.
    """
    env_key = f"{_SECRET_PREFIX}{key}".upper()
    secret = self._env_secrets.get(env_key)
    if secret is not None:
        self.log.debug(f"Secret '{key}' leído desde .env local ✔")
        return secret
    raise KeyError(
        f"Secret local '{key}' no encontrado.\n"
        f"Agrega '{env_key}=<valor>' en .env.{self.env} o .env"
    )
|
|
348
|
+
|
|
349
|
+
# ── Carga de .env local ───────────────────────────────────────────────
|
|
350
|
+
|
|
351
|
+
def _load_env_file(self, env_file: str | None) -> dict[str, str]:
|
|
352
|
+
"""
|
|
353
|
+
Busca el archivo .env en orden:
|
|
354
|
+
1. Ruta explícita (env_file)
|
|
355
|
+
2. .env.<env> (ej: .env.dev)
|
|
356
|
+
3. .env
|
|
357
|
+
"""
|
|
358
|
+
candidates: list[Path] = []
|
|
359
|
+
if env_file:
|
|
360
|
+
candidates.append(Path(env_file))
|
|
361
|
+
candidates.append(Path(f".env.{self.env}"))
|
|
362
|
+
candidates.append(Path(".env"))
|
|
363
|
+
|
|
364
|
+
for path in candidates:
|
|
365
|
+
if path.exists():
|
|
366
|
+
parsed = self._parse_env_file(path)
|
|
367
|
+
self.log.info(f"Secrets locales cargados desde: {path} ({len(parsed)} entradas)")
|
|
368
|
+
return parsed
|
|
369
|
+
|
|
370
|
+
self.log.debug("Sin archivo .env — secrets locales no disponibles")
|
|
371
|
+
return {}
|
|
372
|
+
|
|
373
|
+
@staticmethod
def _parse_env_file(path: Path) -> dict[str, str]:
    """Parse a ``.env`` file and keep only the secret entries.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Keys are stripped and upper-cased; only keys starting with
    the secret prefix are kept. Values are stripped of surrounding
    whitespace and of one pair of matching surrounding quotes.

    Args:
        path: File to read; decoded as UTF-8, with or without a BOM.

    Returns:
        Mapping of upper-cased key to value.
    """
    result: dict[str, str] = {}
    # "utf-8-sig" drops a leading BOM; with plain "utf-8" the BOM would be
    # glued to the first key, which would then silently fail the prefix test.
    for raw_line in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip().upper()
        if not key.startswith(_SECRET_PREFIX):
            continue
        value = value.strip()
        # Remove a single pair of matching quotes only. Stripping every
        # leading/trailing quote character would corrupt secrets that
        # legitimately start or end with ' or ".
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        result[key] = value
    return result
|