luxorasap 0.1.39__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- luxorasap/__init__.py +1 -1
- luxorasap/utils/storage/__init__.py +11 -1
- luxorasap/utils/storage/change_tracker.py +294 -0
- {luxorasap-0.1.39.dist-info → luxorasap-0.2.0.dist-info}/METADATA +1 -1
- {luxorasap-0.1.39.dist-info → luxorasap-0.2.0.dist-info}/RECORD +8 -7
- {luxorasap-0.1.39.dist-info → luxorasap-0.2.0.dist-info}/WHEEL +0 -0
- {luxorasap-0.1.39.dist-info → luxorasap-0.2.0.dist-info}/entry_points.txt +0 -0
- {luxorasap-0.1.39.dist-info → luxorasap-0.2.0.dist-info}/top_level.txt +0 -0
luxorasap/__init__.py
CHANGED
|
@@ -13,7 +13,7 @@ from types import ModuleType
|
|
|
13
13
|
try:
|
|
14
14
|
__version__: str = metadata.version(__name__)
|
|
15
15
|
except metadata.PackageNotFoundError: # editable install
|
|
16
|
-
__version__ = "0.
|
|
16
|
+
__version__ = "0.2.0"
|
|
17
17
|
|
|
18
18
|
# ─── Lazy loader ─────────────────────────────────────────────────
|
|
19
19
|
def __getattr__(name: str) -> ModuleType:
|
|
@@ -1,2 +1,12 @@
|
|
|
1
1
|
from .blob import BlobParquetClient, BlobPickleClient, BlobExcelClient, delete_blob, list_blob_files
|
|
2
|
-
|
|
2
|
+
from .change_tracker import BlobChangeWatcher, BlobMetadata
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"BlobParquetClient",
|
|
6
|
+
"BlobPickleClient",
|
|
7
|
+
"BlobExcelClient",
|
|
8
|
+
"delete_blob",
|
|
9
|
+
"list_blob_files",
|
|
10
|
+
"BlobChangeWatcher",
|
|
11
|
+
"BlobMetadata",
|
|
12
|
+
]
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, asdict
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Dict, List, Optional, Sequence, Tuple
|
|
6
|
+
|
|
7
|
+
from azure.core.exceptions import ResourceNotFoundError
|
|
8
|
+
from azure.storage.blob import BlobServiceClient
|
|
9
|
+
|
|
10
|
+
# Reuse the project's existing blob storage helpers
|
|
11
|
+
from luxorasap.utils.storage.blob import BlobPickleClient
|
|
12
|
+
import os
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
16
|
+
# Tipos de dados
|
|
17
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class BlobMetadata:
    """Minimal set of facts used to decide whether a blob has changed."""

    last_modified_utc: datetime  # timezone-aware, always expressed in UTC
    etag: str
    size_bytes: int

    @staticmethod
    def _as_utc(moment: datetime) -> datetime:
        """Return *moment* as an aware UTC datetime.

        Naive values are taken to already be in UTC and are only tagged;
        aware values are converted to UTC.
        """
        if moment.tzinfo is not None:
            return moment.astimezone(timezone.utc)
        return moment.replace(tzinfo=timezone.utc)

    @staticmethod
    def from_blob_properties(props) -> "BlobMetadata":
        """Build a ``BlobMetadata`` from a blob-properties object.

        Args:
            props: Any object exposing ``last_modified``, ``etag`` and
                ``size`` attributes (e.g. the Azure SDK's BlobProperties).

        Returns:
            A new instance whose timestamp is normalised to aware UTC and
            whose size is coerced to ``int``.
        """
        modified_at = BlobMetadata._as_utc(props.last_modified)
        return BlobMetadata(
            last_modified_utc=modified_at,
            etag=props.etag,
            size_bytes=int(props.size),
        )

    def to_dict(self) -> Dict:
        """Return a plain dict with the timestamp rendered as an ISO-8601 string."""
        return {
            **asdict(self),
            "last_modified_utc": self.last_modified_utc.isoformat(),
        }

    @staticmethod
    def from_dict(d: Dict) -> "BlobMetadata":
        """Rebuild a ``BlobMetadata`` from the mapping produced by ``to_dict``.

        ``last_modified_utc`` may be either an ISO-8601 string or a
        ``datetime``; in both cases the result is normalised to aware UTC.
        """
        stamp = d["last_modified_utc"]
        if isinstance(stamp, str):
            stamp = datetime.fromisoformat(stamp)
        stamp = BlobMetadata._as_utc(stamp)
        return BlobMetadata(
            last_modified_utc=stamp,
            etag=d["etag"],
            size_bytes=int(d["size_bytes"]),
        )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
64
|
+
# Watcher (com persistência em pickle no próprio ADLS)
|
|
65
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
class BlobChangeWatcher:
    """
    Detects blob changes by comparing remote metadata with a persisted snapshot.

    The snapshot is stored through ``BlobPickleClient`` in the same container
    as the watched blobs, as a dict shaped like::

        {
            "<blob_path>": {"last_modified_utc": "...", "etag": "...", "size_bytes": int},
            ...
        }
    """

    def __init__(
        self,
        *,
        adls_connection_string: Optional[str] = None,
        container: str = "luxorasap",
        snapshot_blob_path: str = "system/state",
        watcher_id: str = "blob_change_watcher.pkl",
        treat_missing_as_changed: bool = True,
    ) -> None:
        """
        Args:
            adls_connection_string: Storage connection string. When None, the
                AZURE_STORAGE_CONNECTION_STRING environment variable is used.
            container: Container holding the watched blobs and the snapshot.
            snapshot_blob_path: Folder (inside the container) where the
                snapshot file is kept.
            watcher_id: File name of the snapshot; it is appended to
                ``snapshot_blob_path`` as ``"<snapshot_blob_path>/<watcher_id>"``.
            treat_missing_as_changed: When True, a blob that has no snapshot
                entry yet is reported as changed.

        Raises:
            RuntimeError: If no connection string is given and the environment
                variable is not set.
        """
        if adls_connection_string is None:
            adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")

        if adls_connection_string is None:
            raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")

        self._container_name = container
        self._snapshot_blob_path = f"{snapshot_blob_path}/{watcher_id}"
        self._treat_missing_as_changed = treat_missing_as_changed

        # Clients
        self._blob_service = BlobServiceClient.from_connection_string(adls_connection_string)
        self._container_client = self._blob_service.get_container_client(self._container_name)
        self._pickle_client = BlobPickleClient(
            adls_connection_string=adls_connection_string,
            container=self._container_name,
        )

        # In-memory state
        self._snapshot: Dict[str, Dict] = {}

        # Load the snapshot eagerly; a missing snapshot simply starts empty.
        self._load_snapshot()

    # ───────────────────────────── Snapshot persistence ─────────────────────────────

    def _load_snapshot(self) -> None:
        """
        Load the snapshot through the pickle client.

        A missing snapshot, a payload that is not a dict, or any read failure
        leaves the watcher with an empty snapshot. Unexpected failures are
        logged so that a transient error is not silently mistaken for
        "no snapshot yet".
        """
        # NOTE(review): read_pickle presumably unpickles the blob content;
        # only point the watcher at containers whose contents are trusted.
        try:
            data = self._pickle_client.read_pickle(self._snapshot_blob_path)
        except (FileNotFoundError, ResourceNotFoundError):
            # No snapshot has been written yet.
            self._snapshot = {}
        except Exception:
            # Corruption / old format / transport error: keep the original
            # best-effort behaviour (start from scratch) but leave a trace.
            import logging

            logging.getLogger(__name__).warning(
                "Could not load blob change snapshot %r; starting with an empty snapshot",
                self._snapshot_blob_path,
                exc_info=True,
            )
            self._snapshot = {}
        else:
            self._snapshot = data if isinstance(data, dict) else {}

    def _save_snapshot(self) -> None:
        """Write the in-memory snapshot through the pickle client."""
        self._pickle_client.write_pickle(self._snapshot, self._snapshot_blob_path)

    # ───────────────────────────── Remote properties access ─────────────────────────────

    def _fetch_remote_metadata(self, blob_path: str) -> BlobMetadata:
        """
        Fetch the blob's current metadata from the container.

        Raises:
            ResourceNotFoundError: If the blob does not exist.
        """
        props = self._container_client.get_blob_client(blob_path).get_blob_properties()
        return BlobMetadata.from_blob_properties(props)

    def _get_snapshot_metadata(self, blob_path: str) -> Optional[BlobMetadata]:
        """
        Return the metadata stored in the snapshot for ``blob_path``.

        Returns None when there is no entry, or when the stored entry cannot
        be parsed (it is then handled like a blob never seen before).
        """
        raw = self._snapshot.get(blob_path)
        if not raw:
            return None
        try:
            return BlobMetadata.from_dict(raw)
        except (KeyError, TypeError, ValueError, AttributeError):
            # The snapshot comes from a pickle that may predate this format.
            return None

    @staticmethod
    def _metadata_differs(previous: BlobMetadata, current: BlobMetadata) -> bool:
        """Return True when etag, last-modified time or size differ."""
        return (
            current.etag != previous.etag
            or current.last_modified_utc != previous.last_modified_utc
            or current.size_bytes != previous.size_bytes
        )

    # ───────────────────────────── Public API ─────────────────────────────

    def has_changed(
        self,
        blob_path: str,
        *,
        update_snapshot: bool = False,
        treat_missing_as_changed: Optional[bool] = None,
    ) -> Tuple[bool, Optional[BlobMetadata], Optional[BlobMetadata]]:
        """
        Check whether a blob changed since the previous snapshot.

        Args:
            blob_path: Blob path (e.g. "raw/xlsx/trades.xlsx").
            update_snapshot: When True, persist the new state if the blob
                changed, was deleted, or is being observed for the first time.
            treat_missing_as_changed: Per-call override of the instance-level
                "does a first observation count as a change?" rule.

        Returns:
            ``(changed, previous_metadata, current_metadata)``. The current
            metadata is None when the blob does not exist remotely; the
            previous metadata is None when the snapshot has no usable entry.
        """
        if treat_missing_as_changed is None:
            treat_missing_as_changed = self._treat_missing_as_changed

        previous = self._get_snapshot_metadata(blob_path)

        try:
            current = self._fetch_remote_metadata(blob_path)
        except ResourceNotFoundError:
            # The blob is gone: that is a change only if we knew about it.
            changed = previous is not None
            if update_snapshot and changed:
                self._snapshot.pop(blob_path, None)
                self._save_snapshot()
            return changed, previous, None

        first_observation = previous is None
        if first_observation:
            changed = bool(treat_missing_as_changed)
        else:
            changed = self._metadata_differs(previous, current)

        # A first observation is always recorded when the caller asked for a
        # snapshot update; otherwise, with treat_missing_as_changed=False, the
        # blob would never get a baseline and later changes would go unseen.
        if update_snapshot and (changed or first_observation):
            self._snapshot[blob_path] = current.to_dict()
            self._save_snapshot()

        return changed, previous, current

    def update_snapshot(self, blob_path: str) -> Optional[BlobMetadata]:
        """
        Force the snapshot to reflect the blob's current state.

        Returns:
            The current metadata, or None when the blob does not exist (its
            entry is then removed from the snapshot). The snapshot is saved
            in both cases.
        """
        try:
            current = self._fetch_remote_metadata(blob_path)
        except ResourceNotFoundError:
            self._snapshot.pop(blob_path, None)
            self._save_snapshot()
            return None

        self._snapshot[blob_path] = current.to_dict()
        self._save_snapshot()
        return current

    def mark_as_synchronized(self, blob_path: str, metadata: Optional[BlobMetadata] = None) -> None:
        """
        Explicitly record a blob as synchronized (e.g. after a pipeline ran).

        Args:
            blob_path: Blob path to record.
            metadata: State to store. When None, the current remote state is
                fetched.

        Raises:
            ResourceNotFoundError: If ``metadata`` is None and the blob does
                not exist.
        """
        if metadata is None:
            metadata = self._fetch_remote_metadata(blob_path)
        self._snapshot[blob_path] = metadata.to_dict()
        self._save_snapshot()

    def list_changed_under_prefix(
        self,
        prefix: str,
        *,
        allowed_extensions: Optional[Sequence[str]] = None,
        update_snapshot: bool = False,
        treat_missing_as_changed: Optional[bool] = None,
    ) -> List[str]:
        """
        Scan the blobs under a prefix and return the ones that changed.

        Only blobs currently listed under the prefix are examined, so blobs
        that were deleted remotely are not reported by this method.

        Args:
            prefix: E.g. "enriched/parquet/fundos" (with or without a
                trailing slash).
            allowed_extensions: E.g. [".parquet", ".xlsx"]; matched
                case-insensitively against the end of the blob name. A single
                string is accepted as one extension.
            update_snapshot: When True, store the new state of every blob that
                changed or is being observed for the first time, and save the
                snapshot once at the end.
            treat_missing_as_changed: Per-call override of the instance-level
                "does a first observation count as a change?" rule.

        Returns:
            Paths of the blobs that changed.
        """
        if treat_missing_as_changed is None:
            treat_missing_as_changed = self._treat_missing_as_changed

        if prefix and not prefix.endswith("/"):
            prefix += "/"

        if isinstance(allowed_extensions, str):
            # Guard against a bare string being iterated character by character.
            allowed_extensions = (allowed_extensions,)
        extensions = tuple(ext.lower() for ext in (allowed_extensions or ()))

        changed_paths: List[str] = []
        snapshot_dirty = False

        for blob_item in self._container_client.list_blobs(name_starts_with=prefix):
            name = blob_item.name
            if name.endswith("/"):
                # Skip directory-like placeholder entries.
                continue
            if extensions and not name.lower().endswith(extensions):
                continue

            previous = self._get_snapshot_metadata(name)
            current = BlobMetadata.from_blob_properties(blob_item)

            first_observation = previous is None
            if first_observation:
                changed = bool(treat_missing_as_changed)
            else:
                changed = self._metadata_differs(previous, current)

            if changed:
                changed_paths.append(name)

            # Same baseline rule as has_changed(): first observations are
            # recorded even when they are not reported as changes.
            if update_snapshot and (changed or first_observation):
                self._snapshot[name] = current.to_dict()
                snapshot_dirty = True

        if snapshot_dirty:
            self._save_snapshot()

        return changed_paths
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
luxorasap/__init__.py,sha256=
|
|
1
|
+
luxorasap/__init__.py,sha256=4hN1iSr_kNBePyZxQAd1K2o0jJGru697Yy-ln78rRVA,1355
|
|
2
2
|
luxorasap/btgapi/__init__.py,sha256=QUlfb5oiBY6K1Q5x4-a-x2wECe1At5wc2962I5odOJk,620
|
|
3
3
|
luxorasap/btgapi/auth.py,sha256=PvyCtbEyBO2B1CIeAlNXWugKW1OgiKfPcVzS6K5FBnQ,1872
|
|
4
4
|
luxorasap/btgapi/reports.py,sha256=ZVEMLoJPXc0r3XjPJPMsKQN0zZd1Npd7umNpAj1bncs,8040
|
|
@@ -12,12 +12,13 @@ luxorasap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
12
12
|
luxorasap/utils/dataframe/__init__.py,sha256=heKpmq58FmX35syzzwrHqlOWKYBkH2Z1jyqaQ_Vg-00,265
|
|
13
13
|
luxorasap/utils/dataframe/reader.py,sha256=Vzjdw-AeS1lnWEHQ8RZNh0kK93NWTp0NWVi_B6mN5N0,616
|
|
14
14
|
luxorasap/utils/dataframe/transforms.py,sha256=OIvlTTcjFX6bUhuQp_syEp7ssm4sLzwvgsag6n2Wl3k,2438
|
|
15
|
-
luxorasap/utils/storage/__init__.py,sha256=
|
|
15
|
+
luxorasap/utils/storage/__init__.py,sha256=461GYJcPMXGjHuJ9y9D3BHOC_oUS9Re32nVu1AwKyIA,334
|
|
16
16
|
luxorasap/utils/storage/blob.py,sha256=vgCKMOiVgP-V1A2xZRhG3kJhPFU-LA9E9kddOQTxYD8,9443
|
|
17
|
+
luxorasap/utils/storage/change_tracker.py,sha256=5URYI18mymcVfUyyb9zi8NeNDVrdYgdOAs8L58GwL5Q,11706
|
|
17
18
|
luxorasap/utils/tools/__init__.py,sha256=dvK7Z4xnNQAuEiObVN7qjeLWAvP49JeFn2Oq9GdgmXs,76
|
|
18
19
|
luxorasap/utils/tools/excel.py,sha256=SfeTcbJWsWq3uKruwKSjJ4aWgMovITzlNXjP2bhdMjI,1246
|
|
19
|
-
luxorasap-0.
|
|
20
|
-
luxorasap-0.
|
|
21
|
-
luxorasap-0.
|
|
22
|
-
luxorasap-0.
|
|
23
|
-
luxorasap-0.
|
|
20
|
+
luxorasap-0.2.0.dist-info/METADATA,sha256=2zENi1Kh37o6j3noZijo4RJ7Dg71p0yMIlR_ZSLHqBA,3803
|
|
21
|
+
luxorasap-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
22
|
+
luxorasap-0.2.0.dist-info/entry_points.txt,sha256=XFh-dOwUhlya9DmGvgookMI0ezyUJjcOvTIHDEYS44g,52
|
|
23
|
+
luxorasap-0.2.0.dist-info/top_level.txt,sha256=9YOL6bUIpzY06XFBRkUW1e4rgB32Ds91fQPGwUEjxzU,10
|
|
24
|
+
luxorasap-0.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|