luxorasap 0.1.39__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
luxorasap/__init__.py CHANGED
@@ -13,7 +13,7 @@ from types import ModuleType
13
13
  try:
14
14
  __version__: str = metadata.version(__name__)
15
15
  except metadata.PackageNotFoundError: # editable install
16
- __version__ = "0.1.39"
16
+ __version__ = "0.2.0"
17
17
 
18
18
  # ─── Lazy loader ─────────────────────────────────────────────────
19
19
  def __getattr__(name: str) -> ModuleType:
@@ -1,2 +1,12 @@
1
1
  from .blob import BlobParquetClient, BlobPickleClient, BlobExcelClient, delete_blob, list_blob_files
2
- __all__ = ["BlobParquetClient", "BlobPickleClient", "BlobExcelClient", "delete_blob", "list_blob_files"]
2
+ from .change_tracker import BlobChangeWatcher, BlobMetadata
3
+
4
+ __all__ = [
5
+ "BlobParquetClient",
6
+ "BlobPickleClient",
7
+ "BlobExcelClient",
8
+ "delete_blob",
9
+ "list_blob_files",
10
+ "BlobChangeWatcher",
11
+ "BlobMetadata",
12
+ ]
@@ -0,0 +1,294 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, asdict
4
+ from datetime import datetime, timezone
5
+ from typing import Dict, List, Optional, Sequence, Tuple
6
+
7
+ from azure.core.exceptions import ResourceNotFoundError
8
+ from azure.storage.blob import BlobServiceClient
9
+
10
+ # Reuse the blob utilities already provided by this package
11
+ from luxorasap.utils.storage.blob import BlobPickleClient
12
+ import os
13
+
14
+
15
+ # ──────────────────────────────────────────────────────────────────────────────
16
+ # Data types
17
+ # ──────────────────────────────────────────────────────────────────────────────
18
+
19
@dataclass(frozen=True)
class BlobMetadata:
    """
    Minimal set of information used to detect changes in a blob.

    Attributes:
        last_modified_utc: Last-modified timestamp. The factory methods below
            always produce a timezone-aware value converted to UTC.
        etag: ETag copied verbatim from the blob properties.
        size_bytes: Blob size in bytes.
    """
    last_modified_utc: datetime  # timezone-aware, always in UTC
    etag: str
    size_bytes: int

    @staticmethod
    def _as_utc(moment: datetime) -> datetime:
        """
        Return ``moment`` as a timezone-aware datetime in UTC.

        Naive values are assumed to already be expressed in UTC and only get
        the tzinfo attached; aware values are converted.
        """
        if moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment.astimezone(timezone.utc)

    @staticmethod
    def from_blob_properties(props) -> "BlobMetadata":
        """
        Build a BlobMetadata from a blob-properties object.

        Args:
            props: Any object exposing ``last_modified`` (datetime), ``etag``
                and ``size`` attributes, e.g. the SDK's BlobProperties.

        Returns:
            BlobMetadata whose ``last_modified_utc`` is timezone-aware in UTC.
        """
        return BlobMetadata(
            last_modified_utc=BlobMetadata._as_utc(props.last_modified),
            etag=props.etag,
            size_bytes=int(props.size),
        )

    def to_dict(self) -> Dict:
        """
        Serialize to a plain dict, with the timestamp as an ISO-8601 string.

        The result has the keys ``last_modified_utc``, ``etag`` and
        ``size_bytes`` and can be fed back to :meth:`from_dict`.
        """
        d = asdict(self)
        d["last_modified_utc"] = self.last_modified_utc.isoformat()
        return d

    @staticmethod
    def from_dict(d: Dict) -> "BlobMetadata":
        """
        Rebuild a BlobMetadata from the dict produced by :meth:`to_dict`.

        Args:
            d: Mapping with ``last_modified_utc`` (ISO-8601 string or
                datetime), ``etag`` and ``size_bytes``.

        Raises:
            KeyError: If one of the expected keys is missing.
            ValueError: If the timestamp string is not valid ISO-8601 or
                ``size_bytes`` cannot be converted to int.
        """
        lm = d["last_modified_utc"]
        if isinstance(lm, str):
            # datetime.fromisoformat only accepts a trailing "Z" from
            # Python 3.11 on; rewrite it as an explicit UTC offset so
            # snapshots written by other tools load on older interpreters.
            if lm.endswith(("Z", "z")):
                lm = lm[:-1] + "+00:00"
            lm = datetime.fromisoformat(lm)
        return BlobMetadata(
            last_modified_utc=BlobMetadata._as_utc(lm),
            etag=d["etag"],
            size_bytes=int(d["size_bytes"]),
        )
61
+
62
+
63
+ # ──────────────────────────────────────────────────────────────────────────────
64
+ # Watcher (snapshot persisted as a pickle in ADLS itself)
65
+ # ──────────────────────────────────────────────────────────────────────────────
66
+
67
class BlobChangeWatcher:
    """
    Blob change checker whose snapshot is persisted as a pickle in the same
    container as the watched blobs.

    The snapshot is stored as a dict:
        {
            "<blob_path>": {"last_modified_utc": "...", "etag": "...", "size_bytes": int},
            ...
        }
    """

    def __init__(
        self,
        *,
        adls_connection_string: Optional[str] = None,
        container: str = "luxorasap",
        snapshot_blob_path: str = "system/state",
        watcher_id: str = "blob_change_watcher.pkl",
        treat_missing_as_changed: bool = True,
    ) -> None:
        """
        Args:
            adls_connection_string: If None, AZURE_STORAGE_CONNECTION_STRING is
                read from the environment.
            container: Container holding the watched blobs (the snapshot is
                stored in the same container).
            snapshot_blob_path: Folder, inside the container, where the
                snapshot pickle is kept.
            watcher_id: File name of the snapshot pickle inside
                ``snapshot_blob_path``; the snapshot lives at
                ``"<snapshot_blob_path>/<watcher_id>"``.
            treat_missing_as_changed: If True, a blob seen for the first time
                is reported as "changed".

        Raises:
            RuntimeError: If no connection string was given and the
                environment variable is not set.
        """
        if adls_connection_string is None:
            adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")

        if adls_connection_string is None:
            raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")

        self._container_name = container
        self._snapshot_blob_path = f"{snapshot_blob_path}/{watcher_id}"
        self._treat_missing_as_changed = treat_missing_as_changed

        # Clients
        self._blob_service = BlobServiceClient.from_connection_string(adls_connection_string)
        self._container_client = self._blob_service.get_container_client(self._container_name)
        self._pickle_client = BlobPickleClient(
            adls_connection_string=adls_connection_string,
            container=self._container_name,
        )

        # In-memory state
        self._snapshot: Dict[str, Dict] = {}

        # Load the snapshot on start-up (an absent snapshot means "empty")
        self._load_snapshot()

    # ───────────────────────────── Snapshot persistence ─────────────────────────────

    def _load_snapshot(self) -> None:
        """
        Load the snapshot through the pickle client.

        A missing snapshot, a non-dict payload or any read failure results in
        an empty snapshot. Unexpected failures are logged instead of being
        swallowed silently, because an empty snapshot makes every blob look
        new and the next save overwrites whatever was stored before.
        """
        import logging  # local import keeps the block independent of the file header

        # NOTE(review): read_pickle presumably unpickles the blob content;
        # that is only safe while the container is trusted — confirm.
        try:
            data = self._pickle_client.read_pickle(self._snapshot_blob_path)
        except FileNotFoundError:
            # No snapshot yet: regular first run.
            self._snapshot = {}
            return
        except Exception:
            # Corruption / old format / transient error → start from scratch,
            # but leave a trace so the reset can be diagnosed.
            logging.getLogger(__name__).warning(
                "Could not load snapshot %r; starting from an empty snapshot",
                self._snapshot_blob_path,
                exc_info=True,
            )
            self._snapshot = {}
            return

        self._snapshot = data if isinstance(data, dict) else {}

    def _save_snapshot(self) -> None:
        """
        Persist the in-memory snapshot through the pickle client.
        """
        self._pickle_client.write_pickle(self._snapshot, self._snapshot_blob_path)

    # ───────────────────────────── Remote properties access ─────────────────────────────

    def _fetch_remote_metadata(self, blob_path: str) -> "BlobMetadata":
        """
        Fetch the current metadata of a blob from the container.

        Raises:
            ResourceNotFoundError: If the blob does not exist.
        """
        props = self._container_client.get_blob_client(blob_path).get_blob_properties()
        return BlobMetadata.from_blob_properties(props)

    def _get_snapshot_metadata(self, blob_path: str) -> Optional["BlobMetadata"]:
        """
        Return the metadata stored in the snapshot for ``blob_path``, or None
        when the snapshot has no (non-empty) entry for it.
        """
        raw = self._snapshot.get(blob_path)
        return BlobMetadata.from_dict(raw) if raw else None

    @staticmethod
    def _differs(previous: "BlobMetadata", current: "BlobMetadata") -> bool:
        """
        Change criterion shared by the public methods: a blob changed when its
        etag, last-modified timestamp or size differs from the snapshot.
        """
        return (
            current.etag != previous.etag
            or current.last_modified_utc != previous.last_modified_utc
            or current.size_bytes != previous.size_bytes
        )

    # ───────────────────────────── Public API ─────────────────────────────

    def has_changed(
        self,
        blob_path: str,
        *,
        update_snapshot: bool = False,
        treat_missing_as_changed: Optional[bool] = None,
    ) -> Tuple[bool, Optional["BlobMetadata"], Optional["BlobMetadata"]]:
        """
        Check whether a blob changed since the previous snapshot.

        Args:
            blob_path: Blob path (e.g. "raw/xlsx/trades.xlsx").
            update_snapshot: If True, the snapshot is updated and saved when a
                change is detected, when the blob was deleted, or when the blob
                is seen for the first time (so a baseline exists for the next
                call even if the first sighting is not reported as a change).
            treat_missing_as_changed: Per-call override of the "first sighting
                counts as a change?" rule; None uses the instance default.

        Returns:
            (changed?, previous_metadata, current_metadata). The current
            metadata is None when the blob no longer exists remotely; in that
            case ``changed`` is True only if the snapshot knew the blob.
        """
        if treat_missing_as_changed is None:
            treat_missing_as_changed = self._treat_missing_as_changed

        previous = self._get_snapshot_metadata(blob_path)

        try:
            current = self._fetch_remote_metadata(blob_path)
        except ResourceNotFoundError:
            # The blob is gone remotely: that is a change only if we knew it.
            changed = previous is not None
            if update_snapshot and changed:
                # Drop it from the snapshot because the blob was deleted
                self._snapshot.pop(blob_path, None)
                self._save_snapshot()
            return changed, previous, None

        first_seen = previous is None
        if first_seen:
            changed = bool(treat_missing_as_changed)
        else:
            changed = self._differs(previous, current)

        # Record first sightings as well: without a baseline, ``previous``
        # would stay None forever and, with treat_missing_as_changed=False,
        # later modifications of the blob would never be reported.
        if update_snapshot and (changed or first_seen):
            self._snapshot[blob_path] = current.to_dict()
            self._save_snapshot()

        return changed, previous, current

    def update_snapshot(self, blob_path: str) -> Optional["BlobMetadata"]:
        """
        Force the snapshot to reflect the current state of the blob.

        Returns:
            The current metadata, or None when the blob does not exist (its
            entry is then removed from the snapshot). The snapshot is saved in
            both cases.
        """
        try:
            current = self._fetch_remote_metadata(blob_path)
        except ResourceNotFoundError:
            self._snapshot.pop(blob_path, None)
            self._save_snapshot()
            return None

        self._snapshot[blob_path] = current.to_dict()
        self._save_snapshot()
        return current

    def mark_as_synchronized(self, blob_path: str, metadata: Optional["BlobMetadata"] = None) -> None:
        """
        Explicitly mark a blob as "synchronized" in the snapshot (e.g. after a
        pipeline finished processing it).

        Args:
            blob_path: Blob path to record.
            metadata: Metadata to store; when omitted, the current remote
                state is fetched.

        Raises:
            ResourceNotFoundError: If ``metadata`` is omitted and the blob
                does not exist.
        """
        if metadata is None:
            metadata = self._fetch_remote_metadata(blob_path)
        self._snapshot[blob_path] = metadata.to_dict()
        self._save_snapshot()

    def list_changed_under_prefix(
        self,
        prefix: str,
        *,
        allowed_extensions: Optional[Sequence[str]] = None,
        update_snapshot: bool = False,
    ) -> List[str]:
        """
        Scan every blob under a prefix and return those that changed according
        to the metadata comparison rules.

        Only blobs currently listed under the prefix are examined; snapshot
        entries whose blob no longer exists are not reported here.

        Args:
            prefix: E.g. "enriched/parquet/fundos" (with or without a trailing
                slash; a slash is appended when missing).
            allowed_extensions: E.g. [".parquet", ".xlsx"] to filter by suffix
                (case-insensitive). A single string is accepted as well.
            update_snapshot: If True, the snapshot is updated for changed
                blobs and for blobs seen for the first time, then saved once.

        Returns:
            List of blob paths that changed.
        """
        if prefix and not prefix.endswith("/"):
            prefix += "/"

        # A bare string would otherwise be iterated character by character.
        if isinstance(allowed_extensions, str):
            allowed_extensions = (allowed_extensions,)
        extensions = tuple(e.lower() for e in (allowed_extensions or []))

        changed_paths: List[str] = []
        snapshot_dirty = False

        for blob_item in self._container_client.list_blobs(name_starts_with=prefix):
            name = blob_item.name
            if name.endswith("/"):
                # Names ending in "/" are skipped (presumably directory placeholders).
                continue
            if extensions and not name.lower().endswith(extensions):
                continue

            previous = self._get_snapshot_metadata(name)
            current = BlobMetadata.from_blob_properties(blob_item)

            first_seen = previous is None
            if first_seen:
                changed = self._treat_missing_as_changed
            else:
                changed = self._differs(previous, current)

            if changed:
                changed_paths.append(name)

            # Same baseline rule as has_changed: first sightings are recorded
            # even when they are not reported as changes.
            if update_snapshot and (changed or first_seen):
                self._snapshot[name] = current.to_dict()
                snapshot_dirty = True

        if snapshot_dirty:
            self._save_snapshot()

        return changed_paths
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: luxorasap
3
- Version: 0.1.39
3
+ Version: 0.2.0
4
4
  Summary: Toolbox da Luxor para ingestão, análise e automação de dados financeiros.
5
5
  Author-email: Luxor Group <backoffice@luxor.com.br>
6
6
  License: Proprietary – All rights reserved
@@ -1,4 +1,4 @@
1
- luxorasap/__init__.py,sha256=9SBHid7BhQFp10sy5RmCEcKf_mIfcgyIcfJ_s4YyKSs,1356
1
+ luxorasap/__init__.py,sha256=4hN1iSr_kNBePyZxQAd1K2o0jJGru697Yy-ln78rRVA,1355
2
2
  luxorasap/btgapi/__init__.py,sha256=QUlfb5oiBY6K1Q5x4-a-x2wECe1At5wc2962I5odOJk,620
3
3
  luxorasap/btgapi/auth.py,sha256=PvyCtbEyBO2B1CIeAlNXWugKW1OgiKfPcVzS6K5FBnQ,1872
4
4
  luxorasap/btgapi/reports.py,sha256=ZVEMLoJPXc0r3XjPJPMsKQN0zZd1Npd7umNpAj1bncs,8040
@@ -12,12 +12,13 @@ luxorasap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  luxorasap/utils/dataframe/__init__.py,sha256=heKpmq58FmX35syzzwrHqlOWKYBkH2Z1jyqaQ_Vg-00,265
13
13
  luxorasap/utils/dataframe/reader.py,sha256=Vzjdw-AeS1lnWEHQ8RZNh0kK93NWTp0NWVi_B6mN5N0,616
14
14
  luxorasap/utils/dataframe/transforms.py,sha256=OIvlTTcjFX6bUhuQp_syEp7ssm4sLzwvgsag6n2Wl3k,2438
15
- luxorasap/utils/storage/__init__.py,sha256=96J1p4nDeDBvWIULBA0RAoP4QCdxkfWHGApRe2BhZ3E,205
15
+ luxorasap/utils/storage/__init__.py,sha256=461GYJcPMXGjHuJ9y9D3BHOC_oUS9Re32nVu1AwKyIA,334
16
16
  luxorasap/utils/storage/blob.py,sha256=vgCKMOiVgP-V1A2xZRhG3kJhPFU-LA9E9kddOQTxYD8,9443
17
+ luxorasap/utils/storage/change_tracker.py,sha256=5URYI18mymcVfUyyb9zi8NeNDVrdYgdOAs8L58GwL5Q,11706
17
18
  luxorasap/utils/tools/__init__.py,sha256=dvK7Z4xnNQAuEiObVN7qjeLWAvP49JeFn2Oq9GdgmXs,76
18
19
  luxorasap/utils/tools/excel.py,sha256=SfeTcbJWsWq3uKruwKSjJ4aWgMovITzlNXjP2bhdMjI,1246
19
- luxorasap-0.1.39.dist-info/METADATA,sha256=KM__zvQvhHpzbl1YamE9kP_1RMHopbbuZzrHSzS2Dmg,3804
20
- luxorasap-0.1.39.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
- luxorasap-0.1.39.dist-info/entry_points.txt,sha256=XFh-dOwUhlya9DmGvgookMI0ezyUJjcOvTIHDEYS44g,52
22
- luxorasap-0.1.39.dist-info/top_level.txt,sha256=9YOL6bUIpzY06XFBRkUW1e4rgB32Ds91fQPGwUEjxzU,10
23
- luxorasap-0.1.39.dist-info/RECORD,,
20
+ luxorasap-0.2.0.dist-info/METADATA,sha256=2zENi1Kh37o6j3noZijo4RJ7Dg71p0yMIlR_ZSLHqBA,3803
21
+ luxorasap-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
22
+ luxorasap-0.2.0.dist-info/entry_points.txt,sha256=XFh-dOwUhlya9DmGvgookMI0ezyUJjcOvTIHDEYS44g,52
23
+ luxorasap-0.2.0.dist-info/top_level.txt,sha256=9YOL6bUIpzY06XFBRkUW1e4rgB32Ds91fQPGwUEjxzU,10
24
+ luxorasap-0.2.0.dist-info/RECORD,,