luxorasap 0.1.39__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {luxorasap-0.1.39 → luxorasap-0.2.0}/PKG-INFO +1 -1
  2. {luxorasap-0.1.39 → luxorasap-0.2.0}/pyproject.toml +2 -2
  3. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/__init__.py +1 -1
  4. luxorasap-0.2.0/src/luxorasap/utils/storage/__init__.py +12 -0
  5. luxorasap-0.2.0/src/luxorasap/utils/storage/change_tracker.py +294 -0
  6. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap.egg-info/PKG-INFO +1 -1
  7. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap.egg-info/SOURCES.txt +4 -1
  8. luxorasap-0.2.0/tests/test_utils_change_tracker.py +180 -0
  9. luxorasap-0.2.0/tests/tests_utils_pickle_excel.py +92 -0
  10. luxorasap-0.1.39/src/luxorasap/utils/storage/__init__.py +0 -2
  11. {luxorasap-0.1.39 → luxorasap-0.2.0}/README.md +0 -0
  12. {luxorasap-0.1.39 → luxorasap-0.2.0}/setup.cfg +0 -0
  13. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/btgapi/__init__.py +0 -0
  14. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/btgapi/auth.py +0 -0
  15. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/btgapi/reports.py +0 -0
  16. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/btgapi/trades.py +0 -0
  17. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/datareader/__init__.py +0 -0
  18. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/datareader/core.py +0 -0
  19. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/ingest/__init__.py +0 -0
  20. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/ingest/cloud/__init__.py +0 -0
  21. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/ingest/legacy_local/dataloader.py +0 -0
  22. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/utils/__init__.py +0 -0
  23. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/utils/dataframe/__init__.py +0 -0
  24. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/utils/dataframe/reader.py +0 -0
  25. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/utils/dataframe/transforms.py +0 -0
  26. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/utils/storage/blob.py +0 -0
  27. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/utils/tools/__init__.py +0 -0
  28. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap/utils/tools/excel.py +0 -0
  29. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap.egg-info/dependency_links.txt +0 -0
  30. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap.egg-info/entry_points.txt +0 -0
  31. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap.egg-info/requires.txt +0 -0
  32. {luxorasap-0.1.39 → luxorasap-0.2.0}/src/luxorasap.egg-info/top_level.txt +0 -0
  33. {luxorasap-0.1.39 → luxorasap-0.2.0}/tests/test_btgapi_auth.py +0 -0
  34. {luxorasap-0.1.39 → luxorasap-0.2.0}/tests/test_btgapi_reports.py +0 -0
  35. {luxorasap-0.1.39 → luxorasap-0.2.0}/tests/test_btgapi_trades.py +0 -0
  36. {luxorasap-0.1.39 → luxorasap-0.2.0}/tests/test_datareader.py +0 -0
  37. {luxorasap-0.1.39 → luxorasap-0.2.0}/tests/test_ingest_cloud.py +0 -0
  38. {luxorasap-0.1.39 → luxorasap-0.2.0}/tests/test_ingest_legacy_local.py +0 -0
  39. {luxorasap-0.1.39 → luxorasap-0.2.0}/tests/test_utils_dataframe.py +0 -0
  40. {luxorasap-0.1.39 → luxorasap-0.2.0}/tests/test_utils_storage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: luxorasap
3
- Version: 0.1.39
3
+ Version: 0.2.0
4
4
  Summary: Toolbox da Luxor para ingestão, análise e automação de dados financeiros.
5
5
  Author-email: Luxor Group <backoffice@luxor.com.br>
6
6
  License: Proprietary – All rights reserved
@@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"
10
10
  #############################
11
11
  [project]
12
12
  name = "luxorasap"
13
- version = "0.1.39"
13
+ version = "0.2.0"
14
14
  description = "Toolbox da Luxor para ingestão, análise e automação de dados financeiros."
15
15
  readme = "README.md"
16
16
  requires-python = ">=3.9"
@@ -78,7 +78,7 @@ exclude = ["tests*"]
78
78
  # bumpver (sem-ver)
79
79
  #############################
80
80
  [tool.bumpver]
81
- current_version = "0.1.39"
81
+ current_version = "0.2.0"
82
82
  version_pattern = "MAJOR.MINOR.PATCH"
83
83
 
84
84
  # regex explícito – obrigatório no bumpver 2024+
@@ -13,7 +13,7 @@ from types import ModuleType
13
13
  try:
14
14
  __version__: str = metadata.version(__name__)
15
15
  except metadata.PackageNotFoundError: # editable install
16
- __version__ = "0.1.39"
16
+ __version__ = "0.2.0"
17
17
 
18
18
  # ─── Lazy loader ─────────────────────────────────────────────────
19
19
  def __getattr__(name: str) -> ModuleType:
@@ -0,0 +1,12 @@
1
+ from .blob import BlobParquetClient, BlobPickleClient, BlobExcelClient, delete_blob, list_blob_files
2
+ from .change_tracker import BlobChangeWatcher, BlobMetadata
3
+
4
+ __all__ = [
5
+ "BlobParquetClient",
6
+ "BlobPickleClient",
7
+ "BlobExcelClient",
8
+ "delete_blob",
9
+ "list_blob_files",
10
+ "BlobChangeWatcher",
11
+ "BlobMetadata",
12
+ ]
@@ -0,0 +1,294 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, asdict
4
+ from datetime import datetime, timezone
5
+ from typing import Dict, List, Optional, Sequence, Tuple
6
+
7
+ from azure.core.exceptions import ResourceNotFoundError
8
+ from azure.storage.blob import BlobServiceClient
9
+
10
+ # Reuso dos utilitários que você já tem no projeto
11
+ from luxorasap.utils.storage.blob import BlobPickleClient
12
+ import os
13
+
14
+
15
+ # ──────────────────────────────────────────────────────────────────────────────
16
+ # Tipos de dados
17
+ # ──────────────────────────────────────────────────────────────────────────────
18
+
19
@dataclass(frozen=True)
class BlobMetadata:
    """Minimal set of blob attributes used to detect that a blob has changed.

    Attributes:
        last_modified_utc: Timezone-aware modification time, always in UTC.
        etag: ETag string exactly as it was supplied.
        size_bytes: Blob size in bytes.
    """

    last_modified_utc: datetime
    etag: str
    size_bytes: int

    @staticmethod
    def _as_utc(moment: datetime) -> datetime:
        """Return *moment* as an aware UTC datetime; naive values are taken as UTC."""
        if moment.tzinfo is not None:
            return moment.astimezone(timezone.utc)
        return moment.replace(tzinfo=timezone.utc)

    @staticmethod
    def from_blob_properties(props) -> "BlobMetadata":
        """Build a BlobMetadata from an object exposing blob properties.

        Args:
            props: Object with ``last_modified``, ``etag`` and ``size``
                attributes (e.g. the SDK's BlobProperties).

        Returns:
            A BlobMetadata whose timestamp is timezone-aware and in UTC.
        """
        modified_utc = BlobMetadata._as_utc(props.last_modified)
        return BlobMetadata(
            last_modified_utc=modified_utc,
            etag=props.etag,
            size_bytes=int(props.size),
        )

    def to_dict(self) -> Dict:
        """Return a plain dict with the timestamp rendered as an ISO-8601 string."""
        return {**asdict(self), "last_modified_utc": self.last_modified_utc.isoformat()}

    @staticmethod
    def from_dict(d: Dict) -> "BlobMetadata":
        """Rebuild a BlobMetadata from the dict form produced by ``to_dict``.

        ``last_modified_utc`` may be an ISO-8601 string or a datetime; either
        way the result is normalised to an aware UTC datetime.
        """
        stamp = d["last_modified_utc"]
        if isinstance(stamp, str):
            stamp = datetime.fromisoformat(stamp)
        stamp = BlobMetadata._as_utc(stamp)
        return BlobMetadata(
            last_modified_utc=stamp,
            etag=d["etag"],
            size_bytes=int(d["size_bytes"]),
        )
61
+
62
+
63
+ # ──────────────────────────────────────────────────────────────────────────────
64
+ # Watcher (com persistência em pickle no próprio ADLS)
65
+ # ──────────────────────────────────────────────────────────────────────────────
66
+
67
class BlobChangeWatcher:
    """Detect blob changes by comparing live metadata with a persisted snapshot.

    The snapshot is stored as a pickle in the same container and has the form::

        {
            "<blob_path>": {"last_modified_utc": "...", "etag": "...", "size_bytes": int},
            ...
        }
    """

    def __init__(
        self,
        *,
        adls_connection_string: Optional[str] = None,
        container: str = "luxorasap",
        snapshot_blob_path: str = "system/state",
        watcher_id: str = "blob_change_watcher.pkl",
        treat_missing_as_changed: bool = True,
    ) -> None:
        """Create the watcher and load any existing snapshot.

        Args:
            adls_connection_string: Connection string; when None the
                AZURE_STORAGE_CONNECTION_STRING environment variable is used.
            container: Container holding the watched blobs and the snapshot.
            snapshot_blob_path: Folder (inside the container) of the snapshot.
            watcher_id: File name of the snapshot pickle inside that folder.
            treat_missing_as_changed: If True, a blob seen for the first time
                is reported as changed.

        Raises:
            RuntimeError: If no connection string is available.
        """
        if adls_connection_string is None:
            adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")

        # An empty value is as unusable as a missing one; fail with the same
        # clear message instead of a parsing error further down.
        if not adls_connection_string:
            raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")

        self._container_name = container
        self._snapshot_blob_path = f"{snapshot_blob_path}/{watcher_id}"
        self._treat_missing_as_changed = treat_missing_as_changed

        # Clients
        self._blob_service = BlobServiceClient.from_connection_string(adls_connection_string)
        self._container_client = self._blob_service.get_container_client(self._container_name)
        self._pickle_client = BlobPickleClient(
            adls_connection_string=adls_connection_string,
            container=self._container_name,
        )

        # In-memory state; replaced by the persisted snapshot when one exists.
        self._snapshot: Dict[str, Dict] = {}
        self._load_snapshot()

    # ───────────────────────────── Snapshot persistence ─────────────────────────────

    def _load_snapshot(self) -> None:
        """Load the snapshot pickle; start empty when it is missing or unreadable.

        NOTE(review): the snapshot is unpickled, so the container must only be
        writable by trusted parties.
        """
        import logging

        try:
            data = self._pickle_client.read_pickle(self._snapshot_blob_path)
        except (FileNotFoundError, ResourceNotFoundError):
            # No snapshot yet: normal on the first run.
            self._snapshot = {}
            return
        except Exception:
            # Corrupted / old-format snapshot: keep the best-effort behaviour
            # of starting from scratch, but leave a trace of what happened.
            logging.getLogger(__name__).warning(
                "Could not load snapshot %r; starting with an empty snapshot",
                self._snapshot_blob_path,
                exc_info=True,
            )
            self._snapshot = {}
            return

        self._snapshot = data if isinstance(data, dict) else {}

    def _save_snapshot(self) -> None:
        """Persist the in-memory snapshot as a pickle."""
        self._pickle_client.write_pickle(self._snapshot, self._snapshot_blob_path)

    # ───────────────────────────── Remote properties ─────────────────────────────

    def _fetch_remote_metadata(self, blob_path: str) -> "BlobMetadata":
        """Fetch the blob's current metadata.

        Raises:
            ResourceNotFoundError: If the blob does not exist.
        """
        props = self._container_client.get_blob_client(blob_path).get_blob_properties()
        return BlobMetadata.from_blob_properties(props)

    def _get_snapshot_metadata(self, blob_path: str) -> Optional["BlobMetadata"]:
        """Return the metadata stored in the snapshot for *blob_path*, if any."""
        raw = self._snapshot.get(blob_path)
        return BlobMetadata.from_dict(raw) if raw else None

    # ───────────────────────────── Comparison ─────────────────────────────

    @staticmethod
    def _normalize_etag(etag):
        """Strip surrounding double quotes from an ETag string."""
        return etag.strip('"') if isinstance(etag, str) else etag

    @staticmethod
    def _metadata_differs(current, previous) -> bool:
        """Return True when two metadata records describe different blob versions.

        Any difference in ETag, last-modified time or size counts as a change.
        ETags are compared without surrounding quotes.
        NOTE(review): the single-blob and listing APIs appear to differ in
        whether the ETag is quoted — confirm against the SDK; without this a
        snapshot written by one path never matches the other.
        """
        return (
            BlobChangeWatcher._normalize_etag(current.etag)
            != BlobChangeWatcher._normalize_etag(previous.etag)
            or current.last_modified_utc != previous.last_modified_utc
            or current.size_bytes != previous.size_bytes
        )

    # ───────────────────────────── Public API ─────────────────────────────

    def has_changed(
        self,
        blob_path: str,
        *,
        update_snapshot: bool = False,
        treat_missing_as_changed: Optional[bool] = None,
    ) -> Tuple[bool, Optional["BlobMetadata"], Optional["BlobMetadata"]]:
        """Check whether a blob changed since the stored snapshot.

        Args:
            blob_path: Blob path (e.g. "raw/xlsx/trades.xlsx").
            update_snapshot: If True, persist the new state when the blob
                changed, was deleted, or is being seen for the first time.
            treat_missing_as_changed: Per-call override of the instance rule
                "does a first sighting count as a change?".

        Returns:
            ``(changed, previous_metadata, current_metadata)``; the current
            metadata is None when the blob no longer exists.
        """
        if treat_missing_as_changed is None:
            treat_missing_as_changed = self._treat_missing_as_changed

        previous = self._get_snapshot_metadata(blob_path)

        try:
            current = self._fetch_remote_metadata(blob_path)
        except ResourceNotFoundError:
            # Blob is gone: only a change if we knew about it before.
            changed = previous is not None
            if update_snapshot and changed:
                self._snapshot.pop(blob_path, None)
                self._save_snapshot()
            return changed, previous, None

        first_sighting = previous is None
        if first_sighting:
            changed = bool(treat_missing_as_changed)
        else:
            changed = self._metadata_differs(current, previous)

        # A first sighting is recorded even when it is not reported as a
        # change; otherwise the blob would stay "unknown" forever and later
        # modifications could never be detected.
        if update_snapshot and (changed or first_sighting):
            self._snapshot[blob_path] = current.to_dict()
            self._save_snapshot()

        return changed, previous, current

    def update_snapshot(self, blob_path: str) -> Optional["BlobMetadata"]:
        """Force the snapshot entry to match the blob's current state.

        Returns:
            The current metadata, or None when the blob does not exist (in
            which case its snapshot entry is removed).
        """
        try:
            current = self._fetch_remote_metadata(blob_path)
        except ResourceNotFoundError:
            self._snapshot.pop(blob_path, None)
            self._save_snapshot()
            return None

        self._snapshot[blob_path] = current.to_dict()
        self._save_snapshot()
        return current

    def mark_as_synchronized(self, blob_path: str, metadata: Optional["BlobMetadata"] = None) -> None:
        """Record a blob as synchronised (e.g. after a pipeline processed it).

        Args:
            blob_path: Blob path to record.
            metadata: State to store; when None the current remote state is
                fetched (which raises ResourceNotFoundError for a missing blob).
        """
        if metadata is None:
            metadata = self._fetch_remote_metadata(blob_path)
        self._snapshot[blob_path] = metadata.to_dict()
        self._save_snapshot()

    def list_changed_under_prefix(
        self,
        prefix: str,
        *,
        allowed_extensions: Optional[Sequence[str]] = None,
        update_snapshot: bool = False,
    ) -> List[str]:
        """Scan every blob under a prefix and return the paths that changed.

        Only blobs returned by the listing are examined, so blobs that were
        deleted since the last snapshot are not reported here.

        Args:
            prefix: E.g. "enriched/parquet/fundos" (trailing slash optional).
            allowed_extensions: E.g. [".parquet", ".xlsx"]; case-insensitive
                suffix filter. A single string is accepted as one extension.
            update_snapshot: If True, store the new state of changed blobs
                and the baseline of blobs seen for the first time.

        Returns:
            Paths of the blobs that changed.
        """
        if prefix and not prefix.endswith("/"):
            prefix += "/"

        # A bare string would otherwise be iterated character by character.
        if isinstance(allowed_extensions, str):
            allowed_extensions = [allowed_extensions]
        extensions = tuple(ext.lower() for ext in (allowed_extensions or ()))

        changed_paths: List[str] = []
        snapshot_dirty = False

        for blob_item in self._container_client.list_blobs(name_starts_with=prefix):
            name = blob_item.name
            if name.endswith("/"):
                continue  # directory placeholder
            if extensions and not name.lower().endswith(extensions):
                continue

            previous = self._get_snapshot_metadata(name)
            current = BlobMetadata.from_blob_properties(blob_item)

            first_sighting = previous is None
            if first_sighting:
                blob_changed = bool(self._treat_missing_as_changed)
            else:
                blob_changed = self._metadata_differs(current, previous)

            if blob_changed:
                changed_paths.append(name)
            if update_snapshot and (blob_changed or first_sighting):
                self._snapshot[name] = current.to_dict()
                snapshot_dirty = True

        # One write for the whole scan.
        if snapshot_dirty:
            self._save_snapshot()

        return changed_paths
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: luxorasap
3
- Version: 0.1.39
3
+ Version: 0.2.0
4
4
  Summary: Toolbox da Luxor para ingestão, análise e automação de dados financeiros.
5
5
  Author-email: Luxor Group <backoffice@luxor.com.br>
6
6
  License: Proprietary – All rights reserved
@@ -22,6 +22,7 @@ src/luxorasap/utils/dataframe/reader.py
22
22
  src/luxorasap/utils/dataframe/transforms.py
23
23
  src/luxorasap/utils/storage/__init__.py
24
24
  src/luxorasap/utils/storage/blob.py
25
+ src/luxorasap/utils/storage/change_tracker.py
25
26
  src/luxorasap/utils/tools/__init__.py
26
27
  src/luxorasap/utils/tools/excel.py
27
28
  tests/test_btgapi_auth.py
@@ -30,5 +31,7 @@ tests/test_btgapi_trades.py
30
31
  tests/test_datareader.py
31
32
  tests/test_ingest_cloud.py
32
33
  tests/test_ingest_legacy_local.py
34
+ tests/test_utils_change_tracker.py
33
35
  tests/test_utils_dataframe.py
34
- tests/test_utils_storage.py
36
+ tests/test_utils_storage.py
37
+ tests/tests_utils_pickle_excel.py
@@ -0,0 +1,180 @@
1
+ import datetime as dt
2
+ from types import SimpleNamespace
3
+ from datetime import timezone
4
+
5
+ import pytest
6
+
7
+ from luxorasap.utils.storage import BlobChangeWatcher, BlobMetadata
8
+
9
+
10
+ # ------------------------ Fakes do SDK Azure ------------------------
11
+
12
class FakeDownload:
    """Stand-in for the SDK download stream: hands back fixed bytes."""

    def __init__(self, content: bytes):
        self._content = content

    def readall(self):
        """Return the whole payload given at construction time."""
        payload = self._content
        return payload
17
+
18
class FakeBlobClient:
    """Stand-in for a blob client bound to one blob name.

    ``props`` is a SimpleNamespace(last_modified, etag, size) or None for a
    blob that does not exist; ``store`` maps blob name -> bytes and backs the
    pickled snapshot.
    """

    def __init__(self, name, props=None, store=None):
        self._name = name
        self._props = props
        self._store = store

    def get_blob_properties(self):
        """Return the configured properties, or raise ResourceNotFoundError."""
        if self._props is not None:
            return self._props
        from azure.core.exceptions import ResourceNotFoundError
        raise ResourceNotFoundError("not found")

    def download_blob(self, lease=None):
        """Return a download over the stored bytes (empty when nothing is stored)."""
        # Only used for the snapshot (state pickle).
        return FakeDownload(self._store.get(self._name, b""))
32
+
33
class FakeContainerClient:
    """Stand-in for a container client.

    ``blobs`` maps blob name -> SimpleNamespace(last_modified, etag, size);
    ``store`` maps blob name -> bytes and receives uploaded content.
    """

    def __init__(self, blobs, store):
        self._blobs = blobs
        self._store = store

    def get_blob_client(self, name):
        """Return a fake blob client for *name* (props are None when unknown)."""
        return FakeBlobClient(name, props=self._blobs.get(name), store=self._store)

    def list_blobs(self, name_starts_with=""):
        """Yield one listing item (name + properties) per blob under the prefix."""
        for blob_name in self._blobs:
            if not blob_name.startswith(name_starts_with):
                continue
            found = self._blobs[blob_name]
            # Listing items carry .name alongside the properties.
            yield SimpleNamespace(
                name=blob_name,
                last_modified=found.last_modified,
                etag=found.etag,
                size=found.size,
            )

    def upload_blob(self, name, data, overwrite=False):
        """Store the uploaded bytes (or the content of a file-like object)."""
        # Used to save the snapshot .pkl.
        if hasattr(data, "read"):
            self._store[name] = data.read()
        else:
            self._store[name] = data
        return SimpleNamespace()  # dummy
57
+
58
class FakeBlobServiceClient:
    """Stand-in for the service client: always serves one container client."""

    def __init__(self, container_client):
        self._cc = container_client

    def get_container_client(self, container):
        """Return the single configured container client, whatever the name."""
        only_container = self._cc
        return only_container
63
+
64
+ # ------------------------ Fixtures ------------------------
65
+
66
@pytest.fixture
def fake_now():
    """Fixed, timezone-aware UTC instant used as every blob's last-modified time."""
    return dt.datetime(
        year=2025, month=8, day=25, hour=12, minute=0, second=0, tzinfo=timezone.utc
    )
69
+
70
@pytest.fixture
def azure_mocks(monkeypatch, fake_now):
    """Provide a fake container with two blobs and an in-memory snapshot store.

    Returns a SimpleNamespace with ``blobs`` (name -> properties), ``store``
    (name -> bytes), ``cc`` (container client) and ``bsc`` (service client).
    """
    # Blobs that exist in the fake "ADLS".
    blobs = {
        "raw/x/a.xlsx": SimpleNamespace(
            last_modified=fake_now, etag='"v1-a"', size=100
        ),
        "raw/p/tb.parquet": SimpleNamespace(
            last_modified=fake_now, etag='"v1-p"', size=500
        ),
    }
    # In-memory storage for the snapshot .pkl.
    store = {}

    cc = FakeContainerClient(blobs=blobs, store=store)
    bsc = FakeBlobServiceClient(container_client=cc)
    fake_factory = SimpleNamespace(from_connection_string=lambda *_args, **_kw: bsc)

    # The watcher raises RuntimeError when no connection string is available,
    # so the tests must not depend on the developer's environment.
    monkeypatch.setenv("AZURE_STORAGE_CONNECTION_STRING", "UseDevelopmentStorage=true")

    # Patch the real constructor so the watcher gets the fake.
    import luxorasap.utils.storage.change_tracker as tracker_mod
    monkeypatch.setattr(tracker_mod, "BlobServiceClient", fake_factory)

    # The snapshot is persisted through BlobPickleClient; route it to the same
    # in-memory store so each test starts from a clean snapshot.
    # NOTE(review): presumably BlobPickleClient resolves BlobServiceClient from
    # its own module — confirm against luxorasap.utils.storage.blob.
    import luxorasap.utils.storage.blob as blob_mod
    monkeypatch.setattr(blob_mod, "BlobServiceClient", fake_factory)

    return SimpleNamespace(blobs=blobs, store=store, cc=cc, bsc=bsc)
95
+
96
+
97
+ # ------------------------ Testes ------------------------
98
+
99
+
100
def test_no_change_is_false(azure_mocks):
    """An unchanged blob is not reported once its baseline has been recorded."""
    target = "raw/p/tb.parquet"
    watcher = BlobChangeWatcher(
        container="luxorasap",
        snapshot_blob_path="system/state/tests",
        watcher_id="test_watcher.pkl",
        treat_missing_as_changed=True,
    )

    # First sighting counts as a change and records the baseline.
    watcher.has_changed(target, update_snapshot=True)

    # Same version again -> no change.
    outcome = watcher.has_changed(target, update_snapshot=False)
    changed, prev, curr = outcome
    assert changed is False
    assert prev is not None
    assert curr is not None
    assert prev.etag == curr.etag
    assert prev.size_bytes == curr.size_bytes
116
+
117
+
118
def test_change_by_etag_or_size_is_true(monkeypatch, azure_mocks, fake_now):
    """A new ETag, or a new size alone, makes the blob count as changed."""
    path = "raw/x/a.xlsx"
    remote = azure_mocks.blobs
    watcher = BlobChangeWatcher(
        container="luxorasap",
        snapshot_blob_path="system/state/tests",
        watcher_id="test_watcher.pkl",
    )

    # Baseline.
    watcher.has_changed(path, update_snapshot=True)

    # Change the ETag.
    remote[path].etag = '"v2-a"'
    etag_outcome = watcher.has_changed(path, update_snapshot=False)
    assert etag_outcome[0] is True

    # Apply the snapshot.
    watcher.has_changed(path, update_snapshot=True)

    # Change only the size.
    remote[path].size = 200
    size_outcome = watcher.has_changed(path, update_snapshot=False)
    assert size_outcome[0] is True
139
+
140
+
141
def test_deleted_blob_is_considered_changed_if_was_known(azure_mocks):
    """Deleting a blob that is in the snapshot is reported as a change."""
    path = "raw/p/tb.parquet"
    watcher = BlobChangeWatcher(
        container="luxorasap",
        snapshot_blob_path="system/state/tests",
        watcher_id="test_watcher.pkl",
    )

    # Register the blob first.
    watcher.has_changed(path, update_snapshot=True)

    # Remove it from the remote set.
    azure_mocks.blobs.pop(path)

    outcome = watcher.has_changed(path, update_snapshot=True)
    changed, prev, curr = outcome
    assert changed is True
    assert prev is not None
    assert curr is None  # no longer exists
157
+
158
+
159
def test_list_changed_under_prefix_filters_and_updates(azure_mocks, fake_now):
    """The prefix scan honours the extension filter and updates the snapshot."""
    watcher = BlobChangeWatcher(
        container="luxorasap",
        snapshot_blob_path="system/state/tests",
        watcher_id="test_watcher.pkl",
    )
    scan_args = {"allowed_extensions": [".xlsx"], "update_snapshot": True}

    # First scan: a first sighting counts as a change.
    first_scan = watcher.list_changed_under_prefix("raw/", **scan_args)
    assert first_scan == ["raw/x/a.xlsx"]

    # The parquet changes, but the filter is xlsx, so it must not show up.
    azure_mocks.blobs["raw/p/tb.parquet"].etag = '"v2-p"'
    second_scan = watcher.list_changed_under_prefix("raw/", **scan_args)
    assert second_scan == []
@@ -0,0 +1,92 @@
1
+ import io
2
+ import pickle
3
+ import pandas as pd
4
+ import pytest
5
+ from types import SimpleNamespace
6
+
7
+ # Supondo que as classes estejam em luxorasap.utils.storage.blob
8
+ from luxorasap.utils.storage import BlobPickleClient, BlobExcelClient
9
+
10
+
11
+ # ------------------------ Fakes Azure ------------------------
12
+
13
class FakeDownload:
    """Stand-in for the SDK download stream: hands back fixed bytes."""

    def __init__(self, content: bytes):
        self._content = content

    def readall(self):
        """Return the whole payload given at construction time."""
        payload = self._content
        return payload
18
+
19
class FakeBlobClient:
    """Stand-in for a blob client reading from an in-memory name -> bytes store."""

    def __init__(self, name, store):
        self._name = name
        self._store = store

    def download_blob(self):
        """Return a download over the stored bytes, or raise ResourceNotFoundError."""
        if self._name in self._store:
            return FakeDownload(self._store[self._name])
        from azure.core.exceptions import ResourceNotFoundError
        raise ResourceNotFoundError("not found")
28
+
29
class FakeContainerClient:
    """Stand-in for a container client backed by an in-memory name -> bytes store."""

    def __init__(self, store):
        self._store = store

    def get_blob_client(self, name):
        """Return a fake blob client for *name* sharing this container's store."""
        return FakeBlobClient(name, self._store)

    def upload_blob(self, name, data, overwrite=False):
        """Store the uploaded bytes (or the content of a file-like object)."""
        if hasattr(data, "read"):
            self._store[name] = data.read()
        else:
            self._store[name] = data
        return SimpleNamespace()
38
+
39
class FakeBlobServiceClient:
    """Stand-in for the service client: always serves one container client."""

    def __init__(self, container_client):
        self._cc = container_client

    def get_container_client(self, container):
        """Return the single configured container client, whatever the name."""
        only_container = self._cc
        return only_container
44
+
45
+ # ------------------------ Fixtures ------------------------
46
+
47
@pytest.fixture
def mem_store():
    """Fresh in-memory blob store (name -> bytes) for each test."""
    return dict()
50
+
51
@pytest.fixture
def patch_blob_clients(monkeypatch, mem_store):
    """Make BlobPickleClient / BlobExcelClient use the fake service client.

    Returns the in-memory store that receives every upload.
    """
    import luxorasap.utils.storage.blob as mod

    container = FakeContainerClient(mem_store)
    fake_bsc = FakeBlobServiceClient(container)

    def _from_connection_string(*_a, **_k):
        return fake_bsc

    replacement = SimpleNamespace(from_connection_string=_from_connection_string)
    monkeypatch.setattr(mod, "BlobServiceClient", replacement)
    return mem_store
58
+
59
+ # ------------------------ Tests Pickle ------------------------
60
+
61
def test_pickle_roundtrip(patch_blob_clients):
    """An object written with write_pickle is read back equal."""
    path = "aux/test/state.pkl"
    obj = {"a": 1, "b": [1, 2, 3]}
    client = BlobPickleClient()

    client.write_pickle(obj, path)

    assert client.read_pickle(path) == obj
69
+
70
def test_pickle_read_missing_raises(monkeypatch, patch_blob_clients):
    """Reading a pickle that was never written raises."""
    missing_path = "aux/missing.pkl"
    client = BlobPickleClient()
    with pytest.raises(Exception):
        client.read_pickle(missing_path)
74
+
75
+ # ------------------------ Tests Excel ------------------------
76
+
77
def test_excel_roundtrip(patch_blob_clients, tmp_path):
    """A DataFrame written with write_excel is read back with the same content."""
    # Skip only this test when openpyxl is missing. Calling importorskip in a
    # decorator runs at import time and would skip the whole module instead,
    # pickle tests included.
    pytest.importorskip("openpyxl", reason="openpyxl é necessário para testar Excel")

    df = pd.DataFrame({"Nome": ["Ana", "Bruno"], "Idade": [28, 35]})
    client = BlobExcelClient()

    blob_path = "reports/teste.xlsx"
    client.write_excel(df, blob_path, index=False)

    df2 = client.read_excel(blob_path)
    # Type-tolerant comparison (pandas may change dtypes when reading back).
    assert df2.shape == df.shape
    assert list(df2.columns) == list(df.columns)
    assert df2.astype(str).equals(df.astype(str))
@@ -1,2 +0,0 @@
1
- from .blob import BlobParquetClient, BlobPickleClient, BlobExcelClient, delete_blob, list_blob_files
2
- __all__ = ["BlobParquetClient", "BlobPickleClient", "BlobExcelClient", "delete_blob", "list_blob_files"]
File without changes
File without changes