luxorasap 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
luxorasap/__init__.py CHANGED
@@ -13,7 +13,7 @@ from types import ModuleType
 try:
     __version__: str = metadata.version(__name__)
 except metadata.PackageNotFoundError:  # editable install
-    __version__ = "0.1.38"
+    __version__ = "0.2.0"
 
 # ─── Lazy loader ─────────────────────────────────────────────────
 def __getattr__(name: str) -> ModuleType:
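The lazy loader is cut off by the hunk boundary. For context, a minimal sketch of the usual PEP 562 pattern such a module-level `__getattr__` implements — the submodule set below is a hypothetical guess inferred from the wheel's RECORD, not the package's actual code:

    from importlib import import_module
    from types import ModuleType

    # Hypothetical submodule list, inferred from the RECORD entries further down.
    _LAZY_SUBMODULES = {"btgapi", "datareader", "ingest", "utils"}

    def __getattr__(name: str) -> ModuleType:
        # PEP 562: only called when normal attribute lookup fails.
        if name in _LAZY_SUBMODULES:
            module = import_module(f"{__name__}.{name}")
            globals()[name] = module  # cache so later lookups bypass __getattr__
            return module
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")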
luxorasap/datareader/core.py CHANGED
@@ -24,12 +24,15 @@ load_dotenv()
 #@logger.catch
 class LuxorQuery:
 
-    def __init__(self, update_mode="optimized", is_develop_mode=False, tables_path=None,
-                 blob_directory='enriched/parquet', adls_connection_string:str=None, container_name="luxorasap"):
+    # Constructor with a detailed docstring
+    def __init__(self, blob_directory='enriched/parquet', adls_connection_string:str=None,
+                 container_name="luxorasap"):
         """
-        update_mode:
-            'standard' - Loads every available table
-            'optimized' - Loads only the tables that are used, on demand
+        Class for querying Luxor data.
+        Args:
+            blob_directory (str, optional): Blob directory where the tables live. Defaults to 'enriched/parquet'.
+            adls_connection_string (str, optional): ADLS connection string. If None, the environment variable is used. Defaults to None.
+            container_name (str, optional): Blob container name. Defaults to "luxorasap".
         """
 
         self.blob_client = BlobParquetClient(adls_connection_string=adls_connection_string,
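A brief instantiation sketch against the new signature; it assumes AZURE_STORAGE_CONNECTION_STRING is set in the environment whenever no connection string is passed:

    from luxorasap.datareader import LuxorQuery

    # Defaults: tables read from 'enriched/parquet' in the 'luxorasap' container.
    lq = LuxorQuery()

    # Explicit configuration (the connection string here is a placeholder):
    lq_custom = LuxorQuery(
        blob_directory="enriched/parquet",
        adls_connection_string="DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...",
        container_name="luxorasap",
    )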
@@ -38,10 +41,8 @@ class LuxorQuery:
 
 
         self.modified_tables = []
-        self.is_develop_mode = is_develop_mode
 
-
-        self.tables_path = tables_path
+
         #if tables_path is None:
         #    self.tables_path = self.__set_tables_path()
 
@@ -54,8 +55,6 @@ class LuxorQuery:
         self.lipi_manga_incorp_date = dt.date(2022,12,9)
 
 
-        self.update_modes_name = {"standard" : 0, "optimized" : 1}
-        self.update_mode = self.update_modes_name[update_mode]
         self.update()  # This first run initializes the dictionaries above
 
 
luxorasap/ingest/cloud/__init__.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
 import datetime as dt
 import numpy as np
 
-from luxorasap.utils.storage import BlobParquetClient
+from luxorasap.utils.storage import BlobParquetClient, BlobExcelClient, BlobPickleClient
 from luxorasap.utils.dataframe import prep_for_save, astype_str_inplace
 from luxorasap.datareader import LuxorQuery
 
@@ -12,6 +12,8 @@ from luxorasap.datareader import LuxorQuery
 __all__ = ["save_table", "incremental_load"]
 
 _client = BlobParquetClient()  # single instance for the module
+_client_excel = None
+_client_pickle = None
 
 
 # ────────────────────────────────────────────────────────────────
@@ -23,7 +25,8 @@ def save_table(
     index_name: str = "index",
     normalize_columns: bool = True,
     directory: str = "enriched/parquet",
-    override=False
+    override=False,
+    format='parquet'
 ):
     """Saves a DataFrame as Parquet to ADLS (overwriting)."""
 
@@ -43,9 +46,27 @@
 
     df = prep_for_save(df, index=index, index_name=index_name, normalize=normalize_columns)
 
-    #_client.write_df(df.astype(str), f"{directory}/{table_name}.parquet")
-    astype_str_inplace(df)
-    _client.write_df(df, f"{directory}/{table_name}.parquet")
+    if format == 'parquet':
+        #_client.write_df(df.astype(str), f"{directory}/{table_name}.parquet")
+        astype_str_inplace(df)
+        _client.write_df(df, f"{directory}/{table_name}.parquet")
+
+    elif format == 'excel':
+        global _client_excel
+        if _client_excel is None:
+            _client_excel = BlobExcelClient()
+        if index:
+            df = df.reset_index().rename(columns={"index": index_name})
+        _client_excel.write_excel(df, f"{directory}/{table_name}.xlsx")
+
+    elif format == 'pickle':
+        global _client_pickle
+        if _client_pickle is None:
+            _client_pickle = BlobPickleClient()
+        _client_pickle.write_pickle(df, f"{directory}/{table_name}.pkl")
+
+    else:
+        raise ValueError(f"Format '{format}' is not supported. Use 'parquet', 'excel' or 'pickle'.")
 
 
 
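The new `format` parameter makes `save_table` a single entry point for three writers, each client created lazily on first use. A usage sketch, assuming (as the body above suggests) that the leading parameters are the DataFrame and the table name:

    import pandas as pd
    from luxorasap.ingest.cloud import save_table

    df = pd.DataFrame({"ticker": ["ABC"], "price": [10.5]})  # illustrative data

    save_table(df, "prices")                   # default → enriched/parquet/prices.parquet
    save_table(df, "prices", format="excel")   # → enriched/parquet/prices.xlsx
    save_table(df, "prices", format="pickle")  # → enriched/parquet/prices.pkl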
luxorasap/utils/storage/__init__.py CHANGED
@@ -1,2 +1,12 @@
-from .blob import BlobParquetClient, BlobPickleClient
-__all__ = ["BlobParquetClient", "BlobPickleClient"]
+from .blob import BlobParquetClient, BlobPickleClient, BlobExcelClient, delete_blob, list_blob_files
+from .change_tracker import BlobChangeWatcher, BlobMetadata
+
+__all__ = [
+    "BlobParquetClient",
+    "BlobPickleClient",
+    "BlobExcelClient",
+    "delete_blob",
+    "list_blob_files",
+    "BlobChangeWatcher",
+    "BlobMetadata",
+]
luxorasap/utils/storage/blob.py CHANGED
@@ -3,8 +3,11 @@ from pathlib import PurePosixPath
 from datetime import timezone
 import pandas as pd
 import pyarrow as pa, pyarrow.parquet as pq
-from azure.storage.blob import BlobServiceClient
 import pickle
+import re
+
+from azure.storage.blob import BlobServiceClient
+from azure.core.exceptions import ResourceNotFoundError
 
 from ..dataframe import read_bytes
 
@@ -103,22 +106,158 @@ class BlobPickleClient:
         if adls_connection_string is None:
             raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
 
-        self.blob_service_client = BlobServiceClient.from_connection_string(adls_connection_string)
-        self.container_client = self.blob_service_client.get_container_client(container)
+        self._svc = BlobServiceClient.from_connection_string(adls_connection_string)
+        self._container = self._svc.get_container_client(container)
+
 
     def write_pickle(self, obj, blob_name: str):
         """Saves a Python object (e.g. a DataFrame) as a pickle to the blob"""
         buf = io.BytesIO()
         pickle.dump(obj, buf)
         buf.seek(0)
-        self.container_client.upload_blob(name=blob_name, data=buf, overwrite=True)
+        self._container.upload_blob(name=blob_name, data=buf, overwrite=True)
+
 
     def read_pickle(self, blob_name: str):
         """Reads a pickle from the blob and returns the Python object"""
-        downloader = self.container_client.download_blob(blob_name)
+        downloader = self._container.download_blob(blob_name)
         buf = io.BytesIO(downloader.readall())
         return pickle.load(buf)
 
+
     def exists(self, blob_name: str) -> bool:
-        return self.container_client.get_blob_client(blob_name).exists()
+        return self._container.get_blob_client(blob_name).exists()
+
+
+
+class BlobExcelClient:
+    def __init__(self, *, adls_connection_string: str = None, container: str = "luxorasap"):
+        if adls_connection_string is None:
+            adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
+
+        if adls_connection_string is None:
+            raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
+
+        self._svc = BlobServiceClient.from_connection_string(adls_connection_string)
+        self._container = self._svc.get_container_client(container)
+
+
+    def write_excel(self, df: pd.DataFrame, blob_name: str, **kwargs):
+        """
+        Saves a DataFrame as an Excel file in the blob.
+
+        Args:
+            df (pd.DataFrame): DataFrame to save
+            blob_name (str): blob path/name (e.g. "reports/test.xlsx")
+            **kwargs: extra arguments for `DataFrame.to_excel`
+        """
+        buf = io.BytesIO()
+        df.to_excel(buf, index=False, engine="openpyxl", **kwargs)
+        buf.seek(0)
+        self._container.upload_blob(name=blob_name, data=buf, overwrite=True)
+
+
+    def read_excel(self, blob_name: str, **kwargs) -> pd.DataFrame:
+        """
+        Reads an Excel file from the blob and returns a DataFrame.
+
+        Args:
+            blob_name (str): blob path/name (e.g. "reports/test.xlsx")
+            **kwargs: extra arguments for `pd.read_excel`
+
+        Returns:
+            pd.DataFrame
+        """
+        downloader = self._container.download_blob(blob_name)
+        buf = io.BytesIO(downloader.readall())
+        return pd.read_excel(buf, engine="openpyxl", **kwargs)
+
+    def exists(self, blob_name: str) -> bool:
+        return self._container.get_blob_client(blob_name).exists()
+
+
+
+def list_blob_files(blob_path: str, container="luxorasap", ends_with: str = None, adls_connection_string: str = None) -> list:
+    """
+    Lists the files in a blob storage directory.
+
+    Args:
+        blob_path (str): The directory path in blob storage.
+        ends_with (str, optional): Filters files that end with this string. (E.g.: '.parquet')
+
+    Returns:
+        list: A list of blob names.
+
+    """
+
+    if adls_connection_string is None:
+        adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
+    if adls_connection_string is None:
+        raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
+
+    try:
+        svc = BlobServiceClient.from_connection_string(adls_connection_string)
+        container_client = svc.get_container_client(container)
+        blob_list = container_client.list_blobs(name_starts_with=blob_path)
+        if ends_with:
+            return [blob.name for blob in blob_list if blob.name.endswith(ends_with)]
+        return [blob.name for blob in blob_list]
+    except Exception:
+        return []
+
+
+def delete_blob(
+    blob_name: str,
+    *,
+    adls_connection_string: str | None = None,
+    container: str = "luxorasap",
+    include_snapshots: bool = False,
+) -> None:
+    """
+    Safely deletes ONLY one exact file (blob) from Azure Blob Storage.
+
+    Safety rules:
+    - Refuses names that end with "/" (virtual directory prefixes).
+    - Refuses wildcards/shell globs (*, ?, []), to avoid unintended deletions.
+    - Verifies that the exact blob exists before removing it.
+
+    Args:
+        blob_name: EXACT blob path (e.g.: "enriched/parquet/table.parquet").
+        adls_connection_string: If None, reads AZURE_STORAGE_CONNECTION_STRING.
+        container: Container name.
+        include_snapshots: If True, also deletes snapshots linked to the blob.
+
+    Raises:
+        ValueError: If the name looks like a directory/prefix or contains wildcards.
+        FileNotFoundError: If the exact blob does not exist.
+        RuntimeError: If the Azure connection is not configured.
+    """
+    if adls_connection_string is None:
+        adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
+    if adls_connection_string is None:
+        raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
+
+    # 1) Guards against "directories" and wildcards
+    if blob_name.endswith("/"):
+        raise ValueError("Name ends with '/': refusing to delete directories/prefixes.")
+    if re.search(r"[\*\?\[\]]", blob_name):
+        raise ValueError("Wildcards found in the blob name. Provide an exact file.")
+
+    svc = BlobServiceClient.from_connection_string(adls_connection_string)
+    container_client = svc.get_container_client(container)
+    blob_client = container_client.get_blob_client(blob_name)
+
+    # 2) Check that the exact blob exists
+    try:
+        blob_client.get_blob_properties()
+    except ResourceNotFoundError:
+        raise FileNotFoundError(f"Blob not found: {blob_name}")
+
+    # 3) Delete only the exact target
+    delete_kwargs = {}
+    if include_snapshots:
+        delete_kwargs["delete_snapshots"] = "include"
+
+    blob_client.delete_blob(**delete_kwargs)
+
 
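Both new module-level helpers are importable from `luxorasap.utils.storage` (see the updated `__init__.py` above). A short usage sketch with illustrative paths:

    from luxorasap.utils.storage import delete_blob, list_blob_files

    # Every Parquet file under a prefix; returns [] if the listing fails.
    files = list_blob_files("enriched/parquet", ends_with=".parquet")

    # Deletes one exact blob; trailing slashes and wildcards raise ValueError,
    # and a missing blob raises FileNotFoundError.
    if files:
        delete_blob(files[0], include_snapshots=True)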
luxorasap/utils/storage/change_tracker.py ADDED
@@ -0,0 +1,294 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, asdict
+from datetime import datetime, timezone
+from typing import Dict, List, Optional, Sequence, Tuple
+
+from azure.core.exceptions import ResourceNotFoundError
+from azure.storage.blob import BlobServiceClient
+
+# Reuses utilities already available in the project
+from luxorasap.utils.storage.blob import BlobPickleClient
+import os
+
+
+# ──────────────────────────────────────────────────────────────────────────────
+# Data types
+# ──────────────────────────────────────────────────────────────────────────────
+
+@dataclass(frozen=True)
+class BlobMetadata:
+    """
+    Minimal set of information needed to detect changes in a blob.
+    """
+    last_modified_utc: datetime  # timezone-aware, always in UTC
+    etag: str
+    size_bytes: int
+
+    @staticmethod
+    def from_blob_properties(props) -> "BlobMetadata":
+        """
+        Builds a BlobMetadata from BlobProperties (azure.storage.blob SDK).
+        Guarantees that last_modified is timezone-aware in UTC.
+        """
+        last_mod = props.last_modified
+        if last_mod.tzinfo is None:
+            last_mod = last_mod.replace(tzinfo=timezone.utc)
+        else:
+            last_mod = last_mod.astimezone(timezone.utc)
+
+        return BlobMetadata(
+            last_modified_utc=last_mod,
+            etag=props.etag,
+            size_bytes=int(props.size),
+        )
+
+    def to_dict(self) -> Dict:
+        d = asdict(self)
+        d["last_modified_utc"] = self.last_modified_utc.isoformat()
+        return d
+
+    @staticmethod
+    def from_dict(d: Dict) -> "BlobMetadata":
+        lm = d["last_modified_utc"]
+        if isinstance(lm, str):
+            lm = datetime.fromisoformat(lm)
+        if lm.tzinfo is None:
+            lm = lm.replace(tzinfo=timezone.utc)
+        else:
+            lm = lm.astimezone(timezone.utc)
+        return BlobMetadata(last_modified_utc=lm, etag=d["etag"], size_bytes=int(d["size_bytes"]))
+
+
+# ──────────────────────────────────────────────────────────────────────────────
+# Watcher (with pickle persistence in ADLS itself)
+# ──────────────────────────────────────────────────────────────────────────────
+
+class BlobChangeWatcher:
+    """
+    Blob change detector, with its snapshot persisted as a pickle in ADLS.
+
+    Snapshot stored as a dict:
+        {
+          "<blob_path>": {"last_modified_utc": "...", "etag": "...", "size_bytes": int},
+          ...
+        }
+    """
+
+    def __init__(
+        self,
+        *,
+        adls_connection_string: Optional[str] = None,
+        container: str = "luxorasap",
+        snapshot_blob_path: str = "system/state",
+        watcher_id: str = "blob_change_watcher.pkl",
+        treat_missing_as_changed: bool = True,
+    ) -> None:
+        """
+        Args:
+            adls_connection_string: If None, uses AZURE_STORAGE_CONNECTION_STRING from the environment.
+            container: Name of the container holding the blobs (and the snapshot).
+            snapshot_blob_path: Path of the pickle file (in the same container) that stores the snapshot.
+            treat_missing_as_changed: If True, a blob observed for the first time counts as "changed".
+        """
+
+        if adls_connection_string is None:
+            adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
+
+        if adls_connection_string is None:
+            raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
+
+        self._container_name = container
+        self._snapshot_blob_path = f"{snapshot_blob_path}/{watcher_id}"
+        self._treat_missing_as_changed = treat_missing_as_changed
+
+        # Clients
+        self._blob_service = BlobServiceClient.from_connection_string(adls_connection_string)
+        self._container_client = self._blob_service.get_container_client(self._container_name)
+        self._pickle_client = BlobPickleClient(
+            adls_connection_string=adls_connection_string,
+            container=self._container_name,
+        )
+
+        # In-memory state
+        self._snapshot: Dict[str, Dict] = {}
+
+        # Loads the snapshot at initialization (starts empty if it does not exist)
+        self._load_snapshot()
+
+    # ───────────────────────────── Snapshot persistence ─────────────────────────────
+
+    def _load_snapshot(self) -> None:
+        """
+        Loads the snapshot from ADLS (pickle).
+        If it does not exist or is invalid, starts with an empty dict.
+        """
+        try:
+            data = self._pickle_client.read_pickle(self._snapshot_blob_path)
+            self._snapshot = data if isinstance(data, dict) else {}
+        except FileNotFoundError:
+            self._snapshot = {}
+        except Exception:
+            # Corruption/old format/etc. → start from scratch
+            self._snapshot = {}
+
+
+    def _save_snapshot(self) -> None:
+        """
+        Saves the current snapshot to ADLS as a pickle.
+        """
+        self._pickle_client.write_pickle(self._snapshot, self._snapshot_blob_path)
+
+    # ───────────────────────────── Access to remote properties ─────────────────────────────
+
+    def _fetch_remote_metadata(self, blob_path: str) -> BlobMetadata:
+        """
+        Fetches the blob's current metadata from ADLS.
+        Raises:
+            ResourceNotFoundError if the blob does not exist.
+        """
+        props = self._container_client.get_blob_client(blob_path).get_blob_properties()
+        return BlobMetadata.from_blob_properties(props)
+
+    def _get_snapshot_metadata(self, blob_path: str) -> Optional[BlobMetadata]:
+        """
+        Returns the metadata stored in the snapshot (if any).
+        """
+        raw = self._snapshot.get(blob_path)
+        return BlobMetadata.from_dict(raw) if raw else None
+
+    # ───────────────────────────── Public API ─────────────────────────────
+
+    def has_changed(
+        self,
+        blob_path: str,
+        *,
+        update_snapshot: bool = False,
+        treat_missing_as_changed: Optional[bool] = None,
+    ) -> Tuple[bool, Optional[BlobMetadata], Optional[BlobMetadata]]:
+        """
+        Checks whether the blob has changed since the previous snapshot.
+
+        Args:
+            blob_path: Blob path (e.g.: "raw/xlsx/trades.xlsx").
+            update_snapshot: If True, writes the new state to the snapshot when a change is found.
+            treat_missing_as_changed: Local override of the "does the first sighting count as a change?" rule.
+
+        Returns:
+            (changed?, previous_metadata, current_metadata)
+        """
+        if treat_missing_as_changed is None:
+            treat_missing_as_changed = self._treat_missing_as_changed
+
+        previous = self._get_snapshot_metadata(blob_path)
+
+        # If the blob no longer exists remotely:
+        try:
+            current = self._fetch_remote_metadata(blob_path)
+        except ResourceNotFoundError:
+            changed = previous is not None
+            if update_snapshot and changed:
+                # remove from the snapshot because the blob was deleted
+                self._snapshot.pop(blob_path, None)
+                self._save_snapshot()
+            return changed, previous, None
+
+        # First observation of this blob?
+        if previous is None:
+            changed = bool(treat_missing_as_changed)
+        else:
+            # Change criterion (order of "strength": etag > last_modified > size)
+            changed = (
+                current.etag != previous.etag
+                or current.last_modified_utc != previous.last_modified_utc
+                or current.size_bytes != previous.size_bytes
+            )
+
+        if update_snapshot and changed:
+            self._snapshot[blob_path] = current.to_dict()
+            self._save_snapshot()
+
+        return changed, previous, current
+
+
+    def update_snapshot(self, blob_path: str) -> Optional[BlobMetadata]:
+        """
+        Forces the snapshot to reflect the blob's current state.
+        If the blob does not exist, removes it from the snapshot and returns None.
+        """
+        try:
+            current = self._fetch_remote_metadata(blob_path)
+        except ResourceNotFoundError:
+            self._snapshot.pop(blob_path, None)
+            self._save_snapshot()
+            return None
+
+        self._snapshot[blob_path] = current.to_dict()
+        self._save_snapshot()
+        return current
+
+
+    def mark_as_synchronized(self, blob_path: str, metadata: Optional[BlobMetadata] = None) -> None:
+        """
+        Explicitly marks a blob as "synchronized" in the snapshot (e.g. after a pipeline run).
+        If `metadata` is not given, queries the current state in ADLS.
+        """
+        if metadata is None:
+            metadata = self._fetch_remote_metadata(blob_path)
+        self._snapshot[blob_path] = metadata.to_dict()
+        self._save_snapshot()
+
+
+    def list_changed_under_prefix(
+        self,
+        prefix: str,
+        *,
+        allowed_extensions: Optional[Sequence[str]] = None,
+        update_snapshot: bool = False,
+    ) -> List[str]:
+        """
+        Scans every blob under a prefix and returns the list of those that changed
+        according to the metadata comparison rules.
+
+        Args:
+            prefix: E.g.: "enriched/parquet/fundos" (with or without a trailing slash).
+            allowed_extensions: E.g.: [".parquet", ".xlsx"] to filter by suffix.
+            update_snapshot: If True, updates the snapshot for the blobs that changed.
+
+        Returns:
+            List of blob paths that changed.
+        """
+        if prefix and not prefix.endswith("/"):
+            prefix += "/"
+
+        extensions = tuple(e.lower() for e in (allowed_extensions or []))
+        changed_paths: List[str] = []
+
+        for blob_item in self._container_client.list_blobs(name_starts_with=prefix):
+            name = blob_item.name
+            if name.endswith("/"):
+                continue
+            if extensions and not name.lower().endswith(extensions):
+                continue
+
+            previous = self._get_snapshot_metadata(name)
+            current = BlobMetadata.from_blob_properties(blob_item)
+
+            if previous is None:
+                has_changed = self._treat_missing_as_changed
+            else:
+                has_changed = (
+                    current.etag != previous.etag
+                    or current.last_modified_utc != previous.last_modified_utc
+                    or current.size_bytes != previous.size_bytes
+                )
+
+            if has_changed:
+                changed_paths.append(name)
+                if update_snapshot:
+                    self._snapshot[name] = current.to_dict()
+
+        if update_snapshot and changed_paths:
+            self._save_snapshot()
+
+        return changed_paths
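A usage sketch for the new watcher (blob paths are illustrative; with the defaults above, the snapshot pickle lives at system/state/blob_change_watcher.pkl in the same container):

    from luxorasap.utils.storage import BlobChangeWatcher

    watcher = BlobChangeWatcher()

    # Single blob: returns (changed?, previous metadata, current metadata).
    changed, previous, current = watcher.has_changed(
        "raw/xlsx/trades.xlsx", update_snapshot=True
    )
    if changed and current is not None:
        print("reprocess:", current.etag, current.size_bytes)

    # Batch: every .parquet blob under a prefix that changed since the last snapshot.
    stale = watcher.list_changed_under_prefix(
        "enriched/parquet", allowed_extensions=[".parquet"], update_snapshot=True
    )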
luxorasap-0.1.38.dist-info/METADATA → luxorasap-0.2.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: luxorasap
-Version: 0.1.38
+Version: 0.2.0
 Summary: Luxor toolbox for ingesting, analyzing, and automating financial data.
 Author-email: Luxor Group <backoffice@luxor.com.br>
 License: Proprietary – All rights reserved
luxorasap-0.1.38.dist-info/RECORD → luxorasap-0.2.0.dist-info/RECORD RENAMED
@@ -1,23 +1,24 @@
-luxorasap/__init__.py,sha256=9J8487iln_1ebmUvAHnSt7P0PInY2nZhwzmH_MqL1NU,1356
+luxorasap/__init__.py,sha256=4hN1iSr_kNBePyZxQAd1K2o0jJGru697Yy-ln78rRVA,1355
 luxorasap/btgapi/__init__.py,sha256=QUlfb5oiBY6K1Q5x4-a-x2wECe1At5wc2962I5odOJk,620
 luxorasap/btgapi/auth.py,sha256=PvyCtbEyBO2B1CIeAlNXWugKW1OgiKfPcVzS6K5FBnQ,1872
 luxorasap/btgapi/reports.py,sha256=ZVEMLoJPXc0r3XjPJPMsKQN0zZd1Npd7umNpAj1bncs,8040
 luxorasap/btgapi/trades.py,sha256=956HZ9BvN9C_VQvKTyBLN0x6ZygwVqBZN11F7OnNbDI,5985
 luxorasap/datareader/__init__.py,sha256=41RAvbrQ4R6oj67S32CrKqolx0CJ2W8cbOF6g5Cqm2g,120
-luxorasap/datareader/core.py,sha256=HBpVKwcNEQx83sDgJ6IJLXQZijoTZ1vFwTyBv_K7GDY,157345
+luxorasap/datareader/core.py,sha256=HnsHyZ3K1u4_iuJEppy9km-_mPR_jv57CRO0niBF0FU,157363
 luxorasap/ingest/__init__.py,sha256=XhxDTN2ar-u6UCPhnxNU_to-nWiit-SpQ6cA_N9eMSs,795
-luxorasap/ingest/cloud/__init__.py,sha256=7yqEgeDxplkqYTzZNB0kfkSQ8PXF-77BXMW2DMYtJbU,2911
+luxorasap/ingest/cloud/__init__.py,sha256=HRKLjxqa-COssAPoFrjfOlE_dkvTDcv5tyDhmqcdYOI,3690
 luxorasap/ingest/legacy_local/dataloader.py,sha256=DF3CvojDAi0itVDZPsQbmpl5pqMTNwOOpxTz4Ju8mho,12419
 luxorasap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 luxorasap/utils/dataframe/__init__.py,sha256=heKpmq58FmX35syzzwrHqlOWKYBkH2Z1jyqaQ_Vg-00,265
 luxorasap/utils/dataframe/reader.py,sha256=Vzjdw-AeS1lnWEHQ8RZNh0kK93NWTp0NWVi_B6mN5N0,616
 luxorasap/utils/dataframe/transforms.py,sha256=OIvlTTcjFX6bUhuQp_syEp7ssm4sLzwvgsag6n2Wl3k,2438
-luxorasap/utils/storage/__init__.py,sha256=a-c_Pb7cv2RN10rcg8s19SRMHJbuJnW5M8qw1G-ic9w,105
-luxorasap/utils/storage/blob.py,sha256=96uHWqTMqC9kfdGn0hATUmKpcUePzNsvXD5NsApcjWU,4413
+luxorasap/utils/storage/__init__.py,sha256=461GYJcPMXGjHuJ9y9D3BHOC_oUS9Re32nVu1AwKyIA,334
+luxorasap/utils/storage/blob.py,sha256=vgCKMOiVgP-V1A2xZRhG3kJhPFU-LA9E9kddOQTxYD8,9443
+luxorasap/utils/storage/change_tracker.py,sha256=5URYI18mymcVfUyyb9zi8NeNDVrdYgdOAs8L58GwL5Q,11706
 luxorasap/utils/tools/__init__.py,sha256=dvK7Z4xnNQAuEiObVN7qjeLWAvP49JeFn2Oq9GdgmXs,76
 luxorasap/utils/tools/excel.py,sha256=SfeTcbJWsWq3uKruwKSjJ4aWgMovITzlNXjP2bhdMjI,1246
-luxorasap-0.1.38.dist-info/METADATA,sha256=yrcIMIEjE3vRrCAz2wgDlQ8ZOGGd9WvaAltYJ0jvA0A,3804
-luxorasap-0.1.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-luxorasap-0.1.38.dist-info/entry_points.txt,sha256=XFh-dOwUhlya9DmGvgookMI0ezyUJjcOvTIHDEYS44g,52
-luxorasap-0.1.38.dist-info/top_level.txt,sha256=9YOL6bUIpzY06XFBRkUW1e4rgB32Ds91fQPGwUEjxzU,10
-luxorasap-0.1.38.dist-info/RECORD,,
+luxorasap-0.2.0.dist-info/METADATA,sha256=2zENi1Kh37o6j3noZijo4RJ7Dg71p0yMIlR_ZSLHqBA,3803
+luxorasap-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+luxorasap-0.2.0.dist-info/entry_points.txt,sha256=XFh-dOwUhlya9DmGvgookMI0ezyUJjcOvTIHDEYS44g,52
+luxorasap-0.2.0.dist-info/top_level.txt,sha256=9YOL6bUIpzY06XFBRkUW1e4rgB32Ds91fQPGwUEjxzU,10
+luxorasap-0.2.0.dist-info/RECORD,,