luxorasap 0.1.38__tar.gz → 0.1.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {luxorasap-0.1.38 → luxorasap-0.1.39}/PKG-INFO +1 -1
- {luxorasap-0.1.38 → luxorasap-0.1.39}/pyproject.toml +2 -2
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/__init__.py +1 -1
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/datareader/core.py +9 -10
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/ingest/cloud/__init__.py +26 -5
- luxorasap-0.1.39/src/luxorasap/utils/storage/__init__.py +2 -0
- luxorasap-0.1.39/src/luxorasap/utils/storage/blob.py +263 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap.egg-info/PKG-INFO +1 -1
- luxorasap-0.1.38/src/luxorasap/utils/storage/__init__.py +0 -2
- luxorasap-0.1.38/src/luxorasap/utils/storage/blob.py +0 -124
- {luxorasap-0.1.38 → luxorasap-0.1.39}/README.md +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/setup.cfg +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/btgapi/__init__.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/btgapi/auth.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/btgapi/reports.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/btgapi/trades.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/datareader/__init__.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/ingest/__init__.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/ingest/legacy_local/dataloader.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/utils/__init__.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/utils/dataframe/__init__.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/utils/dataframe/reader.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/utils/dataframe/transforms.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/utils/tools/__init__.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap/utils/tools/excel.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap.egg-info/SOURCES.txt +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap.egg-info/dependency_links.txt +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap.egg-info/entry_points.txt +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap.egg-info/requires.txt +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/src/luxorasap.egg-info/top_level.txt +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/tests/test_btgapi_auth.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/tests/test_btgapi_reports.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/tests/test_btgapi_trades.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/tests/test_datareader.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/tests/test_ingest_cloud.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/tests/test_ingest_legacy_local.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/tests/test_utils_dataframe.py +0 -0
- {luxorasap-0.1.38 → luxorasap-0.1.39}/tests/test_utils_storage.py +0 -0
|
@@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"
|
|
|
10
10
|
#############################
|
|
11
11
|
[project]
|
|
12
12
|
name = "luxorasap"
|
|
13
|
-
version = "0.1.38"
|
|
13
|
+
version = "0.1.39"
|
|
14
14
|
description = "Toolbox da Luxor para ingestão, análise e automação de dados financeiros."
|
|
15
15
|
readme = "README.md"
|
|
16
16
|
requires-python = ">=3.9"
|
|
@@ -78,7 +78,7 @@ exclude = ["tests*"]
|
|
|
78
78
|
# bumpver (sem-ver)
|
|
79
79
|
#############################
|
|
80
80
|
[tool.bumpver]
|
|
81
|
-
current_version = "0.1.38"
|
|
81
|
+
current_version = "0.1.39"
|
|
82
82
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
83
83
|
|
|
84
84
|
# regex explícito – obrigatório no bumpver 2024+
|
|
@@ -13,7 +13,7 @@ from types import ModuleType
|
|
|
13
13
|
try:
|
|
14
14
|
__version__: str = metadata.version(__name__)
|
|
15
15
|
except metadata.PackageNotFoundError: # editable install
|
|
16
|
-
__version__ = "0.1.38"
|
|
16
|
+
__version__ = "0.1.39"
|
|
17
17
|
|
|
18
18
|
# ─── Lazy loader ─────────────────────────────────────────────────
|
|
19
19
|
def __getattr__(name: str) -> ModuleType:
|
|
@@ -24,12 +24,15 @@ load_dotenv()
|
|
|
24
24
|
#@logger.catch
|
|
25
25
|
class LuxorQuery:
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
# Criando construtor com docstring detalhada
|
|
28
|
+
def __init__(self, blob_directory='enriched/parquet', adls_connection_string:str=None,
|
|
29
|
+
container_name="luxorasap"):
|
|
29
30
|
"""
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
Classe para consulta de dados da Luxor.
|
|
32
|
+
Args:
|
|
33
|
+
blob_directory (str, optional): Diretório no blob onde estão as tabelas. Defaults to 'enriched/parquet'.
|
|
34
|
+
adls_connection_string (str, optional): String de conexão com o ADLS. Se None, usa variável de ambiente. Defaults to None.
|
|
35
|
+
container_name (str, optional): Nome do container no blob. Defaults to "luxorasap".
|
|
33
36
|
"""
|
|
34
37
|
|
|
35
38
|
self.blob_client = BlobParquetClient(adls_connection_string=adls_connection_string,
|
|
@@ -38,10 +41,8 @@ class LuxorQuery:
|
|
|
38
41
|
|
|
39
42
|
|
|
40
43
|
self.modified_tables = []
|
|
41
|
-
self.is_develop_mode = is_develop_mode
|
|
42
44
|
|
|
43
|
-
|
|
44
|
-
self.tables_path = tables_path
|
|
45
|
+
|
|
45
46
|
#if tables_path is None:
|
|
46
47
|
# self.tables_path = self.__set_tables_path()
|
|
47
48
|
|
|
@@ -54,8 +55,6 @@ class LuxorQuery:
|
|
|
54
55
|
self.lipi_manga_incorp_date = dt.date(2022,12,9)
|
|
55
56
|
|
|
56
57
|
|
|
57
|
-
self.update_modes_name = {"standard" : 0, "optimized" : 1}
|
|
58
|
-
self.update_mode = self.update_modes_name[update_mode]
|
|
59
58
|
self.update() # Nessa 1° exec. vai inicializar os dicionarios acima
|
|
60
59
|
|
|
61
60
|
|
|
@@ -4,7 +4,7 @@ import pandas as pd
|
|
|
4
4
|
import datetime as dt
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
7
|
-
from luxorasap.utils.storage import BlobParquetClient
|
|
7
|
+
from luxorasap.utils.storage import BlobParquetClient, BlobExcelClient, BlobPickleClient
|
|
8
8
|
from luxorasap.utils.dataframe import prep_for_save, astype_str_inplace
|
|
9
9
|
from luxorasap.datareader import LuxorQuery
|
|
10
10
|
|
|
@@ -12,6 +12,8 @@ from luxorasap.datareader import LuxorQuery
|
|
|
12
12
|
__all__ = ["save_table", "incremental_load"]
|
|
13
13
|
|
|
14
14
|
_client = BlobParquetClient() # instância única para o módulo
|
|
15
|
+
_client_excel = None
|
|
16
|
+
_client_pickle = None
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
# ────────────────────────────────────────────────────────────────
|
|
@@ -23,7 +25,8 @@ def save_table(
|
|
|
23
25
|
index_name: str = "index",
|
|
24
26
|
normalize_columns: bool = True,
|
|
25
27
|
directory: str = "enriched/parquet",
|
|
26
|
-
override=False
|
|
28
|
+
override=False,
|
|
29
|
+
format='parquet'
|
|
27
30
|
):
|
|
28
31
|
"""Salva DataFrame como Parquet em ADLS (sobrescrevendo)."""
|
|
29
32
|
|
|
@@ -43,9 +46,27 @@ def save_table(
|
|
|
43
46
|
|
|
44
47
|
df = prep_for_save(df, index=index, index_name=index_name, normalize=normalize_columns)
|
|
45
48
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
+
if format == 'parquet':
|
|
50
|
+
#_client.write_df(df.astype(str), f"{directory}/{table_name}.parquet")
|
|
51
|
+
astype_str_inplace(df)
|
|
52
|
+
_client.write_df(df, f"{directory}/{table_name}.parquet")
|
|
53
|
+
|
|
54
|
+
elif format == 'excel':
|
|
55
|
+
global _client_excel
|
|
56
|
+
if _client_excel is None:
|
|
57
|
+
_client_excel = BlobExcelClient()
|
|
58
|
+
if index:
|
|
59
|
+
df = df.reset_index().rename(columns={"index": index_name})
|
|
60
|
+
_client_excel.write_excel(df, f"{directory}/{table_name}.xlsx")
|
|
61
|
+
|
|
62
|
+
elif format == 'pickle':
|
|
63
|
+
global _client_pickle
|
|
64
|
+
if _client_pickle is None:
|
|
65
|
+
_client_pickle = BlobPickleClient()
|
|
66
|
+
_client_pickle.write_pickle(df, f"{directory}/{table_name}.pkl")
|
|
67
|
+
|
|
68
|
+
else:
|
|
69
|
+
raise ValueError(f"Formato '{format}' não suportado. Use 'parquet', 'excel' ou 'pickle'.")
|
|
49
70
|
|
|
50
71
|
|
|
51
72
|
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import io, os
|
|
2
|
+
from pathlib import PurePosixPath
|
|
3
|
+
from datetime import timezone
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import pyarrow as pa, pyarrow.parquet as pq
|
|
6
|
+
import pickle
|
|
7
|
+
import re
|
|
8
|
+
|
|
9
|
+
from azure.storage.blob import BlobServiceClient
|
|
10
|
+
from azure.core.exceptions import ResourceNotFoundError
|
|
11
|
+
|
|
12
|
+
from ..dataframe import read_bytes
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BlobParquetClient:
|
|
16
|
+
"""Leitura/gravacao de Parquet em Azure Blob – stateless & reutilizável."""
|
|
17
|
+
|
|
18
|
+
def __init__(self, container: str = "luxorasap", adls_connection_string: str = None):
|
|
19
|
+
if adls_connection_string is None:
|
|
20
|
+
adls_connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
|
|
21
|
+
|
|
22
|
+
if adls_connection_string is None:
|
|
23
|
+
raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
|
|
24
|
+
self._svc = BlobServiceClient.from_connection_string(adls_connection_string)
|
|
25
|
+
self._container = container
|
|
26
|
+
|
|
27
|
+
# ---------- API pública ----------
|
|
28
|
+
def read_df(self, blob_path: str) -> (pd.DataFrame, bool):
|
|
29
|
+
buf = io.BytesIO()
|
|
30
|
+
try:
|
|
31
|
+
self._blob(blob_path).download_blob().readinto(buf)
|
|
32
|
+
return (
|
|
33
|
+
read_bytes(buf.getvalue(), filename=PurePosixPath(blob_path).name),
|
|
34
|
+
True,
|
|
35
|
+
)
|
|
36
|
+
except Exception:
|
|
37
|
+
return None, False
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def write_df(self, df, blob_path: str):
|
|
41
|
+
|
|
42
|
+
blob = self._blob(blob_path)
|
|
43
|
+
table = pa.Table.from_pandas(df)
|
|
44
|
+
buf = io.BytesIO()
|
|
45
|
+
pq.write_table(table, buf)
|
|
46
|
+
buf.seek(0)
|
|
47
|
+
blob.upload_blob(buf, overwrite=True)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_df_update_time(self, blob_path: str) -> float:
|
|
51
|
+
try:
|
|
52
|
+
properties = self._blob(blob_path).get_blob_properties()
|
|
53
|
+
return properties['last_modified'].replace(tzinfo=timezone.utc).timestamp()
|
|
54
|
+
except Exception:
|
|
55
|
+
return 0.0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def exists_df(self, blob_path: str) -> bool:
|
|
59
|
+
try:
|
|
60
|
+
self._blob(blob_path).get_blob_properties()
|
|
61
|
+
return True
|
|
62
|
+
except Exception:
|
|
63
|
+
return False
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def list_blob_files(self, blob_path: str, ends_with: str = None) -> list:
|
|
67
|
+
"""
|
|
68
|
+
Lista os arquivos em um diretório do blob storage.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
blob_path (str): O caminho do diretório no blob storage.
|
|
72
|
+
ends_with (str, optional): Filtra os arquivos que terminam com esta string.(Ex.: '.parquet')
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
list: Uma lista de nomes de blob.
|
|
76
|
+
|
|
77
|
+
"""
|
|
78
|
+
try:
|
|
79
|
+
container_client = self._svc.get_container_client(self._container)
|
|
80
|
+
blob_list = container_client.list_blobs(name_starts_with=blob_path)
|
|
81
|
+
if ends_with:
|
|
82
|
+
return [blob.name for blob in blob_list if blob.name.endswith(ends_with)]
|
|
83
|
+
return [blob.name for blob in blob_list]
|
|
84
|
+
except Exception:
|
|
85
|
+
return []
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def table_exists(self, table_path: str) -> bool:
|
|
89
|
+
"""
|
|
90
|
+
Checa se uma tabela existe no blob storage.
|
|
91
|
+
"""
|
|
92
|
+
return self.exists_df(table_path)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ---------- interno --------------
|
|
96
|
+
def _blob(self, path: str):
|
|
97
|
+
path = str(PurePosixPath(path))
|
|
98
|
+
return self._svc.get_blob_client(self._container, path)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class BlobPickleClient:
|
|
102
|
+
def __init__(self, *, adls_connection_string: str = None, container: str = "luxorasap"):
|
|
103
|
+
if adls_connection_string is None:
|
|
104
|
+
adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
|
105
|
+
|
|
106
|
+
if adls_connection_string is None:
|
|
107
|
+
raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
|
|
108
|
+
|
|
109
|
+
self._svc = BlobServiceClient.from_connection_string(adls_connection_string)
|
|
110
|
+
self._container = self._svc.get_container_client(container)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def write_pickle(self, obj, blob_name: str):
|
|
114
|
+
"""Salva objeto Python (ex: DataFrame) como pickle no blob"""
|
|
115
|
+
buf = io.BytesIO()
|
|
116
|
+
pickle.dump(obj, buf)
|
|
117
|
+
buf.seek(0)
|
|
118
|
+
self._container.upload_blob(name=blob_name, data=buf, overwrite=True)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def read_pickle(self, blob_name: str):
|
|
122
|
+
"""Lê pickle do blob e retorna objeto Python"""
|
|
123
|
+
downloader = self._container.download_blob(blob_name)
|
|
124
|
+
buf = io.BytesIO(downloader.readall())
|
|
125
|
+
return pickle.load(buf)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def exists(self, blob_name: str) -> bool:
|
|
129
|
+
return self._container.get_blob_client(blob_name).exists()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class BlobExcelClient:
|
|
134
|
+
def __init__(self, *, adls_connection_string: str = None, container: str = "luxorasap"):
|
|
135
|
+
if adls_connection_string is None:
|
|
136
|
+
adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
|
137
|
+
|
|
138
|
+
if adls_connection_string is None:
|
|
139
|
+
raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
|
|
140
|
+
|
|
141
|
+
self._svc = BlobServiceClient.from_connection_string(adls_connection_string)
|
|
142
|
+
self._container = self._svc.get_container_client(container)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def write_excel(self, df: pd.DataFrame, blob_name: str, **kwargs):
|
|
146
|
+
"""
|
|
147
|
+
Salva um DataFrame como arquivo Excel no blob.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
df (pd.DataFrame): DataFrame a ser salvo
|
|
151
|
+
blob_name (str): caminho/nome do blob (ex: "reports/test.xlsx")
|
|
152
|
+
**kwargs: argumentos extras para `DataFrame.to_excel`
|
|
153
|
+
"""
|
|
154
|
+
buf = io.BytesIO()
|
|
155
|
+
df.to_excel(buf, index=False, engine="openpyxl", **kwargs)
|
|
156
|
+
buf.seek(0)
|
|
157
|
+
self._container.upload_blob(name=blob_name, data=buf, overwrite=True)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def read_excel(self, blob_name: str, **kwargs) -> pd.DataFrame:
|
|
161
|
+
"""
|
|
162
|
+
Lê um arquivo Excel do blob e retorna um DataFrame.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
blob_name (str): caminho/nome do blob (ex: "reports/test.xlsx")
|
|
166
|
+
**kwargs: argumentos extras para `pd.read_excel`
|
|
167
|
+
|
|
168
|
+
Returns:
|
|
169
|
+
pd.DataFrame
|
|
170
|
+
"""
|
|
171
|
+
downloader = self._container.download_blob(blob_name)
|
|
172
|
+
buf = io.BytesIO(downloader.readall())
|
|
173
|
+
return pd.read_excel(buf, engine="openpyxl", **kwargs)
|
|
174
|
+
|
|
175
|
+
def exists(self, blob_name: str) -> bool:
|
|
176
|
+
return self._container.get_blob_client(blob_name).exists()
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def list_blob_files(blob_path: str, container="luxorasap", ends_with: str = None, adls_connection_string: str = None) -> list:
|
|
181
|
+
"""
|
|
182
|
+
Lista os arquivos em um diretório do blob storage.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
blob_path (str): O caminho do diretório no blob storage.
|
|
186
|
+
ends_with (str, optional): Filtra os arquivos que terminam com esta string.(Ex.: '.parquet')
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
list: Uma lista de nomes de blob.
|
|
190
|
+
|
|
191
|
+
"""
|
|
192
|
+
|
|
193
|
+
if adls_connection_string is None:
|
|
194
|
+
adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
|
195
|
+
if adls_connection_string is None:
|
|
196
|
+
raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
|
|
197
|
+
|
|
198
|
+
try:
|
|
199
|
+
svc = BlobServiceClient.from_connection_string(adls_connection_string)
|
|
200
|
+
container_client = svc.get_container_client(container)
|
|
201
|
+
blob_list = container_client.list_blobs(name_starts_with=blob_path)
|
|
202
|
+
if ends_with:
|
|
203
|
+
return [blob.name for blob in blob_list if blob.name.endswith(ends_with)]
|
|
204
|
+
return [blob.name for blob in blob_list]
|
|
205
|
+
except Exception:
|
|
206
|
+
return []
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def delete_blob(
|
|
210
|
+
blob_name: str,
|
|
211
|
+
*,
|
|
212
|
+
adls_connection_string: str | None = None,
|
|
213
|
+
container: str = "luxorasap",
|
|
214
|
+
include_snapshots: bool = False,
|
|
215
|
+
) -> None:
|
|
216
|
+
"""
|
|
217
|
+
Exclui com segurança APENAS um arquivo (blob) exato do Azure Blob Storage.
|
|
218
|
+
|
|
219
|
+
Regras de segurança:
|
|
220
|
+
- Recusa nomes que terminem com "/" (prefixos de diretório virtual).
|
|
221
|
+
- Recusa curingas/shell globs (*, ?, []), para evitar exclusões indevidas.
|
|
222
|
+
- Verifica a existência do blob exato antes de remover.
|
|
223
|
+
|
|
224
|
+
Args:
|
|
225
|
+
blob_name: Caminho EXATO do blob (ex.: "enriched/parquet/tabela.parquet").
|
|
226
|
+
adls_connection_string: Se None, lê de AZURE_STORAGE_CONNECTION_STRING.
|
|
227
|
+
container: Nome do container.
|
|
228
|
+
include_snapshots: Se True, apaga snapshots vinculados ao blob.
|
|
229
|
+
|
|
230
|
+
Raises:
|
|
231
|
+
ValueError: Se o nome parecer um diretório/prefixo ou contiver curingas.
|
|
232
|
+
FileNotFoundError: Se o blob exato não existir.
|
|
233
|
+
RuntimeError: Se a conexão com o Azure não estiver configurada.
|
|
234
|
+
"""
|
|
235
|
+
if adls_connection_string is None:
|
|
236
|
+
adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
|
237
|
+
if adls_connection_string is None:
|
|
238
|
+
raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
|
|
239
|
+
|
|
240
|
+
# 1) Bloqueios contra “diretórios” e curingas
|
|
241
|
+
if blob_name.endswith("/"):
|
|
242
|
+
raise ValueError("Nome termina com '/': recusa exclusão de diretórios/prefixos.")
|
|
243
|
+
if re.search(r"[\*\?\[\]]", blob_name):
|
|
244
|
+
raise ValueError("Curingas encontrados no nome do blob. Informe um arquivo exato.")
|
|
245
|
+
|
|
246
|
+
svc = BlobServiceClient.from_connection_string(adls_connection_string)
|
|
247
|
+
container_client = svc.get_container_client(container)
|
|
248
|
+
blob_client = container_client.get_blob_client(blob_name)
|
|
249
|
+
|
|
250
|
+
# 2) Checa existência do blob exato
|
|
251
|
+
try:
|
|
252
|
+
blob_client.get_blob_properties()
|
|
253
|
+
except ResourceNotFoundError:
|
|
254
|
+
raise FileNotFoundError(f"Blob não encontrado: {blob_name}")
|
|
255
|
+
|
|
256
|
+
# 3) Exclui apenas o alvo exato
|
|
257
|
+
delete_kwargs = {}
|
|
258
|
+
if include_snapshots:
|
|
259
|
+
delete_kwargs["delete_snapshots"] = "include"
|
|
260
|
+
|
|
261
|
+
blob_client.delete_blob(**delete_kwargs)
|
|
262
|
+
|
|
263
|
+
|
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
import io, os
|
|
2
|
-
from pathlib import PurePosixPath
|
|
3
|
-
from datetime import timezone
|
|
4
|
-
import pandas as pd
|
|
5
|
-
import pyarrow as pa, pyarrow.parquet as pq
|
|
6
|
-
from azure.storage.blob import BlobServiceClient
|
|
7
|
-
import pickle
|
|
8
|
-
|
|
9
|
-
from ..dataframe import read_bytes
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class BlobParquetClient:
|
|
13
|
-
"""Leitura/gravacao de Parquet em Azure Blob – stateless & reutilizável."""
|
|
14
|
-
|
|
15
|
-
def __init__(self, container: str = "luxorasap", adls_connection_string: str = None):
|
|
16
|
-
if adls_connection_string is None:
|
|
17
|
-
adls_connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
|
|
18
|
-
|
|
19
|
-
if adls_connection_string is None:
|
|
20
|
-
raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
|
|
21
|
-
self._svc = BlobServiceClient.from_connection_string(adls_connection_string)
|
|
22
|
-
self._container = container
|
|
23
|
-
|
|
24
|
-
# ---------- API pública ----------
|
|
25
|
-
def read_df(self, blob_path: str) -> (pd.DataFrame, bool):
|
|
26
|
-
buf = io.BytesIO()
|
|
27
|
-
try:
|
|
28
|
-
self._blob(blob_path).download_blob().readinto(buf)
|
|
29
|
-
return (
|
|
30
|
-
read_bytes(buf.getvalue(), filename=PurePosixPath(blob_path).name),
|
|
31
|
-
True,
|
|
32
|
-
)
|
|
33
|
-
except Exception:
|
|
34
|
-
return None, False
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def write_df(self, df, blob_path: str):
|
|
38
|
-
|
|
39
|
-
blob = self._blob(blob_path)
|
|
40
|
-
table = pa.Table.from_pandas(df)
|
|
41
|
-
buf = io.BytesIO()
|
|
42
|
-
pq.write_table(table, buf)
|
|
43
|
-
buf.seek(0)
|
|
44
|
-
blob.upload_blob(buf, overwrite=True)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def get_df_update_time(self, blob_path: str) -> float:
|
|
48
|
-
try:
|
|
49
|
-
properties = self._blob(blob_path).get_blob_properties()
|
|
50
|
-
return properties['last_modified'].replace(tzinfo=timezone.utc).timestamp()
|
|
51
|
-
except Exception:
|
|
52
|
-
return 0.0
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def exists_df(self, blob_path: str) -> bool:
|
|
56
|
-
try:
|
|
57
|
-
self._blob(blob_path).get_blob_properties()
|
|
58
|
-
return True
|
|
59
|
-
except Exception:
|
|
60
|
-
return False
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def list_blob_files(self, blob_path: str, ends_with: str = None) -> list:
|
|
64
|
-
"""
|
|
65
|
-
Lista os arquivos em um diretório do blob storage.
|
|
66
|
-
|
|
67
|
-
Args:
|
|
68
|
-
blob_path (str): O caminho do diretório no blob storage.
|
|
69
|
-
ends_with (str, optional): Filtra os arquivos que terminam com esta string.(Ex.: '.parquet')
|
|
70
|
-
|
|
71
|
-
Returns:
|
|
72
|
-
list: Uma lista de nomes de blob.
|
|
73
|
-
|
|
74
|
-
"""
|
|
75
|
-
try:
|
|
76
|
-
container_client = self._svc.get_container_client(self._container)
|
|
77
|
-
blob_list = container_client.list_blobs(name_starts_with=blob_path)
|
|
78
|
-
if ends_with:
|
|
79
|
-
return [blob.name for blob in blob_list if blob.name.endswith(ends_with)]
|
|
80
|
-
return [blob.name for blob in blob_list]
|
|
81
|
-
except Exception:
|
|
82
|
-
return []
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def table_exists(self, table_path: str) -> bool:
|
|
86
|
-
"""
|
|
87
|
-
Checa se uma tabela existe no blob storage.
|
|
88
|
-
"""
|
|
89
|
-
return self.exists_df(table_path)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
# ---------- interno --------------
|
|
93
|
-
def _blob(self, path: str):
|
|
94
|
-
path = str(PurePosixPath(path))
|
|
95
|
-
return self._svc.get_blob_client(self._container, path)
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
class BlobPickleClient:
|
|
99
|
-
def __init__(self, *, adls_connection_string: str = None, container: str = "luxorasap"):
|
|
100
|
-
if adls_connection_string is None:
|
|
101
|
-
adls_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
|
102
|
-
|
|
103
|
-
if adls_connection_string is None:
|
|
104
|
-
raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
|
|
105
|
-
|
|
106
|
-
self.blob_service_client = BlobServiceClient.from_connection_string(adls_connection_string)
|
|
107
|
-
self.container_client = self.blob_service_client.get_container_client(container)
|
|
108
|
-
|
|
109
|
-
def write_pickle(self, obj, blob_name: str):
|
|
110
|
-
"""Salva objeto Python (ex: DataFrame) como pickle no blob"""
|
|
111
|
-
buf = io.BytesIO()
|
|
112
|
-
pickle.dump(obj, buf)
|
|
113
|
-
buf.seek(0)
|
|
114
|
-
self.container_client.upload_blob(name=blob_name, data=buf, overwrite=True)
|
|
115
|
-
|
|
116
|
-
def read_pickle(self, blob_name: str):
|
|
117
|
-
"""Lê pickle do blob e retorna objeto Python"""
|
|
118
|
-
downloader = self.container_client.download_blob(blob_name)
|
|
119
|
-
buf = io.BytesIO(downloader.readall())
|
|
120
|
-
return pickle.load(buf)
|
|
121
|
-
|
|
122
|
-
def exists(self, blob_name: str) -> bool:
|
|
123
|
-
return self.container_client.get_blob_client(blob_name).exists()
|
|
124
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|