luxorasap 0.1.17__tar.gz → 0.1.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {luxorasap-0.1.17 → luxorasap-0.1.18}/PKG-INFO +1 -1
- {luxorasap-0.1.17 → luxorasap-0.1.18}/pyproject.toml +2 -2
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/__init__.py +1 -1
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/ingest/cloud/__init__.py +8 -7
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/dataframe/__init__.py +2 -2
- luxorasap-0.1.18/src/luxorasap/utils/dataframe/transforms.py +58 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/storage/blob.py +10 -27
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/PKG-INFO +1 -1
- {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_ingest_cloud.py +3 -4
- luxorasap-0.1.17/src/luxorasap/utils/dataframe/transforms.py +0 -52
- {luxorasap-0.1.17 → luxorasap-0.1.18}/README.md +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/setup.cfg +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/btgapi/__init__.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/btgapi/auth.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/btgapi/reports.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/btgapi/trades.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/datareader/__init__.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/datareader/core.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/ingest/__init__.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/ingest/legacy_local/dataloader.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/__init__.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/dataframe/reader.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/storage/__init__.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/SOURCES.txt +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/dependency_links.txt +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/entry_points.txt +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/requires.txt +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/top_level.txt +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_btgapi_auth.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_btgapi_reports.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_btgapi_trades.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_datareader.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_ingest_legacy_local.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_utils_dataframe.py +0 -0
- {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_utils_storage.py +0 -0
|
@@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"
|
|
|
10
10
|
#############################
|
|
11
11
|
[project]
|
|
12
12
|
name = "luxorasap"
|
|
13
|
-
version = "0.1.17"
|
|
13
|
+
version = "0.1.18"
|
|
14
14
|
description = "Toolbox da Luxor para ingestão, análise e automação de dados financeiros."
|
|
15
15
|
readme = "README.md"
|
|
16
16
|
requires-python = ">=3.9"
|
|
@@ -78,7 +78,7 @@ exclude = ["tests*"]
|
|
|
78
78
|
# bumpver (sem-ver)
|
|
79
79
|
#############################
|
|
80
80
|
[tool.bumpver]
|
|
81
|
-
current_version = "0.1.17"
|
|
81
|
+
current_version = "0.1.18"
|
|
82
82
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
83
83
|
|
|
84
84
|
# regex explícito – obrigatório no bumpver 2024+
|
|
@@ -13,7 +13,7 @@ from types import ModuleType
|
|
|
13
13
|
try:
|
|
14
14
|
__version__: str = metadata.version(__name__)
|
|
15
15
|
except metadata.PackageNotFoundError: # editable install
|
|
16
|
-
__version__ = "0.1.17"
|
|
16
|
+
__version__ = "0.1.18"
|
|
17
17
|
|
|
18
18
|
# ─── Lazy loader ─────────────────────────────────────────────────
|
|
19
19
|
def __getattr__(name: str) -> ModuleType:
|
|
@@ -7,6 +7,7 @@ from luxorasap.utils.storage import BlobParquetClient
|
|
|
7
7
|
from luxorasap.utils.dataframe import prep_for_save
|
|
8
8
|
from luxorasap.datareader import LuxorQuery
|
|
9
9
|
|
|
10
|
+
|
|
10
11
|
__all__ = ["save_table", "incremental_load"]
|
|
11
12
|
|
|
12
13
|
_client = BlobParquetClient() # instância única para o módulo
|
|
@@ -21,8 +22,7 @@ def save_table(
|
|
|
21
22
|
index_name: str = "index",
|
|
22
23
|
normalize_columns: bool = True,
|
|
23
24
|
directory: str = "enriched/parquet",
|
|
24
|
-
override=False
|
|
25
|
-
large_df: bool = False
|
|
25
|
+
override=False
|
|
26
26
|
):
|
|
27
27
|
"""Salva DataFrame como Parquet em ADLS (sobrescrevendo)."""
|
|
28
28
|
|
|
@@ -35,7 +35,10 @@ def save_table(
|
|
|
35
35
|
return
|
|
36
36
|
|
|
37
37
|
df = prep_for_save(df, index=index, index_name=index_name, normalize=normalize_columns)
|
|
38
|
-
|
|
38
|
+
|
|
39
|
+
#_client.write_df(df.astype(str), f"{directory}/{table_name}.parquet")
|
|
40
|
+
_client.write_df(df, f"{directory}/{table_name}.parquet")
|
|
41
|
+
|
|
39
42
|
|
|
40
43
|
|
|
41
44
|
def incremental_load(
|
|
@@ -47,8 +50,7 @@ def incremental_load(
|
|
|
47
50
|
index: bool = False,
|
|
48
51
|
index_name: str = "index",
|
|
49
52
|
normalize_columns: bool = True,
|
|
50
|
-
directory: str = "enriched/parquet"
|
|
51
|
-
large_df: bool = False
|
|
53
|
+
directory: str = "enriched/parquet"
|
|
52
54
|
):
|
|
53
55
|
"""Concatena novos dados aos existentes, cortando duplicados pela data."""
|
|
54
56
|
df["Last_Updated"] = dt.datetime.now()
|
|
@@ -66,6 +68,5 @@ def incremental_load(
|
|
|
66
68
|
index_name=index_name,
|
|
67
69
|
normalize_columns=normalize_columns,
|
|
68
70
|
directory=directory,
|
|
69
|
-
override=True
|
|
70
|
-
large_df=large_df
|
|
71
|
+
override=True
|
|
71
72
|
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .transforms import prep_for_save, persist_column_formatting, text_to_lowercase
|
|
1
|
+
from .transforms import prep_for_save, persist_column_formatting, text_to_lowercase_inplace
|
|
2
2
|
from .reader import read_bytes
|
|
3
3
|
|
|
4
|
-
__all__ = ["prep_for_save", "persist_column_formatting", "text_to_lowercase", "read_bytes"]
|
|
4
|
+
__all__ = ["prep_for_save", "persist_column_formatting", "text_to_lowercase_inplace", "read_bytes"]
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from pandas.api.types import is_object_dtype, is_string_dtype
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def text_to_lowercase_inplace(df: pd.DataFrame, cols: list[str]) -> None:
|
|
6
|
+
"""
|
|
7
|
+
Converte para lower+strip apenas as células que são str.
|
|
8
|
+
Não tenta aplicar `.str` se a coluna (ou célula) não for string.
|
|
9
|
+
Opera in-place; não devolve nada.
|
|
10
|
+
"""
|
|
11
|
+
for col in cols:
|
|
12
|
+
# Precisa ser coluna potencialmente textual
|
|
13
|
+
if not (is_object_dtype(df[col]) or is_string_dtype(df[col])):
|
|
14
|
+
continue
|
|
15
|
+
|
|
16
|
+
# Cria máscara com valores realmente str (ignora NaN, ints, decimals…)
|
|
17
|
+
mask = df[col].apply(lambda x: isinstance(x, str))
|
|
18
|
+
|
|
19
|
+
if mask.any(): # só se houver algo a tratar
|
|
20
|
+
df.loc[mask, col] = (
|
|
21
|
+
df.loc[mask, col]
|
|
22
|
+
.str.lower()
|
|
23
|
+
.str.strip()
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def persist_column_formatting(df: pd.DataFrame,
|
|
28
|
+
columns_to_persist_override: set | None = None) -> pd.DataFrame:
|
|
29
|
+
if columns_to_persist_override is None:
|
|
30
|
+
columns_to_persist_override = set()
|
|
31
|
+
|
|
32
|
+
cols_keep_case = {
|
|
33
|
+
"Name", "Class", "Vehicles", "Segment"
|
|
34
|
+
}.union(columns_to_persist_override)
|
|
35
|
+
|
|
36
|
+
# Só colunas objeto/string candidatas
|
|
37
|
+
candidate_cols = [
|
|
38
|
+
c for c in df.columns
|
|
39
|
+
if c not in cols_keep_case and
|
|
40
|
+
(df[c].dtype == "object" or pd.api.types.is_string_dtype(df[c]))
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
text_to_lowercase_inplace(df, candidate_cols)
|
|
44
|
+
|
|
45
|
+
return df # mesma referência; alterações foram in-place
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def prep_for_save(
|
|
49
|
+
df: pd.DataFrame,
|
|
50
|
+
*,
|
|
51
|
+
index: bool = False,
|
|
52
|
+
index_name: str = "index",
|
|
53
|
+
normalize: bool = False,
|
|
54
|
+
):
|
|
55
|
+
if index:
|
|
56
|
+
name = df.index.name or index_name
|
|
57
|
+
df = df.reset_index().rename(columns={"index": name})
|
|
58
|
+
return persist_column_formatting(df) if normalize else df
|
|
@@ -4,7 +4,7 @@ from datetime import timezone
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import pyarrow as pa, pyarrow.parquet as pq
|
|
6
6
|
from azure.storage.blob import BlobServiceClient
|
|
7
|
-
import io, os
|
|
7
|
+
import io
|
|
8
8
|
|
|
9
9
|
from ..dataframe import read_bytes
|
|
10
10
|
|
|
@@ -34,29 +34,14 @@ class BlobParquetClient:
|
|
|
34
34
|
return None, False
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
def write_df(self, df, blob_path: str, large_df: bool = False):
|
|
38
|
-
if not large_df:
|
|
39
|
-
table = pa.Table.from_pandas(df)
|
|
40
|
-
buf = io.BytesIO()
|
|
41
|
-
pq.write_table(table, buf)
|
|
42
|
-
buf.seek(0)
|
|
43
|
-
self._blob(blob_path).upload_blob(buf, overwrite=True)
|
|
37
|
+
def write_df(self, df, blob_path: str):
|
|
44
38
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if writer is None:
|
|
52
|
-
writer = pq.ParquetWriter(f.name, chunk.schema)
|
|
53
|
-
writer.write_table(chunk)
|
|
54
|
-
writer.close()
|
|
55
|
-
|
|
56
|
-
with open(f.name, "rb") as f_read:
|
|
57
|
-
self._blob(blob_path).upload_blob(f_read, overwrite=True)
|
|
58
|
-
|
|
59
|
-
os.remove(f.name)
|
|
39
|
+
blob = self._blob(blob_path)
|
|
40
|
+
table = pa.Table.from_pandas(df, preserve_index=False)
|
|
41
|
+
buf = io.BytesIO()
|
|
42
|
+
pq.write_table(table, buf)
|
|
43
|
+
buf.seek(0)
|
|
44
|
+
blob.upload_blob(buf, overwrite=True)
|
|
60
45
|
|
|
61
46
|
|
|
62
47
|
def get_df_update_time(self, blob_path: str) -> float:
|
|
@@ -102,12 +87,10 @@ class BlobParquetClient:
|
|
|
102
87
|
Checa se uma tabela existe no blob storage.
|
|
103
88
|
"""
|
|
104
89
|
return self.exists_df(table_path)
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
90
|
|
|
109
91
|
|
|
110
92
|
# ---------- interno --------------
|
|
111
93
|
def _blob(self, path: str):
|
|
112
94
|
path = str(PurePosixPath(path))
|
|
113
|
-
return self._svc.get_blob_client(self._container, path)
|
|
95
|
+
return self._svc.get_blob_client(self._container, path)
|
|
96
|
+
|
|
@@ -7,17 +7,16 @@ import luxorasap.ingest.cloud as cloud
|
|
|
7
7
|
def test_save_table_calls_blob_client(fake_blob, monkeypatch):
|
|
8
8
|
captured = {}
|
|
9
9
|
|
|
10
|
-
def fake_write(df, path, large_df=False):
|
|
10
|
+
def fake_write(df, path):
|
|
11
11
|
captured["df"] = df.copy()
|
|
12
12
|
captured["path"] = path
|
|
13
|
-
captured["large_df"] = large_df
|
|
14
13
|
|
|
15
14
|
monkeypatch.setattr(cloud, "_client", SimpleNamespace(write_df=fake_write))
|
|
16
15
|
|
|
17
16
|
df = pd.DataFrame({"x": [1]})
|
|
18
17
|
cloud.save_table("t1", df, directory="dir")
|
|
19
18
|
assert captured["path"] == "dir/t1.parquet"
|
|
20
|
-
assert captured["df"].equals(df)
|
|
19
|
+
assert captured["df"].equals(df)
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
def test_incremental_load_merges_correctly(fake_blob, monkeypatch):
|
|
@@ -30,5 +29,5 @@ def test_incremental_load_merges_correctly(fake_blob, monkeypatch):
|
|
|
30
29
|
writes = {}
|
|
31
30
|
monkeypatch.setattr(cloud, "_client", SimpleNamespace(write_df=lambda df, p: writes.setdefault("df", df)))
|
|
32
31
|
new = pd.DataFrame({"Date":[dt.date(2024,1,2)], "v":[2]})
|
|
33
|
-
cloud.incremental_load(stub_lq, "prices", new, increment_column="Date", large_df=False)
|
|
32
|
+
cloud.incremental_load(stub_lq, "prices", new, increment_column="Date")
|
|
34
33
|
assert len(writes["df"]) == 2
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
|
|
3
|
-
def text_to_lowercase(t: pd.DataFrame) -> pd.DataFrame:
|
|
4
|
-
"""
|
|
5
|
-
Converte todas as colunas de texto para lowercase
|
|
6
|
-
Args:
|
|
7
|
-
t (pd.DataFrame): pandas DataFrame
|
|
8
|
-
Returns:
|
|
9
|
-
pd.DataFrame
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
return t.map(lambda x: x.lower().strip() if isinstance(x, str) else x)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def persist_column_formatting(t: pd.DataFrame, columns_to_persist_override : set = {}) -> pd.DataFrame:
|
|
16
|
-
"""
|
|
17
|
-
Persiste a formatacao de algumas colunas, e transforma o resto em lowercase
|
|
18
|
-
Args:
|
|
19
|
-
t (pd.DataFrame): pandas DataFrame
|
|
20
|
-
Returns:
|
|
21
|
-
pd.DataFrame
|
|
22
|
-
"""
|
|
23
|
-
|
|
24
|
-
columns_to_persist = {"Name", "Class", "Vehicles", "Segment"}
|
|
25
|
-
columns_to_persist = columns_to_persist.union(columns_to_persist_override)
|
|
26
|
-
|
|
27
|
-
if len(set(t.columns).intersection(columns_to_persist)) > 0:
|
|
28
|
-
# Vamos persistir a formatacao de algumas colunas
|
|
29
|
-
columns_order = list(t.columns)
|
|
30
|
-
columns_to_persist = list(set(t.columns).intersection(columns_to_persist))
|
|
31
|
-
persistent_data = t[columns_to_persist].copy()
|
|
32
|
-
|
|
33
|
-
columns_to_normalize = list(set(columns_order) - set(columns_to_persist))
|
|
34
|
-
t = text_to_lowercase(t[columns_to_normalize])
|
|
35
|
-
t.loc[:,columns_to_persist] = persistent_data
|
|
36
|
-
return t[columns_order]
|
|
37
|
-
|
|
38
|
-
# Nos outros casos, transformaremos tudo em lowercase
|
|
39
|
-
return text_to_lowercase(t)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def prep_for_save(
|
|
43
|
-
df: pd.DataFrame,
|
|
44
|
-
*,
|
|
45
|
-
index: bool = False,
|
|
46
|
-
index_name: str = "index",
|
|
47
|
-
normalize: bool = False,
|
|
48
|
-
):
|
|
49
|
-
if index:
|
|
50
|
-
name = df.index.name or index_name
|
|
51
|
-
df = df.reset_index().rename(columns={"index": name})
|
|
52
|
-
return persist_column_formatting(df) if normalize else df
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|