luxorasap 0.1.17__tar.gz → 0.1.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {luxorasap-0.1.17 → luxorasap-0.1.18}/PKG-INFO +1 -1
  2. {luxorasap-0.1.17 → luxorasap-0.1.18}/pyproject.toml +2 -2
  3. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/__init__.py +1 -1
  4. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/ingest/cloud/__init__.py +8 -7
  5. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/dataframe/__init__.py +2 -2
  6. luxorasap-0.1.18/src/luxorasap/utils/dataframe/transforms.py +58 -0
  7. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/storage/blob.py +10 -27
  8. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/PKG-INFO +1 -1
  9. {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_ingest_cloud.py +3 -4
  10. luxorasap-0.1.17/src/luxorasap/utils/dataframe/transforms.py +0 -52
  11. {luxorasap-0.1.17 → luxorasap-0.1.18}/README.md +0 -0
  12. {luxorasap-0.1.17 → luxorasap-0.1.18}/setup.cfg +0 -0
  13. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/btgapi/__init__.py +0 -0
  14. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/btgapi/auth.py +0 -0
  15. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/btgapi/reports.py +0 -0
  16. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/btgapi/trades.py +0 -0
  17. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/datareader/__init__.py +0 -0
  18. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/datareader/core.py +0 -0
  19. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/ingest/__init__.py +0 -0
  20. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/ingest/legacy_local/dataloader.py +0 -0
  21. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/__init__.py +0 -0
  22. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/dataframe/reader.py +0 -0
  23. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/storage/__init__.py +0 -0
  24. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/SOURCES.txt +0 -0
  25. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/dependency_links.txt +0 -0
  26. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/entry_points.txt +0 -0
  27. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/requires.txt +0 -0
  28. {luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/top_level.txt +0 -0
  29. {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_btgapi_auth.py +0 -0
  30. {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_btgapi_reports.py +0 -0
  31. {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_btgapi_trades.py +0 -0
  32. {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_datareader.py +0 -0
  33. {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_ingest_legacy_local.py +0 -0
  34. {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_utils_dataframe.py +0 -0
  35. {luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_utils_storage.py +0 -0
{luxorasap-0.1.17 → luxorasap-0.1.18}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: luxorasap
-Version: 0.1.17
+Version: 0.1.18
 Summary: Toolbox da Luxor para ingestão, análise e automação de dados financeiros.
 Author-email: Luxor Group <backoffice@luxor.com.br>
 License: Proprietary – All rights reserved
{luxorasap-0.1.17 → luxorasap-0.1.18}/pyproject.toml
@@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"
 #############################
 [project]
 name = "luxorasap"
-version = "0.1.17"
+version = "0.1.18"
 description = "Toolbox da Luxor para ingestão, análise e automação de dados financeiros."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -78,7 +78,7 @@ exclude = ["tests*"]
 # bumpver (sem-ver)
 #############################
 [tool.bumpver]
-current_version = "0.1.17"
+current_version = "0.1.18"
 version_pattern = "MAJOR.MINOR.PATCH"

 # regex explícito – obrigatório no bumpver 2024+
{luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/__init__.py
@@ -13,7 +13,7 @@ from types import ModuleType
 try:
     __version__: str = metadata.version(__name__)
 except metadata.PackageNotFoundError:  # editable install
-    __version__ = "0.1.17"
+    __version__ = "0.1.18"

 # ─── Lazy loader ─────────────────────────────────────────────────
 def __getattr__(name: str) -> ModuleType:
{luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/ingest/cloud/__init__.py
@@ -7,6 +7,7 @@ from luxorasap.utils.storage import BlobParquetClient
 from luxorasap.utils.dataframe import prep_for_save
 from luxorasap.datareader import LuxorQuery

+
 __all__ = ["save_table", "incremental_load"]

 _client = BlobParquetClient()  # instância única para o módulo
@@ -21,8 +22,7 @@ def save_table(
     index_name: str = "index",
     normalize_columns: bool = True,
     directory: str = "enriched/parquet",
-    override=False,
-    large_df: bool = False
+    override=False
 ):
     """Salva DataFrame como Parquet em ADLS (sobrescrevendo)."""

@@ -35,7 +35,10 @@ def save_table(
         return

     df = prep_for_save(df, index=index, index_name=index_name, normalize=normalize_columns)
-    _client.write_df(df.astype(str), f"{directory}/{table_name}.parquet", large_df=large_df)
+
+    #_client.write_df(df.astype(str), f"{directory}/{table_name}.parquet")
+    _client.write_df(df, f"{directory}/{table_name}.parquet")
+


 def incremental_load(
@@ -47,8 +50,7 @@ def incremental_load(
     index: bool = False,
     index_name: str = "index",
     normalize_columns: bool = True,
-    directory: str = "enriched/parquet",
-    large_df: bool = False
+    directory: str = "enriched/parquet"
 ):
     """Concatena novos dados aos existentes, cortando duplicados pela data."""
     df["Last_Updated"] = dt.datetime.now()
@@ -66,6 +68,5 @@ def incremental_load(
         index_name=index_name,
         normalize_columns=normalize_columns,
         directory=directory,
-        override=True,
-        large_df=large_df
+        override=True
     )
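With large_df gone, every save goes through the same buffered writer, and save_table no longer coerces the frame with df.astype(str), so column dtypes survive into the Parquet file. A before/after sketch of a call site (lq is assumed to be a LuxorQuery instance; table and column names are illustrative):

    import datetime as dt
    import pandas as pd
    import luxorasap.ingest.cloud as cloud

    lq = ...  # a LuxorQuery from luxorasap.datareader (construction omitted)
    new_rows = pd.DataFrame({"Date": [dt.date(2024, 1, 2)], "v": [2]})

    # 0.1.17: cloud.incremental_load(lq, "prices", new_rows, increment_column="Date", large_df=False)
    # 0.1.18: the keyword is removed; passing it now raises a TypeError
    cloud.incremental_load(lq, "prices", new_rows, increment_column="Date")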
{luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/dataframe/__init__.py
@@ -1,4 +1,4 @@
-from .transforms import prep_for_save, persist_column_formatting, text_to_lowercase
+from .transforms import prep_for_save, persist_column_formatting, text_to_lowercase_inplace
 from .reader import read_bytes

-__all__ = ["prep_for_save", "persist_column_formatting", "text_to_lowercase", "read_bytes"]
+__all__ = ["prep_for_save", "persist_column_formatting", "text_to_lowercase_inplace", "read_bytes"]
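Call sites that imported text_to_lowercase must move to the new name and its new contract (column list in, mutation in place, returns None). A minimal migration sketch with a hypothetical frame:

    import pandas as pd
    from luxorasap.utils.dataframe import text_to_lowercase_inplace

    df = pd.DataFrame({"City": ["  NY  "], "v": [1]})

    # 0.1.17: df = text_to_lowercase(df)   # whole frame in, new frame out
    # 0.1.18: pass the columns; df is mutated and nothing is returned
    text_to_lowercase_inplace(df, ["City"])
    assert df.loc[0, "City"] == "ny"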
luxorasap-0.1.18/src/luxorasap/utils/dataframe/transforms.py (new file)
@@ -0,0 +1,58 @@
+import pandas as pd
+from pandas.api.types import is_object_dtype, is_string_dtype
+
+
+def text_to_lowercase_inplace(df: pd.DataFrame, cols: list[str]) -> None:
+    """
+    Converte para lower+strip apenas as células que são str.
+    Não tenta aplicar `.str` se a coluna (ou célula) não for string.
+    Opera in-place; não devolve nada.
+    """
+    for col in cols:
+        # Precisa ser coluna potencialmente textual
+        if not (is_object_dtype(df[col]) or is_string_dtype(df[col])):
+            continue
+
+        # Cria máscara com valores realmente str (ignora NaN, ints, decimals…)
+        mask = df[col].apply(lambda x: isinstance(x, str))
+
+        if mask.any():  # só se houver algo a tratar
+            df.loc[mask, col] = (
+                df.loc[mask, col]
+                .str.lower()
+                .str.strip()
+            )
+
+
+def persist_column_formatting(df: pd.DataFrame,
+                              columns_to_persist_override: set | None = None) -> pd.DataFrame:
+    if columns_to_persist_override is None:
+        columns_to_persist_override = set()
+
+    cols_keep_case = {
+        "Name", "Class", "Vehicles", "Segment"
+    }.union(columns_to_persist_override)
+
+    # Só colunas objeto/string candidatas
+    candidate_cols = [
+        c for c in df.columns
+        if c not in cols_keep_case and
+        (df[c].dtype == "object" or pd.api.types.is_string_dtype(df[c]))
+    ]
+
+    text_to_lowercase_inplace(df, candidate_cols)
+
+    return df  # mesma referência; alterações foram in-place
+
+
+def prep_for_save(
+    df: pd.DataFrame,
+    *,
+    index: bool = False,
+    index_name: str = "index",
+    normalize: bool = False,
+):
+    if index:
+        name = df.index.name or index_name
+        df = df.reset_index().rename(columns={"index": name})
+    return persist_column_formatting(df) if normalize else df
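A short usage sketch of the new module (hypothetical data): columns named in cols_keep_case and non-text columns pass through untouched, while remaining str cells are lowercased and stripped in place:

    import pandas as pd
    from luxorasap.utils.dataframe import persist_column_formatting

    df = pd.DataFrame({
        "Name": ["Fund A"],          # in cols_keep_case: casing preserved
        "City": ["  SÃO PAULO  "],   # plain str cell: lowered and stripped
        "Count": [10],               # non-text dtype: column skipped entirely
    })

    out = persist_column_formatting(df)
    assert out is df                           # same reference, mutated in place
    assert df.loc[0, "City"] == "são paulo"
    assert df.loc[0, "Name"] == "Fund A"

Unlike the 0.1.17 implementation (removed further down), which mapped over every cell and reassembled the persisted columns, this version narrows the work to candidate text columns and masks out non-str cells, so mixed-type object columns (NaN, ints, Decimals) are left alone rather than copied around.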
{luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap/utils/storage/blob.py
@@ -4,7 +4,7 @@ from datetime import timezone
 import pandas as pd
 import pyarrow as pa, pyarrow.parquet as pq
 from azure.storage.blob import BlobServiceClient
-import tempfile
+import io

 from ..dataframe import read_bytes

@@ -34,29 +34,14 @@ class BlobParquetClient:
             return None, False


-    def write_df(self, df, blob_path: str, large_df: bool = False):
-        if not large_df:
-            table = pa.Table.from_pandas(df)
-            buf = io.BytesIO()
-            pq.write_table(table, buf)
-            buf.seek(0)
-            self._blob(blob_path).upload_blob(buf, overwrite=True)
+    def write_df(self, df, blob_path: str):

-        else:
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as f:
-                writer = None
-                chunk_size = 100_000
-                for i in range(0, len(df), chunk_size):
-                    chunk = pa.Table.from_pandas(df.iloc[i:i+chunk_size])
-                    if writer is None:
-                        writer = pq.ParquetWriter(f.name, chunk.schema)
-                    writer.write_table(chunk)
-                writer.close()
-
-            with open(f.name, "rb") as f_read:
-                self._blob(blob_path).upload_blob(f_read, overwrite=True)
-
-            os.remove(f.name)
+        blob = self._blob(blob_path)
+        table = pa.Table.from_pandas(df, preserve_index=False)
+        buf = io.BytesIO()
+        pq.write_table(table, buf)
+        buf.seek(0)
+        blob.upload_blob(buf, overwrite=True)


     def get_df_update_time(self, blob_path: str) -> float:
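The rewritten write_df serializes the whole table into a single in-memory buffer and streams it to the blob: simpler than the old chunked ParquetWriter path, at the price of holding the serialized table in RAM. A standalone sketch of the same pattern (connection string, container, and blob path are placeholders):

    import io
    import pandas as pd
    import pyarrow as pa
    import pyarrow.parquet as pq
    from azure.storage.blob import BlobServiceClient

    svc = BlobServiceClient.from_connection_string("<connection-string>")
    blob = svc.get_blob_client("my-container", "enriched/parquet/demo.parquet")

    df = pd.DataFrame({"x": [1, 2, 3]})
    table = pa.Table.from_pandas(df, preserve_index=False)  # drop the pandas index
    buf = io.BytesIO()
    pq.write_table(table, buf)  # full table serialized into RAM
    buf.seek(0)                 # rewind before streaming the bytes
    blob.upload_blob(buf, overwrite=True)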
@@ -102,12 +87,10 @@
         Checa se uma tabela existe no blob storage.
         """
         return self.exists_df(table_path)
-
-
-


     # ---------- interno --------------
     def _blob(self, path: str):
         path = str(PurePosixPath(path))
-        return self._svc.get_blob_client(self._container, path)
+        return self._svc.get_blob_client(self._container, path)
+
{luxorasap-0.1.17 → luxorasap-0.1.18}/src/luxorasap.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: luxorasap
-Version: 0.1.17
+Version: 0.1.18
 Summary: Toolbox da Luxor para ingestão, análise e automação de dados financeiros.
 Author-email: Luxor Group <backoffice@luxor.com.br>
 License: Proprietary – All rights reserved
{luxorasap-0.1.17 → luxorasap-0.1.18}/tests/test_ingest_cloud.py
@@ -7,17 +7,16 @@ import luxorasap.ingest.cloud as cloud
 def test_save_table_calls_blob_client(fake_blob, monkeypatch):
     captured = {}

-    def fake_write(df, path, large_df):
+    def fake_write(df, path):
         captured["df"] = df.copy()
         captured["path"] = path
-        captured["large_df"] = large_df

     monkeypatch.setattr(cloud, "_client", SimpleNamespace(write_df=fake_write))

     df = pd.DataFrame({"x": [1]})
     cloud.save_table("t1", df, directory="dir")
     assert captured["path"] == "dir/t1.parquet"
-    assert captured["df"].equals(df.astype(str))
+    assert captured["df"].equals(df)


 def test_incremental_load_merges_correctly(fake_blob, monkeypatch):
@@ -30,5 +29,5 @@ def test_incremental_load_merges_correctly(fake_blob, monkeypatch):
     writes = {}
     monkeypatch.setattr(cloud, "_client", SimpleNamespace(write_df=lambda df, p: writes.setdefault("df", df)))
     new = pd.DataFrame({"Date":[dt.date(2024,1,2)], "v":[2]})
-    cloud.incremental_load(stub_lq, "prices", new, increment_column="Date", large_df=False)
+    cloud.incremental_load(stub_lq, "prices", new, increment_column="Date")
     assert len(writes["df"]) == 2
luxorasap-0.1.17/src/luxorasap/utils/dataframe/transforms.py (deleted)
@@ -1,52 +0,0 @@
-import pandas as pd
-
-def text_to_lowercase(t: pd.DataFrame) -> pd.DataFrame:
-    """
-    Converte todas as colunas de texto para lowercase
-    Args:
-        t (pd.DataFrame): pandas DataFrame
-    Returns:
-        pd.DataFrame
-    """
-
-    return t.map(lambda x: x.lower().strip() if isinstance(x, str) else x)
-
-
-def persist_column_formatting(t: pd.DataFrame, columns_to_persist_override : set = {}) -> pd.DataFrame:
-    """
-    Persiste a formatacao de algumas colunas, e transforma o resto em lowercase
-    Args:
-        t (pd.DataFrame): pandas DataFrame
-    Returns:
-        pd.DataFrame
-    """
-
-    columns_to_persist = {"Name", "Class", "Vehicles", "Segment"}
-    columns_to_persist = columns_to_persist.union(columns_to_persist_override)
-
-    if len(set(t.columns).intersection(columns_to_persist)) > 0:
-        # Vamos persistir a formatacao de algumas colunas
-        columns_order = list(t.columns)
-        columns_to_persist = list(set(t.columns).intersection(columns_to_persist))
-        persistent_data = t[columns_to_persist].copy()
-
-        columns_to_normalize = list(set(columns_order) - set(columns_to_persist))
-        t = text_to_lowercase(t[columns_to_normalize])
-        t.loc[:,columns_to_persist] = persistent_data
-        return t[columns_order]
-
-    # Nos outros casos, transformaremos tudo em lowercase
-    return text_to_lowercase(t)
-
-
-def prep_for_save(
-    df: pd.DataFrame,
-    *,
-    index: bool = False,
-    index_name: str = "index",
-    normalize: bool = False,
-):
-    if index:
-        name = df.index.name or index_name
-        df = df.reset_index().rename(columns={"index": name})
-    return persist_column_formatting(df) if normalize else df