luxorasap 0.1.17__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
luxorasap/__init__.py CHANGED
@@ -13,7 +13,7 @@ from types import ModuleType
13
13
  try:
14
14
  __version__: str = metadata.version(__name__)
15
15
  except metadata.PackageNotFoundError: # editable install
16
- __version__ = "0.1.17"
16
+ __version__ = "0.1.18"
17
17
 
18
18
  # ─── Lazy loader ─────────────────────────────────────────────────
19
19
  def __getattr__(name: str) -> ModuleType:
@@ -7,6 +7,7 @@ from luxorasap.utils.storage import BlobParquetClient
7
7
  from luxorasap.utils.dataframe import prep_for_save
8
8
  from luxorasap.datareader import LuxorQuery
9
9
 
10
+
10
11
  __all__ = ["save_table", "incremental_load"]
11
12
 
12
13
  _client = BlobParquetClient() # instância única para o módulo
@@ -21,8 +22,7 @@ def save_table(
21
22
  index_name: str = "index",
22
23
  normalize_columns: bool = True,
23
24
  directory: str = "enriched/parquet",
24
- override=False,
25
- large_df: bool = False
25
+ override=False
26
26
  ):
27
27
  """Salva DataFrame como Parquet em ADLS (sobrescrevendo)."""
28
28
 
@@ -35,7 +35,10 @@ def save_table(
35
35
  return
36
36
 
37
37
  df = prep_for_save(df, index=index, index_name=index_name, normalize=normalize_columns)
38
- _client.write_df(df.astype(str), f"{directory}/{table_name}.parquet", large_df=large_df)
38
+
39
+ #_client.write_df(df.astype(str), f"{directory}/{table_name}.parquet")
40
+ _client.write_df(df, f"{directory}/{table_name}.parquet")
41
+
39
42
 
40
43
 
41
44
  def incremental_load(
@@ -47,8 +50,7 @@ def incremental_load(
47
50
  index: bool = False,
48
51
  index_name: str = "index",
49
52
  normalize_columns: bool = True,
50
- directory: str = "enriched/parquet",
51
- large_df: bool = False
53
+ directory: str = "enriched/parquet"
52
54
  ):
53
55
  """Concatena novos dados aos existentes, cortando duplicados pela data."""
54
56
  df["Last_Updated"] = dt.datetime.now()
@@ -66,6 +68,5 @@ def incremental_load(
66
68
  index_name=index_name,
67
69
  normalize_columns=normalize_columns,
68
70
  directory=directory,
69
- override=True,
70
- large_df=large_df
71
+ override=True
71
72
  )
@@ -1,4 +1,4 @@
1
- from .transforms import prep_for_save, persist_column_formatting, text_to_lowercase
1
+ from .transforms import prep_for_save, persist_column_formatting, text_to_lowercase_inplace
2
2
  from .reader import read_bytes
3
3
 
4
- __all__ = ["prep_for_save", "persist_column_formatting", "text_to_lowercase", "read_bytes"]
4
+ __all__ = ["prep_for_save", "persist_column_formatting", "text_to_lowercase_inplace", "read_bytes"]
@@ -1,42 +1,48 @@
1
1
  import pandas as pd
2
+ from pandas.api.types import is_object_dtype, is_string_dtype
2
3
 
3
- def text_to_lowercase(t: pd.DataFrame) -> pd.DataFrame:
4
+
5
+ def text_to_lowercase_inplace(df: pd.DataFrame, cols: list[str]) -> None:
4
6
  """
5
- Converte todas as colunas de texto para lowercase
6
- Args:
7
- t (pd.DataFrame): pandas DataFrame
8
- Returns:
9
- pd.DataFrame
7
+ Converte para lower+strip apenas as células que são str.
8
+ Não tenta aplicar `.str` se a coluna (ou célula) não for string.
9
+ Opera in-place; não devolve nada.
10
10
  """
11
+ for col in cols:
12
+ # Precisa ser coluna potencialmente textual
13
+ if not (is_object_dtype(df[col]) or is_string_dtype(df[col])):
14
+ continue
11
15
 
12
- return t.map(lambda x: x.lower().strip() if isinstance(x, str) else x)
16
+ # Cria máscara com valores realmente str (ignora NaN, ints, decimals…)
17
+ mask = df[col].apply(lambda x: isinstance(x, str))
13
18
 
19
+ if mask.any(): # só se houver algo a tratar
20
+ df.loc[mask, col] = (
21
+ df.loc[mask, col]
22
+ .str.lower()
23
+ .str.strip()
24
+ )
14
25
 
15
- def persist_column_formatting(t: pd.DataFrame, columns_to_persist_override : set = {}) -> pd.DataFrame:
16
- """
17
- Persiste a formatacao de algumas colunas, e transforma o resto em lowercase
18
- Args:
19
- t (pd.DataFrame): pandas DataFrame
20
- Returns:
21
- pd.DataFrame
22
- """
23
26
 
24
- columns_to_persist = {"Name", "Class", "Vehicles", "Segment"}
25
- columns_to_persist = columns_to_persist.union(columns_to_persist_override)
26
-
27
- if len(set(t.columns).intersection(columns_to_persist)) > 0:
28
- # Vamos persistir a formatacao de algumas colunas
29
- columns_order = list(t.columns)
30
- columns_to_persist = list(set(t.columns).intersection(columns_to_persist))
31
- persistent_data = t[columns_to_persist].copy()
32
-
33
- columns_to_normalize = list(set(columns_order) - set(columns_to_persist))
34
- t = text_to_lowercase(t[columns_to_normalize])
35
- t.loc[:,columns_to_persist] = persistent_data
36
- return t[columns_order]
37
-
38
- # Nos outros casos, transformaremos tudo em lowercase
39
- return text_to_lowercase(t)
27
+ def persist_column_formatting(df: pd.DataFrame,
28
+ columns_to_persist_override: set | None = None) -> pd.DataFrame:
29
+ if columns_to_persist_override is None:
30
+ columns_to_persist_override = set()
31
+
32
+ cols_keep_case = {
33
+ "Name", "Class", "Vehicles", "Segment"
34
+ }.union(columns_to_persist_override)
35
+
36
+ # colunas objeto/string candidatas
37
+ candidate_cols = [
38
+ c for c in df.columns
39
+ if c not in cols_keep_case and
40
+ (df[c].dtype == "object" or pd.api.types.is_string_dtype(df[c]))
41
+ ]
42
+
43
+ text_to_lowercase_inplace(df, candidate_cols)
44
+
45
+ return df # mesma referência; alterações foram in-place
40
46
 
41
47
 
42
48
  def prep_for_save(
@@ -4,7 +4,7 @@ from datetime import timezone
4
4
  import pandas as pd
5
5
  import pyarrow as pa, pyarrow.parquet as pq
6
6
  from azure.storage.blob import BlobServiceClient
7
- import tempfile
7
+ import io
8
8
 
9
9
  from ..dataframe import read_bytes
10
10
 
@@ -34,29 +34,14 @@ class BlobParquetClient:
34
34
  return None, False
35
35
 
36
36
 
37
- def write_df(self, df, blob_path: str, large_df: bool = False):
38
- if not large_df:
39
- table = pa.Table.from_pandas(df)
40
- buf = io.BytesIO()
41
- pq.write_table(table, buf)
42
- buf.seek(0)
43
- self._blob(blob_path).upload_blob(buf, overwrite=True)
37
+ def write_df(self, df, blob_path: str):
44
38
 
45
- else:
46
- with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as f:
47
- writer = None
48
- chunk_size = 100_000
49
- for i in range(0, len(df), chunk_size):
50
- chunk = pa.Table.from_pandas(df.iloc[i:i+chunk_size])
51
- if writer is None:
52
- writer = pq.ParquetWriter(f.name, chunk.schema)
53
- writer.write_table(chunk)
54
- writer.close()
55
-
56
- with open(f.name, "rb") as f_read:
57
- self._blob(blob_path).upload_blob(f_read, overwrite=True)
58
-
59
- os.remove(f.name)
39
+ blob = self._blob(blob_path)
40
+ table = pa.Table.from_pandas(df, preserve_index=False)
41
+ buf = io.BytesIO()
42
+ pq.write_table(table, buf)
43
+ buf.seek(0)
44
+ blob.upload_blob(buf, overwrite=True)
60
45
 
61
46
 
62
47
  def get_df_update_time(self, blob_path: str) -> float:
@@ -102,12 +87,10 @@ class BlobParquetClient:
102
87
  Checa se uma tabela existe no blob storage.
103
88
  """
104
89
  return self.exists_df(table_path)
105
-
106
-
107
-
108
90
 
109
91
 
110
92
  # ---------- interno --------------
111
93
  def _blob(self, path: str):
112
94
  path = str(PurePosixPath(path))
113
- return self._svc.get_blob_client(self._container, path)
95
+ return self._svc.get_blob_client(self._container, path)
96
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: luxorasap
3
- Version: 0.1.17
3
+ Version: 0.1.18
4
4
  Summary: Toolbox da Luxor para ingestão, análise e automação de dados financeiros.
5
5
  Author-email: Luxor Group <backoffice@luxor.com.br>
6
6
  License: Proprietary – All rights reserved
@@ -1,4 +1,4 @@
1
- luxorasap/__init__.py,sha256=aPCQIYh22iaw5xRNSlLfrz65PJNvNMc4NXPrDfeLITs,1356
1
+ luxorasap/__init__.py,sha256=K9mc2r4AggUR7HX2DzwrsIQ2O68lH_nPqSOY3NZo0u4,1356
2
2
  luxorasap/btgapi/__init__.py,sha256=QUlfb5oiBY6K1Q5x4-a-x2wECe1At5wc2962I5odOJk,620
3
3
  luxorasap/btgapi/auth.py,sha256=PvyCtbEyBO2B1CIeAlNXWugKW1OgiKfPcVzS6K5FBnQ,1872
4
4
  luxorasap/btgapi/reports.py,sha256=ZVEMLoJPXc0r3XjPJPMsKQN0zZd1Npd7umNpAj1bncs,8040
@@ -6,16 +6,16 @@ luxorasap/btgapi/trades.py,sha256=956HZ9BvN9C_VQvKTyBLN0x6ZygwVqBZN11F7OnNbDI,59
6
6
  luxorasap/datareader/__init__.py,sha256=41RAvbrQ4R6oj67S32CrKqolx0CJ2W8cbOF6g5Cqm2g,120
7
7
  luxorasap/datareader/core.py,sha256=P8AjtRFRRmUrqjbjfKRb0wTLW2eHcUva8iWid4uh4PE,155123
8
8
  luxorasap/ingest/__init__.py,sha256=XhxDTN2ar-u6UCPhnxNU_to-nWiit-SpQ6cA_N9eMSs,795
9
- luxorasap/ingest/cloud/__init__.py,sha256=P4GSvfC4JFMbTSpevhfvbfyn-zqiBMIMWhc_U0mQCFc,2153
9
+ luxorasap/ingest/cloud/__init__.py,sha256=CT1lTyr5_kxy9BwWWO0QjxBYzFHNJRcAK8eiGPJdmwM,2121
10
10
  luxorasap/ingest/legacy_local/dataloader.py,sha256=zKPhuiBSFwkuWN6d8g2s60KkbVk1R_1cGMCtQM9j-0c,11908
11
11
  luxorasap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- luxorasap/utils/dataframe/__init__.py,sha256=dU_RwTTOi6F3mlhM-0MYWM_qexBN9BmmKc_yrDE1Lwc,207
12
+ luxorasap/utils/dataframe/__init__.py,sha256=wRY4e0rNOp4JlzIxqR_tjwPW8leFMCm7SifzuxLwY0o,223
13
13
  luxorasap/utils/dataframe/reader.py,sha256=Vzjdw-AeS1lnWEHQ8RZNh0kK93NWTp0NWVi_B6mN5N0,616
14
- luxorasap/utils/dataframe/transforms.py,sha256=Bm_cv9L9923QIXH82Fa_M4pM94f2AJRPu62Vv_i7tto,1684
14
+ luxorasap/utils/dataframe/transforms.py,sha256=bqGhMOB-fnJl_SrTT9JTg7eC8oDsqKlKWuO6rnRtGeA,1857
15
15
  luxorasap/utils/storage/__init__.py,sha256=U3XRq94yzRp3kgBSUcRzs2tQgJ4o8h8a1ZzwiscA5XM,67
16
- luxorasap/utils/storage/blob.py,sha256=MVGOXnZR62jJPNWIM8sH_-DF-WuxMFVlpmi1llfos78,3949
17
- luxorasap-0.1.17.dist-info/METADATA,sha256=aqn2ND5T-_1YUIx1HuRWAxStin3ikanLYS0e9dTJ2F4,3804
18
- luxorasap-0.1.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
- luxorasap-0.1.17.dist-info/entry_points.txt,sha256=XFh-dOwUhlya9DmGvgookMI0ezyUJjcOvTIHDEYS44g,52
20
- luxorasap-0.1.17.dist-info/top_level.txt,sha256=9YOL6bUIpzY06XFBRkUW1e4rgB32Ds91fQPGwUEjxzU,10
21
- luxorasap-0.1.17.dist-info/RECORD,,
16
+ luxorasap/utils/storage/blob.py,sha256=hy18amzBqJtJqKGTaO74tHuy4_7FfvyN83yIC_Dnz7g,3206
17
+ luxorasap-0.1.18.dist-info/METADATA,sha256=c2ioyv688f7TbbRGkh3hJXFNn18JqrYKzOc8FsEKF0c,3804
18
+ luxorasap-0.1.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ luxorasap-0.1.18.dist-info/entry_points.txt,sha256=XFh-dOwUhlya9DmGvgookMI0ezyUJjcOvTIHDEYS44g,52
20
+ luxorasap-0.1.18.dist-info/top_level.txt,sha256=9YOL6bUIpzY06XFBRkUW1e4rgB32Ds91fQPGwUEjxzU,10
21
+ luxorasap-0.1.18.dist-info/RECORD,,