luxorasap 0.0.2__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
luxorasap/__init__.py CHANGED
@@ -13,7 +13,7 @@ from types import ModuleType
 try:
     __version__: str = metadata.version(__name__)
 except metadata.PackageNotFoundError:  # editable install
-    __version__ = "0.0.2"
+    __version__ = "0.1.1"
 
 # ─── Lazy loader ─────────────────────────────────────────────────
 def __getattr__(name: str) -> ModuleType:
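The hunk ends at the package's PEP 562 lazy loader, of which only the signature is visible in this diff. For orientation, a typical implementation of such a loader looks roughly like the sketch below; the submodule set is inferred from this wheel's RECORD, and the body is illustrative, not the package's actual code.

from importlib import import_module
from types import ModuleType

_SUBMODULES = {"btgapi", "datareader", "ingest", "utils"}  # inferred from RECORD

def __getattr__(name: str) -> ModuleType:
    # Import submodules on first attribute access rather than at package import.
    if name in _SUBMODULES:
        module = import_module(f".{name}", __name__)
        globals()[name] = module  # cache so __getattr__ runs once per name
        return module
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")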
luxorasap/btgapi/__init__.py ADDED
@@ -0,0 +1,16 @@
+"""Wrapper for the BTG Pactual APIs."""
+
+from .auth import get_access_token, BTGApiError
+from .reports import request_portfolio, await_report_ticket_result, process_zip_to_dfs, request_investors_transactions_report
+from .trades import submit_offshore_equity_trades, await_transaction_ticket_result
+
+__all__ = [
+    "BTGApiError",
+    "get_access_token",
+    "request_portfolio",
+    "await_report_ticket_result",
+    "submit_offshore_equity_trades",
+    "await_transaction_ticket_result",
+    "process_zip_to_dfs",
+    "request_investors_transactions_report"
+]
luxorasap/btgapi/auth.py ADDED
@@ -0,0 +1,57 @@
+import os
+import requests
+from dotenv import load_dotenv
+from loguru import logger
+
+__all__ = ["BTGApiError", "get_access_token"]
+
+
+class BTGApiError(Exception):
+    """Generic BTG API error."""
+
+
+def get_access_token(*, client_id=None, client_secret=None, test_env: bool = True,
+                     timeout: int = 20) -> str:
+    """Fetches a JWT, valid for ~1 h, for authenticating against the BTG APIs.
+
+    Args:
+        client_id: Client ID (optional; read from an env var when None).
+        client_secret: Client secret (optional; read from an env var when None).
+        test_env: Test environment (True) or production (False).
+        timeout: Request timeout in seconds.
+
+    Returns:
+        Access token.
+
+    Raises:
+        BTGApiError: If the credentials are unavailable or the request fails.
+    """
+
+    if not client_id or not client_secret:
+        load_dotenv()
+        client_id = os.getenv("BTG_CLIENT_ID")
+        client_secret = os.getenv("BTG_CLIENT_SECRET")
+        if not client_id or not client_secret:
+            raise BTGApiError("BTG_CLIENT_ID or BTG_CLIENT_SECRET not set in the environment")
+
+    url = (
+        "https://funds-uat.btgpactual.com/connect/token"
+        if test_env
+        else "https://funds.btgpactual.com/connect/token"
+    )
+
+    resp = requests.post(
+        url,
+        headers={"Content-Type": "application/x-www-form-urlencoded"},
+        data={
+            "grant_type": "client_credentials",
+            "client_id": client_id,
+            "client_secret": client_secret,
+        },
+        timeout=timeout,
+    )
+
+    if resp.ok:
+        token = resp.json().get("access_token")
+        logger.debug("BTG token obtained (len={})", len(token) if token else "None")
+        return token or ""
+    raise BTGApiError(f"Authentication failed: HTTP {resp.status_code} – {resp.text}")
luxorasap/btgapi/reports.py ADDED
@@ -0,0 +1,188 @@
+import datetime as dt
+import io
+import json
+import time
+import zipfile
+from typing import Optional, Dict
+import pandas as pd
+import requests
+from loguru import logger
+
+from .auth import BTGApiError
+from luxorasap.utils.dataframe import read_bytes
+
+__all__ = [
+    "request_portfolio",
+    "check_report_ticket",
+    "await_report_ticket_result",
+    "process_zip_to_dfs",
+    "request_investors_transactions_report"
+]
+
+_REPORT_ENDPOINT = "https://funds.btgpactual.com/reports/Portfolio"
+_TICKET_ENDPOINT = "https://funds.btgpactual.com/reports/Ticket"
+_INVESTOR_TX_ENDPOINT = (
+    "https://funds.btgpactual.com/reports/RTA/InvestorTransactionsFileReport"
+)
+_REPORT_TYPES = {"excel": 10, "xml5": 81, "pdf": 2}
+
+
+def request_portfolio(token: str, fund_name: str, start_date: dt.date, end_date: dt.date,
+                      format: str = "excel") -> str:
+    """Submits a portfolio report request; returns a *ticket*.
+
+    Args:
+        token: Authentication token.
+        fund_name: Fund name.
+        start_date: Start date.
+        end_date: End date.
+        format: Report format ("excel", "xml5", "pdf").
+
+    Returns:
+        Request ticket.
+    """
+    body = {
+        "contract": {
+            "startDate": f"{start_date}T00:00:00Z",
+            "endDate": f"{end_date}T00:00:00Z",
+            "typeReport": _REPORT_TYPES[format],
+            "fundName": fund_name,
+        },
+        "pageSize": 100,
+        "webhookEndpoint": "string",
+    }
+    r = requests.post(
+        _REPORT_ENDPOINT,
+        headers={"X-SecureConnect-Token": token, "Content-Type": "application/json"},
+        json=body,
+        timeout=30,
+    )
+    if r.ok:
+        return r.json()["ticket"]
+    raise BTGApiError(f"Error requesting report: {r.status_code} – {r.text}")
+
+
+def _download_url(download_url: str) -> bytes:
+    r = requests.get(download_url, timeout=60)
+    if r.ok:
+        return r.content
+    raise BTGApiError(f"Download failed: {r.status_code} – {r.text}")
+
+
+def check_report_ticket(token: str, ticket: str, *, page: Optional[int] = None) -> bytes:
+    """Polls a single ticket; returns the bytes when ready, raises BTGApiError otherwise."""
+
+    params = {"ticketId": ticket}
+    if page is not None:
+        params["pageNumber"] = str(page)
+
+    r = requests.get(
+        _TICKET_ENDPOINT,
+        params=params,
+        headers={"X-SecureConnect-Token": token},
+        timeout=30,
+    )
+    # 1. A raw ZIP response cannot be parsed as JSON → return its bytes directly
+    try:
+        payload = r.json()
+    except json.JSONDecodeError:
+        if r.ok:
+            return r.content
+        raise BTGApiError(f"Unexpected response: {r.status_code} – {r.text}")
+
+    # 2. Otherwise inspect the decoded JSON payload
+    result = payload.get("result")
+    if result == "Processando":
+        raise BTGApiError("Processando")
+
+    # 3. When ready, result is a JSON string containing UrlDownload
+    if isinstance(result, str):
+        try:
+            info: Dict[str, str] = json.loads(result)
+            url = info["UrlDownload"]
+            return _download_url(url)
+        except Exception as exc:
+            raise BTGApiError(f"Failed to parse result: {exc}") from exc
+
+    raise BTGApiError("Unknown response format")
+
+
+def await_report_ticket_result(token: str, ticket: str, *, attempts: int = 10,
+                               interval: int = 15) -> bytes:
+    """Waits until the report is ready and returns its binary content.
+
+    Args:
+        token: Authentication token.
+        ticket: Request ticket.
+        attempts: Number of attempts.
+        interval: Interval between attempts, in seconds.
+
+    Returns:
+        Binary content of the report (a ZIP file).
+
+    Raises:
+        BTGApiError: If the report does not become ready or fails.
+    """
+    for i in range(attempts):
+        try:
+            return check_report_ticket(token, ticket)
+        except BTGApiError as err:
+            if "Processando" in str(err):
+                logger.debug("Ticket {} pending ({}/{})", ticket, i + 1, attempts)
+                time.sleep(interval)
+                continue
+            raise
+    raise BTGApiError("Report did not become ready within the time limit")
+
+
+def process_zip_to_dfs(zip_bytes: bytes) -> dict[str, pd.DataFrame]:
+    """Extracts every file in the ZIP and returns DataFrames keyed by file name."""
+    out: dict[str, pd.DataFrame] = {}
+    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
+        for name in zf.namelist():
+            if name.endswith("/"):
+                continue
+            out[name] = read_bytes(zf.read(name), filename=name)
+
+    return out
+
+
+def request_investors_transactions_report(token: str, query_date: dt.date, *,
+        distributors: list[str] | None = None, fund_names: list[str] | None = None,
+        consolidate_by_account: bool = True, page_size: int = 100) -> str:
+    """Generates a ticket for the investor transactions (RTA) report.
+
+    Args:
+        token: Authentication token.
+        query_date: Query date.
+        distributors: List of distributor names (optional).
+        fund_names: List of fund names (optional).
+        consolidate_by_account: Consolidate by account (default True).
+        page_size: Page size (default 100).
+
+    Returns:
+        The *ticket* (string) to be used with `await_report_ticket_result`.
+    """
+    body = {
+        "contract": {
+            "distributors": distributors or [],
+            "queryDate": f"{query_date.isoformat()}T00:00:00Z",
+            "accountNumber": "",
+            "consolidateByAccount": str(consolidate_by_account).lower(),
+            "fundNames": fund_names or [],
+        },
+        "pageSize": page_size,
+        "webhookEndpoint": "string",
+    }
+
+    r = requests.post(
+        _INVESTOR_TX_ENDPOINT,
+        headers={"X-SecureConnect-Token": token, "Content-Type": "application/json"},
+        json=body,
+        timeout=30,
+    )
+    if r.ok:
+        return r.json()["ticket"]
+    raise BTGApiError(
+        f"InvestorTransactionsFileReport error: {r.status_code} – {r.text}"
+    )
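Assuming valid credentials, the request → poll → unpack flow ties these functions together; the fund name below is a placeholder:

from datetime import date
from luxorasap.btgapi import (
    get_access_token, request_portfolio,
    await_report_ticket_result, process_zip_to_dfs,
)

token = get_access_token(test_env=False)
ticket = request_portfolio(token, "Example Fund FIA",  # placeholder fund name
                           date(2025, 1, 2), date(2025, 1, 31), format="excel")
zip_bytes = await_report_ticket_result(token, ticket, attempts=20, interval=15)
dfs = process_zip_to_dfs(zip_bytes)  # {file name inside the ZIP: DataFrame}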
luxorasap/btgapi/trades.py ADDED
@@ -0,0 +1,181 @@
+import time
+from typing import List, Dict
+
+import pandas as pd
+import requests
+from loguru import logger
+
+from .auth import BTGApiError
+
+__all__ = [
+    "submit_offshore_equity_trades",
+    "get_submitted_transactions",
+    "await_transaction_ticket_result",
+]
+
+_EP_SUBMIT_TEST = "https://funds-uat.btgpactual.com/offshore/TradeOffShore/Equity"
+_EP_SUBMIT_PROD = "https://funds.btgpactual.com/offshore/TradeOffShore/Equity"
+_EP_TICKET_TEST = "https://funds-uat.btgpactual.com/offshore/Ticket"
+_EP_TICKET_PROD = "https://funds.btgpactual.com/offshore/Ticket"
+
+_MARKET_IDS = {
+    "equity": 20,
+    "future": 22,
+    "bonds": 24,
+    "repo": 28,
+    "portfolio_swap": 29,
+    "interest_rate_swap": 30,
+    "performance_swap": 31,
+    "variance_swap": 32,
+    "equity_option": 33,
+    "future_option": 34,
+    "fx_option_vanilla": 35,
+    "fx_option_barrier": 36,
+    "fx": 25,
+}
+
+
+def submit_offshore_equity_trades(token: str, trades: list[dict], *, test_env: bool = True) -> str:
+    """Submits a list of offshore equity trades to the BTG API.
+
+    Args:
+        token: Authentication token.
+        trades: List of dictionaries representing the trades. Each dict must
+            follow the structure expected by the API. Template:
+            [{
+                "currency": "USD",
+                "price": "60.12",
+                "productCodeValue": "...",
+                "glAccount": "...",
+                "primeBroker": "...",
+                "side": "Buy",
+                "tradeQuantity": "1000",
+                "commissionAmount": "12.50",
+                "settlementCurrency": "USD",
+                "fXRate": "1.0",
+                "externalReference": "TRADE-001",
+                "counterparty": "...",
+                "fundNickname": "my_fund",
+                "orderIdentification": "...",
+                "book": "some_book",
+                "tradeDate": "2025-02-19T16:03:53.596Z"
+            }]
+
+        test_env: Test environment (True) or production (False).
+
+    Returns:
+        Request ticket.
+
+    Raises:
+        BTGApiError: If the request fails.
+    """
+    url = _EP_SUBMIT_TEST if test_env else _EP_SUBMIT_PROD
+    r = requests.post(
+        url,
+        headers={
+            "X-SecureConnect-Token": token,
+            "Content-Type": "application/json-patch+json",
+        },
+        json={"results": trades},
+        timeout=30,
+    )
+    if r.status_code in (200, 201):
+        ticket = r.json()["ticket"]
+        logger.debug("Trades submitted, ticket {}", ticket)
+        return ticket
+    raise BTGApiError(f"Submit failed: {r.status_code} – {r.text}")
+
+
+def get_submitted_transactions(token: str, *, ticket_id: str = "", start_date: str = "",
+                               end_date: str = "", market: str = "", test_env: bool = True) -> Dict:
+    """Queries the detailed status of a ticket, or filters by dates/market.
+
+    Args:
+        token: Authentication token.
+        ticket_id: Ticket ID (optional).
+        start_date: Start date (optional, YYYY-MM-DD format).
+        end_date: End date (optional, YYYY-MM-DD format).
+        market: Market (optional). Valid values: "equity", "future", "bonds", "repo",
+            "portfolio_swap", "interest_rate_swap", "performance_swap", "variance_swap",
+            "equity_option", "future_option", "fx_option_vanilla", "fx_option_barrier", "fx".
+        test_env: Test environment (True) or production (False).
+
+    Returns:
+        Dictionary with the response data.
+
+    Raises:
+        BTGApiError: If the request fails or the response is invalid.
+    """
+
+    base_url = _EP_TICKET_TEST if test_env else _EP_TICKET_PROD
+
+    if ticket_id:
+        params = {"Ticket": ticket_id, "Detailed": "true"}
+    elif start_date and end_date and market:
+        params = {
+            "StartDate": start_date,
+            "EndDate": end_date,
+            "Market": _MARKET_IDS.get(market.lower(), market),
+            "Detailed": "true",
+        }
+    else:
+        raise BTGApiError("Provide ticket_id OR start_date+end_date+market")
+
+    r = requests.get(
+        base_url,
+        headers={"X-SecureConnect-Token": token},
+        params=params,
+        timeout=30,
+    )
+    try:
+        return r.json()
+    except Exception as exc:
+        raise BTGApiError(f"Invalid response: {r.status_code}") from exc
+
+
+def await_transaction_ticket_result(token: str, ticket_id: str, *, attempts: int = 10,
+                                    interval: int = 30, test_env: bool = True) -> pd.DataFrame:
+    """Waits for the ticket to complete and returns a DataFrame with its metadata.
+
+    Args:
+        token: Authentication token.
+        ticket_id: Ticket ID.
+        attempts: Number of attempts.
+        interval: Interval between attempts, in seconds.
+        test_env: Test environment (True) or production (False).
+
+    Returns:
+        DataFrame with the detailed status of the transactions.
+
+    Raises:
+        BTGApiError: If the ticket does not complete or fails.
+    """
+
+    cols = ["Status", "Ticket", "TradeId", "Env", "Msg"]
+    results = pd.DataFrame(columns=cols)
+
+    for i in range(attempts):
+        data = get_submitted_transactions(token, ticket_id=ticket_id, test_env=test_env)
+        trades_info = data["trades"]
+        ticket_status = trades_info[0]["Status"].lower()
+
+        # production may sit in "pendente" (pending) for a while; keep waiting
+        if ticket_status == "pendente":
+            logger.debug("Ticket {} pending ({}/{})", ticket_id, i + 1, attempts)
+            time.sleep(interval)
+            continue
+
+        trades = trades_info[0]["Details"]["TicketDetalhesEquity"]
+        for tr in trades:
+            results.loc[len(results)] = [
+                tr["stateItemFile"].lower(),
+                ticket_id.lower(),
+                tr["externalReference"],
+                "test" if test_env else "prod",
+                tr["mensagens"],
+            ]
+        return results
+
+    raise BTGApiError("Ticket did not complete within the time limit")
luxorasap/datareader/core.py CHANGED
@@ -1,6 +1,4 @@
 # Imports
-__version__ = "0.0.1"
-
 import pandas as pd
 import datetime as dt
 from datetime import timezone
@@ -9,20 +7,16 @@ import os, sys
 import time
 import numpy as np
 from scipy.optimize import newton
-from pathlib import Path
 import io
-try:
-    from azure.storage.blob import BlobServiceClient
-    import pyarrow as pa
-    import pyarrow.parquet as pq
-    from dotenv import load_dotenv
-    load_dotenv()
-except ImportError:
-    print("Please install the dependencies below:")
-    print('pip install azure-storage-blob')
-    print('pip install pyarrow')
-    print('pip install python-dotenv')
-    sys.exit()
+from dotenv import load_dotenv
+
+import pyarrow as pa
+import pyarrow.parquet as pq
+
+from luxorasap.utils.storage import BlobParquetClient
+load_dotenv()
+
+# Do not import ingest here: risk of a circular import.
 
 
 #ADLS_CONNECTION_STRING = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
@@ -32,22 +26,16 @@ class LuxorQuery:
 
     def __init__(self, update_mode="optimized", is_develop_mode=False, tables_path=None,
                  blob_directory='enriched/parquet', adls_connection_string:str=None, container_name="luxorasap"):
-
        """
        update_mode:
            'standard'  - loads every available table
            'optimized' - loads only the tables actually used, on demand
        """
-
-        if adls_connection_string is None:
-            adls_connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
 
+        self.blob_client = BlobParquetClient(adls_connection_string=adls_connection_string,
+                                             container=container_name)
        self.blob_directory = blob_directory
-        self.container_name = container_name
 
-        self.CONNECTION_STRING = adls_connection_string
-        self.blob_service_client = BlobServiceClient.from_connection_string(self.CONNECTION_STRING)
-        self.container_client = self.blob_service_client.get_container_client(self.container_name)
 
 
        self.modified_tables = []
        self.is_develop_mode = is_develop_mode
@@ -71,40 +59,12 @@ class LuxorQuery:
            self.update()  # This 1st run initializes the dictionaries above
 
 
-
-    #def __set_tables_path(self):
-    #
-    #    cur_dir = Path().absolute()
-    #    cur_dir_items = cur_dir.parts
-    #    home = cur_dir.home()
-    #    onedrive_name = cur_dir_items[3]
-    #    if "OneDrive".lower() not in onedrive_name.lower():
-    #        logger.critical("In the current layout, main must run inside the OneDrive directory to use this module.")
-    #        # the expected format is: "C:/Users/{user}/{onedrive_name}"
-    #
-    #    tables_path = Path(home)/onedrive_name/"projetos"/"LuxorASAP"/"luxorDB"/"tables"
-    #    if self.is_develop_mode:
-    #        tables_path = Path(home)/onedrive_name/"projetos"/"LuxorASAP_Develop"/"luxorDB"/"tables"
-    #
-    #    return tables_path
-
-
-    def __get_blob_update_time(self, table_name):
-
-        blob_name = f"{self.blob_directory}/{table_name}.parquet"
-        blob_client = self.blob_service_client.get_blob_client(container=self.container_name, blob=blob_name)
-
-        # Fetch the blob properties
-        properties = blob_client.get_blob_properties()
-        return properties['last_modified'].replace(tzinfo=timezone.utc).timestamp()
-
-
    def __is_table_modified(self, table_name):
        """Returns True or False indicating whether the table named in 'table_name' was created or modified.
 
        Args:
            table_name (str): table name
-
+            table_path (str): path to the table in the blob
        Returns:
            bool: True if it was created or modified
        """
@@ -114,32 +74,13 @@ class LuxorQuery:
 
        try:
            file_path = self.tables_in_use[table_name]["table_path"]
-            file_last_update = self.__get_blob_update_time(table_name)
+            file_last_update = self.blob_client.get_df_update_time(file_path)
            return file_last_update > self.tables_in_use[table_name]["update_time"]
 
        except:
            logger.critical(f"File <{file_path}> not found.")
 
        return False
-
-
-    def __persist_column_formatting(self, t):
-
-        columns_to_persist = {"Name", "Class", "Vehicles", "Segment"}
-
-        if len(set(t.columns).intersection(columns_to_persist)) > 0:
-            # Persist the formatting of a few columns
-            columns_order = list(t.columns)
-            columns_to_persist = list(set(t.columns).intersection(columns_to_persist))
-            persistent_data = t[columns_to_persist].copy()
-
-            columns_to_normalize = list(set(columns_order) - set(columns_to_persist))
-            t = self.text_to_lowercase(t[columns_to_normalize])
-            t.loc[:,columns_to_persist] = persistent_data
-            return t[columns_order]
-
-        # In every other case, lowercase everything
-        return self.text_to_lowercase(t)
 
 
    def __get_tickers_bbg(self):
@@ -152,15 +93,13 @@ class LuxorQuery:
    def table_exists(self, table_name, blob_directory=None):
        # Checks ADLS for a table with the given name
 
-        table_path = f"{blob_directory}/{table_name}.parquet"
        if blob_directory is None:
            blob_directory = self.blob_directory
-            table_path = f"{blob_directory}/{table_name}.parquet"
+
+        table_path = f"{blob_directory}/{table_name}.parquet"
 
-        blob_client = self.container_client.get_blob_client(table_path)
+        return self.blob_client.table_exists(table_path)
 
-        return blob_client.exists()
-
 
    def get_table(self, table_name, index=False, index_name="index", dtypes_override={}, force_reload=False):
        """
@@ -189,6 +128,7 @@ class LuxorQuery:
            dtypes_override: dict : set - Dictionary of column dtypes to override.
                Must contain the keys 'float', 'date', 'bool' and 'str_nan_format' (replaces 'nan' with pd.NA).
                For each key, provide a set with the names of the columns to cast.
+
        """
        table_name = table_name.lower().replace(" ", "_")
        if table_name == 'bbg_tickers': return self.__get_tickers_bbg()  # DEPRECATED TODO: remove after testing
@@ -201,47 +141,24 @@ class LuxorQuery:
 
 
    def __load_table(self, table_name, index=False, index_name="index", dtypes_override={}):
-
-
-        def __read_blob_parquet(table_name):
-
-            container_name = "luxorasap"
-            blob_name = f"{self.blob_directory}/{table_name}.parquet"
-
-            blob_client = self.blob_service_client.get_blob_client(container=container_name, blob=blob_name)
-
-            # Download the blob into memory
-            download_stream = None
-            try:
-                download_stream = blob_client.download_blob()
-            except Exception:
-                print(f"Table '{table_name}' not found in the blob.")
-                return None, False
-            parquet_data = download_stream.readall()
-
-            # Read the parquet from the in-memory stream
-            parquet_buffer = io.BytesIO(parquet_data)
-            table = pq.read_table(parquet_buffer)
-            df = table.to_pandas()
-
-            return df, True
 
-
        def __load_parquet(table_name):
            table_path = f"{self.blob_directory}/{table_name}.parquet"
 
            try:
                #update_time = os.path.getmtime(table_path)
-                update_time = self.__get_blob_update_time(table_name)
                table_data = None
                # First, try reading from the blob
-                table_data, blob_read_success = __read_blob_parquet(table_name)
-
+                table_data, blob_read_success = self.blob_client.read_df(table_path)
+
                if not blob_read_success:
                    logger.critical(f"Could not load table '{table_name}' from the blob.")
                    #print("--> Onedrive fallback.")
                    #table_data = pd.read_parquet(table_path, engine="fastparquet")
-
+                update_time = self.blob_client.get_df_update_time(table_path)
+
+                assert table_data is not None
+
                table_columns = set(table_data.columns)
 
                float_dtypes = {"Last_Price", "Price", "px_last", "Quota", "#", "Avg_price", "Variation", "Variation_tot",
@@ -339,7 +256,7 @@ class LuxorQuery:
            except Exception:
                logger.error(f"Could not set column {index_name} as the index for table {table_name}.")
 
-        table_data = self.__persist_column_formatting(table_data)
+        #table_data = self.__persist_column_formatting(table_data)
 
        self.tables_in_use[table_name] = {"table_data" : table_data,
                                          "table_path" : table_path,
@@ -2909,19 +2826,6 @@ class LuxorQuery:
        assets = self.get_table("assets")
        name = assets.query("Ticker == @ticker")["Name"].squeeze()
        return name
-
-
-    def list_blob_files(self, sub_dir, ends_with=None):
-        """
-        Lists every file inside a directory in blob storage.
-        """
-
-        # List the files in the directory
-        blob_files = self.container_client.list_blobs(name_starts_with=sub_dir)
-
-        if ends_with is not None:
-            return [blob_file.name for blob_file in blob_files if blob_file.name.endswith(ends_with)]
-        return [blob_file.name for blob_file in blob_files]
 
 
    def simulate_portfolio_performance(self, portfolio: dict, portfolio_date: dt.date, adm_fee: float, performance_fee: float = 0):
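From the caller's perspective the refactor is invisible: all blob I/O now flows through BlobParquetClient, but the public query surface is unchanged. A minimal sketch (assuming AZURE_STORAGE_CONNECTION_STRING is set; "assets" is a table referenced by the code above):

from luxorasap.datareader import LuxorQuery

lq = LuxorQuery(update_mode="optimized")
if lq.table_exists("assets"):
    assets = lq.get_table("assets")  # fetched from enriched/parquet on demand
    print(assets.head())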
luxorasap/ingest/__init__.py ADDED
@@ -0,0 +1,23 @@
+"""Exports the “cloud” API by default and keeps the legacy DataLoader available."""
+
+from importlib import import_module
+from warnings import warn
+
+# Modern API (recommended)
+from .cloud import save_table, incremental_load  # noqa: F401
+
+__all__ = ["save_table", "incremental_load"]
+
+# Bridge to the old loader -------------------------------------------------
+try:
+    legacy_mod = import_module(".legacy_local.dataloader", __name__)
+    DataLoader = legacy_mod.DataLoader  # noqa: F401
+    warn(
+        "luxorasap.ingest.DataLoader is legacy and will be discontinued; "
+        "migrate to luxorasap.ingest.save_table / incremental_load.",
+        DeprecationWarning,
+        stacklevel=1,
+    )
+except Exception:
+    # if the legacy file does not exist, simply don't export it
+    pass
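In practice the bridge means both import styles keep working during the transition; a short sketch:

# Preferred, modern entry points:
from luxorasap.ingest import save_table, incremental_load

# Transitional: still available while the legacy module ships, but its import
# emits a DeprecationWarning the first time the package is loaded.
from luxorasap.ingest import DataLoader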
luxorasap/ingest/cloud/__init__.py ADDED
@@ -0,0 +1,54 @@
+"""Modern ingestion layer: writes / increments tables in ADLS (Parquet)."""
+
+import pandas as pd
+
+from luxorasap.utils.storage import BlobParquetClient
+from luxorasap.utils.dataframe import prep_for_save
+from luxorasap.datareader import LuxorQuery
+
+__all__ = ["save_table", "incremental_load"]
+
+_client = BlobParquetClient()  # single instance for the module
+
+
+# ────────────────────────────────────────────────────────────────
+def save_table(
+    table_name: str,
+    df,
+    *,
+    index: bool = False,
+    index_name: str = "index",
+    normalize_columns: bool = False,
+    directory: str = "enriched/parquet",
+):
+    """Saves a DataFrame as Parquet in ADLS (overwriting)."""
+    df = prep_for_save(df, index=index, index_name=index_name, normalize=normalize_columns)
+    _client.write_df(df.astype(str), f"{directory}/{table_name}.parquet")
+
+
+def incremental_load(
+    lq: LuxorQuery,
+    table_name: str,
+    df,
+    *,
+    increment_column: str = "Date",
+    index: bool = False,
+    index_name: str = "index",
+    normalize_columns: bool = False,
+    directory: str = "enriched/parquet",
+):
+    """Concatenates new data onto the existing table, trimming overlap by date."""
+    if lq.table_exists(table_name):
+        prev = lq.get_table(table_name)
+        cutoff = df[increment_column].max()
+        prev = prev.query(f"{increment_column} < @cutoff")
+        df = pd.concat([prev, df], ignore_index=True)
+
+    save_table(
+        table_name,
+        df,
+        index=index,
+        index_name=index_name,
+        normalize_columns=normalize_columns,
+        directory=directory,
+    )
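A usage sketch for the incremental path (table name and rows are placeholders; AZURE_STORAGE_CONNECTION_STRING must be set for the module-level client):

import pandas as pd
from luxorasap.datareader import LuxorQuery
from luxorasap.ingest import incremental_load

lq = LuxorQuery()
new_rows = pd.DataFrame({"Date": ["2025-06-02"], "Ticker": ["abc"], "Price": [10.5]})
# Existing rows with Date >= the new batch's max Date are dropped, then the
# batch is appended and the whole table rewritten as Parquet.
incremental_load(lq, "daily_prices", new_rows, increment_column="Date")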
luxorasap/ingest/legacy_local/dataloader.py ADDED
@@ -0,0 +1,280 @@
+import pandas as pd
+import datetime as dt
+import time
+import io
+import sys, os
+
+from loguru import logger
+from pathlib import Path
+
+from azure.storage.blob import BlobServiceClient
+import pyarrow as pa
+import pyarrow.parquet as pq
+from dotenv import load_dotenv
+load_dotenv()
+
+from luxorasap.datareader import LuxorQuery
+from luxorasap.utils.dataframe import transforms
+
+import warnings
+warnings.warn(
+    "luxorasap.ingest.legacy_local.dataloader is legacy; "
+    "use luxorasap.ingest.cloud in new routines.",
+    DeprecationWarning, stacklevel=2
+)
+
+
+
+class DataLoader:
+
+    def __init__(self, luxorDB_directory=None):
+        """Provides a standardized way of loading tables into luxorDB.
+        1. Has methods for loading tables already held in memory
+            - These are the methods with 'table' in their name
+        2. Has methods for loading Excel files, with all of their sheets
+            Includes a check for file version changes
+            - These are the methods with 'file' in their name
+        Args:
+            luxorDB_directory (pathlib.Path, optional): Full path to the data's destination directory.
+        """
+        self.luxorDB_directory = luxorDB_directory
+
+        if self.luxorDB_directory is None:
+            self.luxorDB_directory = Path(__file__).absolute().parent/"LuxorDB"/"tables"
+
+        self.tracked_files = {}
+        self.tracked_tables = {}
+
+
+    def add_file_tracker(self, tracked_file_path, filetype="excel", sheet_names={},
+                         excel_size_limit=None, index=False, index_name="index", normalize_columns=False):
+        """Adds a file to the list checked for changes.
+        Args:
+            tracked_file_path (pathlib.Path): full path to the file,
+                including file name and extension.
+            sheet_names (dict, optional): For a spreadsheet with several sheets, map
+                each sheet name to the desired output file name here.
+        """
+        if tracked_file_path not in self.tracked_files:
+            self.tracked_files[tracked_file_path] = {
+                "last_mtime" : dt.datetime.timestamp(dt.datetime(2000,1,1)),
+                "filetype" : filetype, "sheet_names": sheet_names,
+                "excel_size_limit" : excel_size_limit,
+                "index" : index,
+                "index_name" : index_name,
+                "normalize_columns" : normalize_columns,
+            }
+
+
+    def add_table_tracker(self, table_name: str):
+        """Adds a table to the change-tracking list."""
+
+        if table_name not in self.tracked_tables:
+            self.tracked_tables[table_name] = dt.datetime.timestamp(dt.datetime(2000,1,1))
+
+
+    def remove_file_tracker(self, tracked_file_path):
+
+        if tracked_file_path in self.tracked_files:
+            del self.tracked_files[tracked_file_path]
+
+
+    def remove_table_tracker(self, table_name: str):
+
+        if table_name in self.tracked_tables:
+            del self.tracked_tables[table_name]
+
+
+    def is_file_modified(self, tracked_file_path: Path) -> tuple[bool, float]:
+        """Checks whether the file has been modified since the last read.
+        Returns:
+            tuple(bool, float): (was it modified?, timestamp of the last modification)
+        """
+
+        file_data = self.tracked_files[tracked_file_path]
+
+        last_saved_time = file_data["last_mtime"]
+        file_last_update = tracked_file_path.stat().st_mtime
+        return file_last_update > last_saved_time, file_last_update
+
+
+    def set_file_modified_time(self, tracked_file_path, file_mtime):
+
+        self.tracked_files[tracked_file_path]["last_mtime"] = file_mtime
+
+
+    def load_file_if_modified(self, tracked_file_path, export_to_blob=False, blob_directory='enriched/parquet'):
+        """Loads the file at the given path into the database when it has been modified.
+        Args:
+            tracked_file_path (pathlib.Path): path to the file (previously registered via add_file_tracker)
+            export_to_blob (bool, optional): also upload the resulting Parquet to blob storage.
+            blob_directory (str, optional): destination directory in the blob. Defaults to 'enriched/parquet'.
+        """
+        file_data = self.tracked_files[tracked_file_path]
+
+        last_saved_time = file_data["last_mtime"]
+        filetype = file_data["filetype"]
+        file_sheets = file_data["sheet_names"]
+
+        file_last_update = tracked_file_path.stat().st_mtime
+
+        if file_last_update > last_saved_time:  # the file has changed
+            if filetype == "excel":
+                file_sheets = None if len(file_sheets) == 0 else list(file_sheets.keys())
+
+                # tables will always be a dict of tables
+                tables = None
+                trials = 25
+                t_counter = 1
+                while trials - t_counter > 0:
+                    try:
+                        tables = pd.read_excel(tracked_file_path, sheet_name=file_sheets)
+                        t_counter = trials  # read completed
+                    except PermissionError:
+                        logger.error(f"Error reading file '{tracked_file_path}'.\nAttempt {t_counter} of {trials}.\nIf it is open, close it.")
+                        time.sleep(10)
+                        t_counter += 1
+
+                for sheet_name, table_data in tables.items():
+
+                    table_name = sheet_name if file_sheets is None else file_data["sheet_names"][sheet_name]
+
+                    if table_name == "trades":
+                        table_data["ID"] = table_data.index
+
+                    self.__export_table(table_name, table_data, index=file_data["index"], index_name=file_data["index_name"],
+                                        normalize_columns=file_data["normalize_columns"], export_to_blob=export_to_blob,
+                                        blob_directory=blob_directory)
+            self.tracked_files[tracked_file_path]["last_mtime"] = file_last_update
+
+
+    def load_table_if_modified(self, table_name, table_data, last_update, index=False, index_name="index", normalize_columns=False,
+                               do_not_load_excel=False, export_to_blob=False, blob_directory='enriched/parquet',
+                               is_data_in_bytes=False, bytes_extension=".xlsx"):
+        """
+        Args:
+            table_name (str): table name (also the name of the saved file)
+            table_data (pd.DataFrame): data table
+            last_update (timestamp): timestamp of the last edit made to the table
+        """
+
+        if table_name not in self.tracked_tables:
+            self.add_table_tracker(table_name)
+
+        last_update_time = self.tracked_tables[table_name]
+        if last_update > last_update_time:
+
+            self.tracked_tables[table_name] = last_update
+            self.__export_table(table_name, table_data, index=index, index_name=index_name, normalize_columns=normalize_columns,
+                                do_not_load_excel=do_not_load_excel, export_to_blob=export_to_blob, blob_directory=blob_directory,
+                                is_data_in_bytes=is_data_in_bytes, bytes_extension=bytes_extension)
+
+
+    def scan_files(self, export_to_blob=False, blob_directory='enriched/parquet'):
+        """
+        For every registered file, checks for a newer version and loads it when found.
+        """
+
+        for file in self.tracked_files:
+            self.load_file_if_modified(file, export_to_blob=export_to_blob, blob_directory=blob_directory)
+
+
+    #def __load_bytes(self, content: bytes, extension=".xlsx") -> pd.DataFrame:
+    #    if extension == ".xlsx" or extension == "xlsx" or extension == "xls":
+    #        df = pd.read_excel(io.BytesIO(content), engine="openpyxl")
+    #
+    #        return df
+    #
+    #    raise ValueError(f'Extension {extension} not supported')
+
+    def __load_bytes(self, content: bytes, extension: str) -> pd.DataFrame:
+        extension = extension.lower()
+
+        if extension in [".xlsx", ".xls", "xlsx", "xls"]:
+            df = pd.read_excel(io.BytesIO(content), engine="openpyxl")
+            return df
+
+        if extension == ".csv":
+            try:
+                return pd.read_csv(io.BytesIO(content), encoding="utf-8")
+            except UnicodeDecodeError:
+                return pd.read_csv(io.BytesIO(content), encoding="latin1")
+
+        if extension == ".parquet":
+            df = pd.read_parquet(io.BytesIO(content))
+            return df
+
+        raise ValueError(f'Extension {extension} not supported')
+
+
+    def __export_table(self, table_name, table_data, index=False, index_name="index", normalize_columns=False,
+                       do_not_load_excel=False, export_to_blob=False, blob_directory='enriched/parquet',
+                       is_data_in_bytes=False, bytes_extension=".xlsx"):
+
+        dest_directory = self.luxorDB_directory
+        #TODO -> format for index=False
+        # Saving in Excel format
+        attempts = 10
+        count_attempt = 0
+
+        if is_data_in_bytes:
+            table_data = self.__load_bytes(table_data, extension=bytes_extension)
+
+        # If the index holds data, turn it into a column
+        if index:
+            # Handle the index name in case it needs to become a column
+            prev_index = table_data.index.name
+            if prev_index is not None and index_name == "index":
+                index_name = prev_index
+            table_data.index.name = index_name
+
+            table_data = table_data.reset_index()
+
+
+        if normalize_columns:
+            table_data = transforms.persist_column_formatting(table_data)
+
+        if not do_not_load_excel:
+            while count_attempt < attempts:
+                count_attempt += 1
+                try:
+                    if len(table_data) > 1_000_000:
+                        table_data = table_data.tail(1_000_000)
+                    table_data.to_excel(dest_directory/f"{table_name}.xlsx", index=False)
+                    count_attempt = attempts  # exit the loop
+
+                except PermissionError:
+                    logger.error(f"Error saving file {table_name}. Close the file. Attempt {count_attempt} of {attempts}")
+                    time.sleep(10 + count_attempt * 5)
+
+        # Saving as CSV
+        # -> CSV output was discontinued for lack of use.
+        #table_data.to_csv(dest_directory/"csv"/f"{table_name}.csv", sep=";", index=False)
+
+        # Saving as Parquet (everything as string... dtypes must be assigned on read)
+        table_data = table_data.astype(str)
+        table_data.to_parquet(dest_directory/"parquet"/f"{table_name}.parquet", engine="pyarrow", index=False)
+
+        if export_to_blob:
+            # Container and blob name
+            container_name = "luxorasap"
+            blob_name = f"{blob_directory}/{table_name}.parquet"
+
+            # Convert to Parquet in memory (no local save required)
+            table = pa.Table.from_pandas(table_data)
+            parquet_buffer = io.BytesIO()
+            pq.write_table(table, parquet_buffer)
+            parquet_buffer.seek(0)  # Reset the pointer to the start of the buffer
+
+            # Connect to Blob Storage
+            connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
+            blob_service_client = BlobServiceClient.from_connection_string(conn_str=connection_string)
+
+            # Create a Blob Client
+            blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
+            blob_client.upload_blob(parquet_buffer, overwrite=True)
+
luxorasap/utils/dataframe/__init__.py ADDED
@@ -0,0 +1,4 @@
+from .transforms import prep_for_save, persist_column_formatting, text_to_lowercase
+from .reader import read_bytes
+
+__all__ = ["prep_for_save", "persist_column_formatting", "text_to_lowercase", "read_bytes"]
luxorasap/utils/dataframe/reader.py ADDED
@@ -0,0 +1,19 @@
+import io, pandas as pd, pyarrow.parquet as pq
+
+def read_bytes(buf: bytes, *, filename: str) -> pd.DataFrame:
+    """Detects the extension and loads the payload into a DataFrame."""
+    ext = filename.split(".")[-1].lower()
+    f = io.BytesIO(buf)
+
+    if ext in {"xlsx", "xls"}:
+        return pd.read_excel(f)
+    if ext == "parquet":
+        return pq.read_table(f).to_pandas()
+    if ext == "csv":
+        try:
+            return pd.read_csv(f, encoding="utf-8")
+        except UnicodeDecodeError:
+            f.seek(0)
+            return pd.read_csv(f, encoding="latin1")
+
+    raise ValueError(f"Extension {ext} not supported")
luxorasap/utils/dataframe/transforms.py ADDED
@@ -0,0 +1,52 @@
+import pandas as pd
+
+def text_to_lowercase(t: pd.DataFrame) -> pd.DataFrame:
+    """
+    Converts every text column to lowercase.
+    Args:
+        t (pd.DataFrame): pandas DataFrame
+    Returns:
+        pd.DataFrame
+    """
+
+    return t.map(lambda x: x.lower().strip() if isinstance(x, str) else x)
+
+
+def persist_column_formatting(t: pd.DataFrame, columns_to_persist_override: set = {}) -> pd.DataFrame:
+    """
+    Preserves the formatting of a few columns and lowercases the rest.
+    Args:
+        t (pd.DataFrame): pandas DataFrame
+    Returns:
+        pd.DataFrame
+    """
+
+    columns_to_persist = {"Name", "Class", "Vehicles", "Segment"}
+    columns_to_persist = columns_to_persist.union(columns_to_persist_override)
+
+    if len(set(t.columns).intersection(columns_to_persist)) > 0:
+        # Preserve the formatting of a few columns
+        columns_order = list(t.columns)
+        columns_to_persist = list(set(t.columns).intersection(columns_to_persist))
+        persistent_data = t[columns_to_persist].copy()
+
+        columns_to_normalize = list(set(columns_order) - set(columns_to_persist))
+        t = text_to_lowercase(t[columns_to_normalize])
+        t.loc[:, columns_to_persist] = persistent_data
+        return t[columns_order]
+
+    # In every other case, lowercase everything
+    return text_to_lowercase(t)
+
+
+def prep_for_save(
+    df: pd.DataFrame,
+    *,
+    index: bool = False,
+    index_name: str = "index",
+    normalize: bool = False,
+):
+    if index:
+        name = df.index.name or index_name
+        df = df.reset_index().rename(columns={"index": name})
+    return persist_column_formatting(df) if normalize else df
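A small illustration of the normalization contract (column values are made up):

import pandas as pd
from luxorasap.utils.dataframe import prep_for_save

df = pd.DataFrame(
    {"Name": ["Fund A"], "Segment": ["Equity"], "Notes": ["Mixed CASE"]},
    index=pd.Index([10], name="asset_id"),
)
out = prep_for_save(df, index=True, normalize=True)
# The named index becomes the "asset_id" column; "Name" and "Segment" keep
# their casing while the remaining text columns are lowercased and stripped.
print(out)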
luxorasap/utils/storage/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .blob import BlobParquetClient
+__all__ = ["BlobParquetClient"]
luxorasap/utils/storage/blob.py ADDED
@@ -0,0 +1,95 @@
+import io, os
+from pathlib import PurePosixPath
+from datetime import timezone
+import pandas as pd
+import pyarrow as pa, pyarrow.parquet as pq
+from azure.storage.blob import BlobServiceClient
+
+from ..dataframe import read_bytes
+
+
+class BlobParquetClient:
+    """Reads/writes Parquet in Azure Blob – stateless & reusable."""
+
+    def __init__(self, container: str = "luxorasap", adls_connection_string: str = None):
+        if adls_connection_string is None:
+            adls_connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
+
+        if adls_connection_string is None:
+            raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
+        self._svc = BlobServiceClient.from_connection_string(adls_connection_string)
+        self._container = container
+
+    # ---------- public API ----------
+    def read_df(self, blob_path: str) -> tuple[pd.DataFrame, bool]:
+        buf = io.BytesIO()
+        try:
+            self._blob(blob_path).download_blob().readinto(buf)
+            return (
+                read_bytes(buf.getvalue(), filename=PurePosixPath(blob_path).name),
+                True,
+            )
+        except Exception:
+            return None, False
+
+
+    def write_df(self, df, blob_path: str):
+        table = pa.Table.from_pandas(df)
+        buf = io.BytesIO()
+        pq.write_table(table, buf)
+        buf.seek(0)
+        self._blob(blob_path).upload_blob(buf, overwrite=True)
+
+
+    def get_df_update_time(self, blob_path: str) -> float:
+        try:
+            properties = self._blob(blob_path).get_blob_properties()
+            return properties['last_modified'].replace(tzinfo=timezone.utc).timestamp()
+        except Exception:
+            return 0.0
+
+
+    def exists_df(self, blob_path: str) -> bool:
+        try:
+            self._blob(blob_path).get_blob_properties()
+            return True
+        except Exception:
+            return False
+
+
+    def list_blob_files(self, blob_path: str, ends_with: str = None) -> list:
+        """
+        Lists the files in a blob storage directory.
+
+        Args:
+            blob_path (str): The directory path in blob storage.
+            ends_with (str, optional): Keeps only files ending with this string (e.g. '.parquet').
+
+        Returns:
+            list: A list of blob names.
+        """
+        try:
+            container_client = self._svc.get_container_client(self._container)
+            blob_list = container_client.list_blobs(name_starts_with=blob_path)
+            if ends_with:
+                return [blob.name for blob in blob_list if blob.name.endswith(ends_with)]
+            return [blob.name for blob in blob_list]
+        except Exception:
+            return []
+
+
+    def table_exists(self, table_path: str) -> bool:
+        """
+        Checks whether a table exists in blob storage.
+        """
+        return self.exists_df(table_path)
+
+
+    # ---------- internal --------------
+    def _blob(self, path: str):
+        path = str(PurePosixPath(path))
+        return self._svc.get_blob_client(self._container, path)
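A round-trip sketch (the blob path is a placeholder; the client reads AZURE_STORAGE_CONNECTION_STRING when no connection string is passed):

import pandas as pd
from luxorasap.utils.storage import BlobParquetClient

client = BlobParquetClient()  # defaults to the "luxorasap" container
df = pd.DataFrame({"Ticker": ["abc"], "Price": ["10.5"]})

client.write_df(df, "sandbox/example.parquet")
loaded, ok = client.read_df("sandbox/example.parquet")
print(ok, client.get_df_update_time("sandbox/example.parquet"))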
{luxorasap-0.0.2.dist-info → luxorasap-0.1.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: luxorasap
-Version: 0.0.2
+Version: 0.1.1
 Summary: Luxor’s unified toolbox for data ingestion, querying and analytics.
 Author-email: Luxor Group <backoffice@luxor.com.br>
 License: Proprietary – All rights reserved
@@ -18,15 +18,25 @@ Requires-Dist: pyarrow>=15.0
 Requires-Dist: requests>=2.32
 Requires-Dist: pydantic>=2.7
 Requires-Dist: scipy>=1.13
+Requires-Dist: openpyxl
+Provides-Extra: storage
+Requires-Dist: azure-storage-blob>=12.19; extra == "storage"
+Requires-Dist: pyarrow>=15.0; extra == "storage"
+Provides-Extra: dataframe
+Requires-Dist: pandas>=2.2; extra == "dataframe"
 Provides-Extra: datareader
-Requires-Dist: pyarrow>=15.0; extra == "datareader"
+Requires-Dist: luxorasap[dataframe,storage]; extra == "datareader"
+Requires-Dist: numpy>=1.25; extra == "datareader"
+Requires-Dist: scipy>=1.13; extra == "datareader"
 Provides-Extra: ingest
-Requires-Dist: pandera>=0.18; extra == "ingest"
+Requires-Dist: luxorasap[dataframe,storage]; extra == "ingest"
+Requires-Dist: pandas>=2.2; extra == "ingest"
 Provides-Extra: btgapi
 Requires-Dist: requests>=2.32; extra == "btgapi"
 Requires-Dist: pydantic>=2.7; extra == "btgapi"
 Provides-Extra: dev
 Requires-Dist: pytest>=8.2; extra == "dev"
+Requires-Dist: requests-mock>=1.11; extra == "dev"
 Requires-Dist: black>=24.4.0; extra == "dev"
 Requires-Dist: isort>=5.13; extra == "dev"
 Requires-Dist: bumpver>=2024.3; extra == "dev"
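The dependency tree is now layered: "storage" and "dataframe" act as base extras, and "datareader"/"ingest" pull them in through self-referencing requirements (luxorasap[dataframe,storage]). Assuming the extras resolve as declared, a consumer that only needs the BTG wrapper can run pip install "luxorasap[btgapi]", while pip install "luxorasap[datareader]" transitively brings in pandas, pyarrow and azure-storage-blob.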
luxorasap-0.1.1.dist-info/RECORD ADDED
@@ -0,0 +1,21 @@
+luxorasap/__init__.py,sha256=nXxCeYO_7SQzrZ_jTanpDSplTLqWOGIFXTIWKy5xyKI,1355
+luxorasap/btgapi/__init__.py,sha256=DISzvHp-J7oeNq_PhmCt-_ZRBCaUgkQ9k2wtJLm-kgs,563
+luxorasap/btgapi/auth.py,sha256=UEihM5OXHhtHdB9NiAMlmEUDR_H3cqg305CdFxATrYY,1846
+luxorasap/btgapi/reports.py,sha256=bn54MiYzdrm2SMXx6nqdy5nd6Pd2uBQk2cYaEFFiaMY,6145
+luxorasap/btgapi/trades.py,sha256=1Cn1RMjaHO073YHJFeN2XRxYElQH7A98GIfUVy0VmSg,5987
+luxorasap/datareader/__init__.py,sha256=41RAvbrQ4R6oj67S32CrKqolx0CJ2W8cbOF6g5Cqm2g,120
+luxorasap/datareader/core.py,sha256=LpXe5g4lZpfEqaz_gjjHizVA-vPEjBi5yJKg_7K0Nkw,153205
+luxorasap/ingest/__init__.py,sha256=XhxDTN2ar-u6UCPhnxNU_to-nWiit-SpQ6cA_N9eMSs,795
+luxorasap/ingest/cloud/__init__.py,sha256=V8cCNloP1RgPTEPsepHvWVL4m_t5geQuBORLm7x-OKQ,1729
+luxorasap/ingest/legacy_local/dataloader.py,sha256=zKPhuiBSFwkuWN6d8g2s60KkbVk1R_1cGMCtQM9j-0c,11908
+luxorasap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+luxorasap/utils/dataframe/__init__.py,sha256=dU_RwTTOi6F3mlhM-0MYWM_qexBN9BmmKc_yrDE1Lwc,207
+luxorasap/utils/dataframe/reader.py,sha256=Vzjdw-AeS1lnWEHQ8RZNh0kK93NWTp0NWVi_B6mN5N0,616
+luxorasap/utils/dataframe/transforms.py,sha256=Bm_cv9L9923QIXH82Fa_M4pM94f2AJRPu62Vv_i7tto,1684
+luxorasap/utils/storage/__init__.py,sha256=U3XRq94yzRp3kgBSUcRzs2tQgJ4o8h8a1ZzwiscA5XM,67
+luxorasap/utils/storage/blob.py,sha256=pcEixGxwXM9y5iPPpkX__ySWq0milghJGketYZlRL-0,3171
+luxorasap-0.1.1.dist-info/METADATA,sha256=m7ksGU4yvcq3T62mSpMa8IcDA5jrPAiRTDcRTJaVGfA,3093
+luxorasap-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+luxorasap-0.1.1.dist-info/entry_points.txt,sha256=XFh-dOwUhlya9DmGvgookMI0ezyUJjcOvTIHDEYS44g,52
+luxorasap-0.1.1.dist-info/top_level.txt,sha256=9YOL6bUIpzY06XFBRkUW1e4rgB32Ds91fQPGwUEjxzU,10
+luxorasap-0.1.1.dist-info/RECORD,,
luxorasap-0.0.2.dist-info/RECORD DELETED
@@ -1,10 +0,0 @@
-luxorasap/__init__.py,sha256=UTn522ZJQ17wVxA8touIplzT0y21tzJL3XmsqAbq7XY,1355
-luxorasap/datareader/__init__.py,sha256=41RAvbrQ4R6oj67S32CrKqolx0CJ2W8cbOF6g5Cqm2g,120
-luxorasap/datareader/core.py,sha256=VURibG9qly5Q1Fu6rrgvXypERb39S535wKd-xc8g0uU,156887
-luxorasap/ingest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-luxorasap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-luxorasap-0.0.2.dist-info/METADATA,sha256=SrdjdL1Ce62fym3jfpsTYxI66NCuaRx_21GdwJwnQb0,2633
-luxorasap-0.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-luxorasap-0.0.2.dist-info/entry_points.txt,sha256=XFh-dOwUhlya9DmGvgookMI0ezyUJjcOvTIHDEYS44g,52
-luxorasap-0.0.2.dist-info/top_level.txt,sha256=9YOL6bUIpzY06XFBRkUW1e4rgB32Ds91fQPGwUEjxzU,10
-luxorasap-0.0.2.dist-info/RECORD,,