lambda-risk 0.0.0.5__tar.gz → 0.0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/PKG-INFO +2 -1
- lambda_risk-0.0.0.6/lambda_risk/risk.py +1051 -0
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/lambda_risk.egg-info/PKG-INFO +2 -1
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/lambda_risk.egg-info/requires.txt +1 -0
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/setup.py +3 -2
- lambda_risk-0.0.0.5/lambda_risk/risk.py +0 -43
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/README.md +0 -0
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/lambda_risk/__init__.py +0 -0
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/lambda_risk/client.py +0 -0
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/lambda_risk.egg-info/SOURCES.txt +0 -0
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/lambda_risk.egg-info/dependency_links.txt +0 -0
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/lambda_risk.egg-info/top_level.txt +0 -0
- {lambda_risk-0.0.0.5 → lambda_risk-0.0.0.6}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lambda_risk
|
|
3
|
-
Version: 0.0.0.5
|
|
3
|
+
Version: 0.0.0.6
|
|
4
4
|
Description-Content-Type: text/markdown
|
|
5
5
|
Requires-Dist: pandas>=2.2.3
|
|
6
6
|
Requires-Dist: requests>=2.32.3
|
|
@@ -10,6 +10,7 @@ Requires-Dist: PyQuantimClient
|
|
|
10
10
|
Requires-Dist: holidays
|
|
11
11
|
Requires-Dist: pyarrow
|
|
12
12
|
Requires-Dist: fastparquet
|
|
13
|
+
Requires-Dist: bcchapi
|
|
13
14
|
Dynamic: description
|
|
14
15
|
Dynamic: description-content-type
|
|
15
16
|
Dynamic: requires-dist
|
|
@@ -0,0 +1,1051 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy.stats import norm
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import List, Dict, Optional
|
|
8
|
+
from .client import Datamart, FrameworkRiesgo
|
|
9
|
+
import bcchapi
|
|
10
|
+
|
|
11
|
+
@dataclass
class ExpostRisk:
    """Interface sketch for ex-post risk measures.

    Every method below is an unimplemented stub: it performs no work and
    returns ``None`` even though the signature is annotated ``-> float``.
    """

    def ewma_varianza(self, returns: pd.DataFrame, factor: float) -> float:
        """Stub (not implemented); always returns ``None``."""
        return None

    def decay_varianza(self, returns: pd.DataFrame, factor: float) -> float:
        """Stub (not implemented); always returns ``None``."""
        return None

    def volatilidad(self, returns: pd.DataFrame, ewma: bool = False, decay: bool = False) -> float:
        """Stub (not implemented); always returns ``None``."""
        return None

    def value_at_risk(self, confidence_level: float, sigma: float, mu: float = 0.0) -> float:
        """Stub (not implemented); always returns ``None``."""
        return None

    def tracking_error(self, ret_target: pd.DataFrame, ret_bmk: pd.DataFrame, ewma: bool = False, decay: bool = False) -> float:
        """Stub (not implemented); always returns ``None``."""
        return None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
|
|
31
|
+
class RetornosBrutos:
|
|
32
|
+
end_date : str
|
|
33
|
+
agf_target : str
|
|
34
|
+
year : str
|
|
35
|
+
bmk_categ : str
|
|
36
|
+
run_target : Optional[str] = None
|
|
37
|
+
dtd : bool = True
|
|
38
|
+
wtw : bool = True
|
|
39
|
+
mtm : bool = True
|
|
40
|
+
rolling_window : Optional[list[str]] = None
|
|
41
|
+
drop_weekends : bool = True
|
|
42
|
+
len_data : str = '6Y'
|
|
43
|
+
moneda : str = 'original'
|
|
44
|
+
usd : bool = False
|
|
45
|
+
decay_factor : Optional[list[float]] = None
|
|
46
|
+
result : Dict[str, pd.DataFrame] = None
|
|
47
|
+
bc_user : str = None
|
|
48
|
+
bc_pwd : str = None
|
|
49
|
+
|
|
50
|
+
def __post_init__(self):
    """Build the service clients and eagerly load every derived attribute.

    Sets, in order: ``client_datamart``, ``client_framework``, (when
    ``usd`` is true) the Banco Central credentials and ``usd_data``,
    ``fondos_vigentes``, ``runs_target``, ``comp``, ``runs_competidores``,
    ``raw_data`` and finally ``result``.

    Raises:
        ValueError: if ``usd`` is true and a Banco Central credential is
            neither passed in nor available in the environment.
    """
    # Service clients used by the loaders below.
    self.client_datamart = Datamart()
    self.client_framework = FrameworkRiesgo()

    # Banco Central credentials are only required when USD data is requested.
    if self.usd:
        # os.getenv returns None for a missing variable (it never raises
        # KeyError), so the value has to be checked explicitly.
        if self.bc_user is None:
            self.bc_user = os.getenv("BCCH_API_USER")
            if not self.bc_user:
                raise ValueError("BCCH_API_USER environment variable not set")
        if self.bc_pwd is None:
            self.bc_pwd = os.getenv("BCCH_API_PWD")
            if not self.bc_pwd:
                raise ValueError("BCCH_API_PWD environment variable not set")
        self.API_BCCH_USER = self.bc_user
        self.API_BCCH_PWD = self.bc_pwd
        self.usd_data = self.descargar_usd()

    # Fund catalogue with the VIGENTE flag.
    self.fondos_vigentes = self.vigente()

    # Target funds: derived from the manager unless a run was given.
    if self.run_target is None:
        self.runs_target = self.target()
    else:
        # load_data() always reads runs_target, which was previously left
        # undefined on this path.
        # NOTE(review): assumes run_target holds a single RUN identifier of
        # the same type as the RUN column - confirm against callers.
        self.runs_target = [self.run_target]

    # Competitor funds for the configured benchmark category.
    self.comp = self.competidores()
    self.runs_competidores = self.comp['COMPETIDOR'].tolist()

    # Raw quota values and the derived return tables.
    self.raw_data = self.load_data()
    self.result = self.calculate_returns()
|
|
85
|
+
|
|
86
|
+
def vigente(self):
|
|
87
|
+
df : pd.DataFrame = self.client_datamart.export_fondos(
|
|
88
|
+
ignorar_cambio_nombre=True,
|
|
89
|
+
fecha_corte=self.end_date
|
|
90
|
+
)
|
|
91
|
+
# print("[DEBUG] Fondos vigentes exportados\n",df.dtypes)
|
|
92
|
+
df["FECHA_MAX"] = pd.to_datetime(df['FECHA_MAX'])
|
|
93
|
+
df["FECHA_MIN"] = pd.to_datetime(df['FECHA_MIN'])
|
|
94
|
+
df['VIGENTE'] = df.apply(lambda x: 1 if x['FECHA_MAX']==pd.to_datetime(self.end_date) else 0, axis=1)
|
|
95
|
+
return df
|
|
96
|
+
|
|
97
|
+
def target(self):
|
|
98
|
+
df = self.fondos_vigentes[(self.fondos_vigentes['ADMINISTRADORA'] == self.agf_target) & (self.fondos_vigentes['VIGENTE'] == 1)]
|
|
99
|
+
runs : list = df['RUN'].tolist()
|
|
100
|
+
return runs
|
|
101
|
+
|
|
102
|
+
def competidores(self):
|
|
103
|
+
df : pd.DataFrame = self.client_framework.export_benchmarks(
|
|
104
|
+
year=self.year,
|
|
105
|
+
categoria=self.bmk_categ
|
|
106
|
+
)
|
|
107
|
+
df = df[[f"RUN_{self.agf_target}", "COMPETIDOR"]].merge(
|
|
108
|
+
self.fondos_vigentes[['RUN', 'ADMINISTRADORA', 'FONDO', 'FECHA_MIN', 'FECHA_MAX', 'VIGENTE']],left_on="COMPETIDOR", right_on="RUN", how="left")
|
|
109
|
+
df = df.rename(columns={f"RUN_{self.agf_target}":"RUN_TARGET"})
|
|
110
|
+
|
|
111
|
+
return df
|
|
112
|
+
|
|
113
|
+
def load_data(self):
|
|
114
|
+
lst_runs = list(set(self.runs_target + self.runs_competidores))
|
|
115
|
+
start_date = pd.to_datetime(self.end_date) - pd.DateOffset(years=int(self.len_data.replace("Y", "")))
|
|
116
|
+
start_date = start_date.strftime("%Y-%m-%d")
|
|
117
|
+
# print("[DEBUG] lista de runs a cargar: \n", lst_runs)
|
|
118
|
+
df : pd.DataFrame = self.client_datamart.export_vc(
|
|
119
|
+
fecha_inicio=start_date,
|
|
120
|
+
fecha_fin=self.end_date,
|
|
121
|
+
moneda=self.moneda,
|
|
122
|
+
bruto=True,
|
|
123
|
+
runs = lst_runs,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
if self.drop_weekends:
|
|
127
|
+
df['FECHA'] = pd.to_datetime(df['FECHA'])
|
|
128
|
+
df = df[df['FECHA'].dt.weekday < 5].copy()
|
|
129
|
+
if self.usd:
|
|
130
|
+
df = pd.concat([df, self.sabana_usd(df.columns)], ignore_index=True)
|
|
131
|
+
return df
|
|
132
|
+
|
|
133
|
+
def sabana_usd(self, columns):
|
|
134
|
+
usd = self.usd_data.copy()
|
|
135
|
+
if 'FECHA' not in usd.columns:
|
|
136
|
+
usd = usd.reset_index()
|
|
137
|
+
usd = usd.rename(columns={usd.columns[0]: 'FECHA'})
|
|
138
|
+
usd['FECHA'] = pd.to_datetime(usd['FECHA'])
|
|
139
|
+
|
|
140
|
+
value_col = 'USDCLP' if 'USDCLP' in usd.columns else usd.columns.difference(['FECHA'])[0]
|
|
141
|
+
usd = usd[['FECHA', value_col]].rename(columns={value_col: 'VALORCUOTA'})
|
|
142
|
+
usd['VALORCUOTA'] = pd.to_numeric(usd['VALORCUOTA'], errors='coerce')
|
|
143
|
+
usd = usd.sort_values('FECHA')
|
|
144
|
+
usd['VALORCUOTA'] = usd['VALORCUOTA'].ffill()
|
|
145
|
+
usd['RUN'] = 'USDCLP'
|
|
146
|
+
|
|
147
|
+
start_date = pd.to_datetime(self.end_date) - pd.DateOffset(years=int(self.len_data.replace("Y", "")))
|
|
148
|
+
end_date = pd.to_datetime(self.end_date)
|
|
149
|
+
usd = usd[(usd['FECHA'] >= start_date) & (usd['FECHA'] <= end_date)]
|
|
150
|
+
if self.drop_weekends:
|
|
151
|
+
usd = usd[usd['FECHA'].dt.weekday < 5]
|
|
152
|
+
|
|
153
|
+
return usd.reindex(columns=columns)
|
|
154
|
+
|
|
155
|
+
def valores_cuota(self):
|
|
156
|
+
df = self.raw_data.copy()
|
|
157
|
+
df['FECHA'] = pd.to_datetime(df['FECHA'])
|
|
158
|
+
return df.pivot_table(
|
|
159
|
+
index='FECHA',
|
|
160
|
+
columns='RUN',
|
|
161
|
+
values='VALORCUOTA',
|
|
162
|
+
aggfunc='last'
|
|
163
|
+
).sort_index()
|
|
164
|
+
|
|
165
|
+
def ajustar_fecha_min(self, returns: pd.DataFrame):
|
|
166
|
+
fecha_min = self.fondos_vigentes[['RUN', 'FECHA_MIN']].dropna().copy()
|
|
167
|
+
fecha_min['FECHA_MIN'] = pd.to_datetime(fecha_min['FECHA_MIN'])
|
|
168
|
+
fecha_min_by_run = fecha_min.set_index(fecha_min['RUN'].astype(str))['FECHA_MIN'].to_dict()
|
|
169
|
+
|
|
170
|
+
returns = returns.copy()
|
|
171
|
+
returns.index = pd.to_datetime(returns.index)
|
|
172
|
+
for run in returns.columns:
|
|
173
|
+
min_date = fecha_min_by_run.get(str(run))
|
|
174
|
+
if min_date is not None:
|
|
175
|
+
returns.loc[returns.index < min_date, run] = None
|
|
176
|
+
return returns
|
|
177
|
+
|
|
178
|
+
def retorno_periodo(self, valores_cuota: pd.DataFrame, periods: int):
|
|
179
|
+
returns = valores_cuota.pct_change(periods=periods)
|
|
180
|
+
end_date = pd.to_datetime(self.end_date)
|
|
181
|
+
anchor_pos = returns.index.searchsorted(end_date, side='right') - 1
|
|
182
|
+
if anchor_pos < 0:
|
|
183
|
+
return returns.iloc[0:0]
|
|
184
|
+
|
|
185
|
+
positions = np.arange(anchor_pos, -1, -periods)
|
|
186
|
+
positions = np.sort(positions)
|
|
187
|
+
return returns.iloc[positions]
|
|
188
|
+
|
|
189
|
+
def retorno_rolling(self, valores_cuota: pd.DataFrame, window: int):
|
|
190
|
+
return valores_cuota.pct_change(periods=window)
|
|
191
|
+
|
|
192
|
+
def decay_key(self, factor: float):
|
|
193
|
+
factor_pct = factor * 100
|
|
194
|
+
if factor_pct.is_integer():
|
|
195
|
+
return f'DECAY{int(factor_pct)}'
|
|
196
|
+
return f'DECAY{str(factor_pct).replace(".", "_")}'
|
|
197
|
+
|
|
198
|
+
def pesos_decay(self, index: pd.Index, factor: float):
|
|
199
|
+
end_date = pd.to_datetime(self.end_date)
|
|
200
|
+
index = pd.to_datetime(index)
|
|
201
|
+
weights = pd.Series(index=index, dtype=float)
|
|
202
|
+
anchor_pos = index.searchsorted(end_date, side='right') - 1
|
|
203
|
+
if anchor_pos < 0:
|
|
204
|
+
return weights
|
|
205
|
+
|
|
206
|
+
positions = np.arange(anchor_pos, -1, -1)
|
|
207
|
+
age = anchor_pos - positions
|
|
208
|
+
weights.iloc[positions] = (1 - factor) * np.power(factor, age)
|
|
209
|
+
return weights
|
|
210
|
+
|
|
211
|
+
def agregar_decay(self, returns: pd.DataFrame):
|
|
212
|
+
returns = returns.copy()
|
|
213
|
+
if self.decay_factor is not None:
|
|
214
|
+
for factor in self.decay_factor:
|
|
215
|
+
factor = float(factor)
|
|
216
|
+
returns[self.decay_key(factor)] = self.pesos_decay(returns.index, factor=factor)
|
|
217
|
+
return returns
|
|
218
|
+
|
|
219
|
+
def calculate_returns(self):
|
|
220
|
+
if not any([self.dtd, self.wtw, self.mtm]) and self.rolling_window is None:
|
|
221
|
+
return None
|
|
222
|
+
|
|
223
|
+
valores_cuota = self.valores_cuota()
|
|
224
|
+
selected_returns = {
|
|
225
|
+
'dtd': self.dtd,
|
|
226
|
+
'wtw': self.wtw,
|
|
227
|
+
'mtm': self.mtm,
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
result = {}
|
|
231
|
+
if selected_returns['dtd']:
|
|
232
|
+
result['dtd'] = self.agregar_decay(self.ajustar_fecha_min(self.retorno_periodo(valores_cuota, periods=1)))
|
|
233
|
+
if selected_returns['wtw']:
|
|
234
|
+
result['wtw'] = self.agregar_decay(self.ajustar_fecha_min(self.retorno_periodo(valores_cuota, periods=5)))
|
|
235
|
+
if selected_returns['mtm']:
|
|
236
|
+
result['mtm'] = self.agregar_decay(self.ajustar_fecha_min(self.retorno_periodo(valores_cuota, periods=20)))
|
|
237
|
+
if self.rolling_window is not None:
|
|
238
|
+
for window in self.rolling_window:
|
|
239
|
+
window = int(window)
|
|
240
|
+
result[f'ROLL{window}'] = self.agregar_decay(self.ajustar_fecha_min(self.retorno_rolling(valores_cuota, window=window)))
|
|
241
|
+
return result
|
|
242
|
+
|
|
243
|
+
def descargar_usd(self, serie: str = "F073.TCO.PRE.Z.D", nombre: str = "USDCLP"):
    """Download a series through ``bcchapi`` using the stored credentials.

    Args:
        serie: series code passed to ``cuadro`` as ``series``.
        nombre: label passed to ``cuadro`` in the ``nombres`` list.

    Returns:
        Whatever ``bcchapi.Siete(...).cuadro(...)`` returns.
    """
    sesion = bcchapi.Siete(self.bc_user, self.bc_pwd)
    return sesion.cuadro(series=serie, nombres=[nombre])
|
|
247
|
+
|
|
248
|
+
@dataclass
|
|
249
|
+
class MetricasExpost:
|
|
250
|
+
year : str
|
|
251
|
+
data : Dict[str, pd.DataFrame] = field(default_factory=dict)
|
|
252
|
+
fondos_vigentes : pd.DataFrame = field(default_factory=pd.DataFrame)
|
|
253
|
+
target : Optional[str] = None
|
|
254
|
+
decays : Optional[List[float]] = None
|
|
255
|
+
rangos : Optional[Dict[str, int]] = None
|
|
256
|
+
moneda_default : str = "CLP"
|
|
257
|
+
result : pd.DataFrame = field(default_factory=pd.DataFrame)
|
|
258
|
+
|
|
259
|
+
def __post_init__(self):
|
|
260
|
+
self.year = str(self.year)
|
|
261
|
+
self.rangos = self.rangos or {}
|
|
262
|
+
self._monedas = self._mapa_monedas()
|
|
263
|
+
self._target_runs = self._mapa_target_runs()
|
|
264
|
+
self.result = self.calcular()
|
|
265
|
+
|
|
266
|
+
def _mapa_monedas(self):
|
|
267
|
+
if self.fondos_vigentes.empty or "RUN" not in self.fondos_vigentes.columns:
|
|
268
|
+
return {}
|
|
269
|
+
|
|
270
|
+
moneda_col = next(
|
|
271
|
+
(col for col in ["MONEDA", "MONEDA_SERIE", "MONEDA_CUOTA"] if col in self.fondos_vigentes.columns),
|
|
272
|
+
None
|
|
273
|
+
)
|
|
274
|
+
if moneda_col is None:
|
|
275
|
+
return {}
|
|
276
|
+
|
|
277
|
+
monedas = self.fondos_vigentes[["RUN", moneda_col]].dropna().copy()
|
|
278
|
+
monedas["RUN"] = monedas["RUN"].astype(str)
|
|
279
|
+
return monedas.drop_duplicates("RUN").set_index("RUN")[moneda_col].to_dict()
|
|
280
|
+
|
|
281
|
+
def _mapa_target_runs(self):
|
|
282
|
+
if self.target is None:
|
|
283
|
+
return None
|
|
284
|
+
if self.fondos_vigentes.empty:
|
|
285
|
+
return set()
|
|
286
|
+
if "RUN" not in self.fondos_vigentes.columns or "ADMINISTRADORA" not in self.fondos_vigentes.columns:
|
|
287
|
+
return set()
|
|
288
|
+
|
|
289
|
+
fondos = self.fondos_vigentes[
|
|
290
|
+
self.fondos_vigentes["ADMINISTRADORA"].astype(str).eq(str(self.target))
|
|
291
|
+
].copy()
|
|
292
|
+
return set(fondos["RUN"].astype(str))
|
|
293
|
+
|
|
294
|
+
def _normalizar_data(self, df: pd.DataFrame):
|
|
295
|
+
df = df.copy()
|
|
296
|
+
if "FECHA" in df.columns:
|
|
297
|
+
df["FECHA"] = pd.to_datetime(df["FECHA"])
|
|
298
|
+
df = df.set_index("FECHA")
|
|
299
|
+
else:
|
|
300
|
+
df.index = pd.to_datetime(df.index)
|
|
301
|
+
|
|
302
|
+
df = df.sort_index()
|
|
303
|
+
decay_cols = [col for col in df.columns if self._es_columna_decay(col)]
|
|
304
|
+
ret_cols = [col for col in df.columns if col not in decay_cols]
|
|
305
|
+
returns = df[ret_cols].apply(pd.to_numeric, errors="coerce")
|
|
306
|
+
return returns, decay_cols
|
|
307
|
+
|
|
308
|
+
@staticmethod
|
|
309
|
+
def _es_columna_decay(col):
|
|
310
|
+
return str(col).upper().startswith("DECAY")
|
|
311
|
+
|
|
312
|
+
@staticmethod
|
|
313
|
+
def _parse_decay(col):
|
|
314
|
+
value = str(col).upper().replace("DECAY", "").replace("_", ".")
|
|
315
|
+
return float(value) / 100
|
|
316
|
+
|
|
317
|
+
@staticmethod
|
|
318
|
+
def _decay_label(decay):
|
|
319
|
+
pct = decay * 100
|
|
320
|
+
if float(pct).is_integer():
|
|
321
|
+
return int(pct)
|
|
322
|
+
return pct
|
|
323
|
+
|
|
324
|
+
@classmethod
|
|
325
|
+
def _decay_suffix(cls, decay):
|
|
326
|
+
return f"decay{cls._decay_label(decay)}"
|
|
327
|
+
|
|
328
|
+
@staticmethod
|
|
329
|
+
def _dato99(decay):
|
|
330
|
+
acumulado = 0.0
|
|
331
|
+
dato = 0
|
|
332
|
+
while acumulado < 0.999:
|
|
333
|
+
acumulado += (1 - decay) * (decay ** dato)
|
|
334
|
+
dato += 1
|
|
335
|
+
return dato
|
|
336
|
+
|
|
337
|
+
def _decays(self, decay_cols):
|
|
338
|
+
if self.decays is not None:
|
|
339
|
+
return [float(decay) for decay in self.decays]
|
|
340
|
+
return [self._parse_decay(col) for col in decay_cols]
|
|
341
|
+
|
|
342
|
+
def _runs_usd(self, returns: pd.DataFrame):
|
|
343
|
+
return [
|
|
344
|
+
run for run in returns.columns
|
|
345
|
+
if str(run).upper() != "USDCLP"
|
|
346
|
+
and (self._target_runs is None or str(run) in self._target_runs)
|
|
347
|
+
and str(self._monedas.get(str(run), "")).upper() == "USD"
|
|
348
|
+
]
|
|
349
|
+
|
|
350
|
+
def _retornos_fondos(self, returns: pd.DataFrame):
|
|
351
|
+
cols = [
|
|
352
|
+
col for col in returns.columns
|
|
353
|
+
if str(col).upper() != "USDCLP"
|
|
354
|
+
and (self._target_runs is None or str(col) in self._target_runs)
|
|
355
|
+
]
|
|
356
|
+
return returns[cols]
|
|
357
|
+
|
|
358
|
+
def _formato_resultado(self, values, retorno, metrica, decay, dato99, datos, incluir_valor_null=False):
|
|
359
|
+
year_mask = values.index.year.astype(str) == self.year
|
|
360
|
+
values = values.loc[year_mask]
|
|
361
|
+
datos = datos.loc[year_mask]
|
|
362
|
+
if incluir_valor_null:
|
|
363
|
+
stacked = values.reset_index()
|
|
364
|
+
stacked = stacked.rename(columns={stacked.columns[0]: "FECHA"})
|
|
365
|
+
stacked = stacked.melt(id_vars="FECHA", var_name="RUN", value_name="VALOR")
|
|
366
|
+
else:
|
|
367
|
+
stacked = values.stack().dropna().rename("VALOR").reset_index()
|
|
368
|
+
if stacked.empty:
|
|
369
|
+
return pd.DataFrame(columns=["FECHA", "RUN", "MONEDA", "METRICA", "DECAY", "DATO99", "RETORNO", "DATOS", "VALOR"])
|
|
370
|
+
|
|
371
|
+
stacked.columns = ["FECHA", "RUN", "VALOR"]
|
|
372
|
+
datos_stacked = datos.stack().rename("DATOS").reset_index()
|
|
373
|
+
datos_stacked.columns = ["FECHA", "RUN", "DATOS"]
|
|
374
|
+
stacked = stacked.merge(datos_stacked, on=["FECHA", "RUN"], how="left")
|
|
375
|
+
stacked = stacked[stacked["DATOS"] > 0].copy()
|
|
376
|
+
if stacked.empty:
|
|
377
|
+
return pd.DataFrame(columns=["FECHA", "RUN", "MONEDA", "METRICA", "DECAY", "DATO99", "RETORNO", "DATOS", "VALOR"])
|
|
378
|
+
|
|
379
|
+
stacked["RUN_KEY"] = stacked["RUN"].astype(str)
|
|
380
|
+
stacked["MONEDA"] = stacked["RUN_KEY"].map(self._monedas).fillna(self.moneda_default)
|
|
381
|
+
stacked["METRICA"] = metrica
|
|
382
|
+
stacked["DECAY"] = self._decay_label(decay) if decay is not None else pd.NA
|
|
383
|
+
stacked["DATO99"] = dato99 if dato99 is not None else pd.NA
|
|
384
|
+
stacked["RETORNO"] = retorno.upper()
|
|
385
|
+
stacked["DATOS"] = stacked["DATOS"].astype("Int64")
|
|
386
|
+
return stacked[["FECHA", "RUN", "MONEDA", "METRICA", "DECAY", "DATO99", "RETORNO", "DATOS", "VALOR"]]
|
|
387
|
+
|
|
388
|
+
@staticmethod
|
|
389
|
+
def _varianza_decay_acumulada(values: pd.DataFrame, decay: float):
|
|
390
|
+
arr = values.to_numpy(dtype=float, copy=True)
|
|
391
|
+
valid = ~np.isnan(arr)
|
|
392
|
+
squared = np.where(valid, arr * arr, 0.0)
|
|
393
|
+
out = np.empty_like(squared, dtype=float)
|
|
394
|
+
state = np.zeros(squared.shape[1], dtype=float)
|
|
395
|
+
|
|
396
|
+
for row in range(squared.shape[0]):
|
|
397
|
+
state *= decay
|
|
398
|
+
state += (1 - decay) * squared[row]
|
|
399
|
+
out[row] = state
|
|
400
|
+
|
|
401
|
+
out[valid.cumsum(axis=0) == 0] = np.nan
|
|
402
|
+
return pd.DataFrame(out, index=values.index, columns=values.columns)
|
|
403
|
+
|
|
404
|
+
@staticmethod
|
|
405
|
+
def _media_decay_acumulada(values: pd.DataFrame, decay: float):
|
|
406
|
+
arr = values.to_numpy(dtype=float, copy=True)
|
|
407
|
+
valid = ~np.isnan(arr)
|
|
408
|
+
arr = np.where(valid, arr, 0.0)
|
|
409
|
+
out = np.empty_like(arr, dtype=float)
|
|
410
|
+
state = np.zeros(arr.shape[1], dtype=float)
|
|
411
|
+
|
|
412
|
+
for row in range(arr.shape[0]):
|
|
413
|
+
state *= decay
|
|
414
|
+
state += (1 - decay) * arr[row]
|
|
415
|
+
out[row] = state
|
|
416
|
+
|
|
417
|
+
out[valid.cumsum(axis=0) == 0] = np.nan
|
|
418
|
+
return pd.DataFrame(out, index=values.index, columns=values.columns)
|
|
419
|
+
|
|
420
|
+
@staticmethod
|
|
421
|
+
def _covarianza_acumulada(values: pd.DataFrame, usd: pd.Series):
|
|
422
|
+
valid = values.notna() & usd.notna().to_numpy()[:, None]
|
|
423
|
+
x = values.where(valid, 0.0)
|
|
424
|
+
y = pd.DataFrame(
|
|
425
|
+
np.where(valid, usd.to_numpy()[:, None], 0.0),
|
|
426
|
+
index=values.index,
|
|
427
|
+
columns=values.columns
|
|
428
|
+
)
|
|
429
|
+
|
|
430
|
+
datos = valid.cumsum()
|
|
431
|
+
sum_x = x.cumsum()
|
|
432
|
+
sum_y = y.cumsum()
|
|
433
|
+
sum_xy = (x * y).cumsum()
|
|
434
|
+
cov = (sum_xy - (sum_x * sum_y / datos)) / (datos - 1)
|
|
435
|
+
cov = cov.mask(datos < 2)
|
|
436
|
+
return cov, datos
|
|
437
|
+
|
|
438
|
+
@staticmethod
|
|
439
|
+
def _covarianza_decay_acumulada(values: pd.DataFrame, usd: pd.Series, decay: float):
|
|
440
|
+
arr = values.to_numpy(dtype=float, copy=True)
|
|
441
|
+
usd_arr = usd.to_numpy(dtype=float, copy=True)[:, None]
|
|
442
|
+
valid = ~np.isnan(arr) & ~np.isnan(usd_arr)
|
|
443
|
+
product = np.where(valid, arr * usd_arr, 0.0)
|
|
444
|
+
out = np.empty_like(product, dtype=float)
|
|
445
|
+
state = np.zeros(product.shape[1], dtype=float)
|
|
446
|
+
|
|
447
|
+
for row in range(product.shape[0]):
|
|
448
|
+
state *= decay
|
|
449
|
+
state += (1 - decay) * product[row]
|
|
450
|
+
out[row] = state
|
|
451
|
+
|
|
452
|
+
datos = pd.DataFrame(valid, index=values.index, columns=values.columns).cumsum()
|
|
453
|
+
out[datos.to_numpy() == 0] = np.nan
|
|
454
|
+
cov = pd.DataFrame(out, index=values.index, columns=values.columns)
|
|
455
|
+
return cov, datos
|
|
456
|
+
|
|
457
|
+
def _varianza(self, returns, retorno):
|
|
458
|
+
datos = returns.notna().cumsum()
|
|
459
|
+
varianza = returns.expanding(min_periods=2).var(ddof=1)
|
|
460
|
+
return self._formato_resultado(
|
|
461
|
+
values=varianza,
|
|
462
|
+
retorno=retorno,
|
|
463
|
+
metrica="varianza",
|
|
464
|
+
decay=None,
|
|
465
|
+
dato99=None,
|
|
466
|
+
datos=datos
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
def _varianza_decay(self, returns, retorno, decay):
|
|
470
|
+
datos = returns.notna().cumsum()
|
|
471
|
+
dato99 = self._dato99(decay)
|
|
472
|
+
varianza = self._varianza_decay_acumulada(returns, decay)
|
|
473
|
+
varianza = varianza.mask(datos < dato99)
|
|
474
|
+
return self._formato_resultado(
|
|
475
|
+
values=varianza,
|
|
476
|
+
retorno=retorno,
|
|
477
|
+
metrica="varianza",
|
|
478
|
+
decay=decay,
|
|
479
|
+
dato99=dato99,
|
|
480
|
+
datos=datos,
|
|
481
|
+
incluir_valor_null=True
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
def _volatilidad(self, returns, retorno):
|
|
485
|
+
datos = returns.notna().cumsum()
|
|
486
|
+
sigma = returns.expanding(min_periods=2).std(ddof=1)
|
|
487
|
+
return self._formato_resultado(
|
|
488
|
+
values=sigma,
|
|
489
|
+
retorno=retorno,
|
|
490
|
+
metrica="sigma",
|
|
491
|
+
decay=None,
|
|
492
|
+
dato99=None,
|
|
493
|
+
datos=datos
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
def _volatilidad_decay(self, returns, retorno, decay):
|
|
497
|
+
datos = returns.notna().cumsum()
|
|
498
|
+
dato99 = self._dato99(decay)
|
|
499
|
+
varianza = self._varianza_decay_acumulada(returns, decay)
|
|
500
|
+
sigma = np.sqrt(varianza)
|
|
501
|
+
sigma = sigma.mask(datos < dato99)
|
|
502
|
+
return self._formato_resultado(
|
|
503
|
+
values=sigma,
|
|
504
|
+
retorno=retorno,
|
|
505
|
+
metrica="sigma",
|
|
506
|
+
decay=decay,
|
|
507
|
+
dato99=dato99,
|
|
508
|
+
datos=datos,
|
|
509
|
+
incluir_valor_null=True
|
|
510
|
+
)
|
|
511
|
+
|
|
512
|
+
def _metricas_var(self, sigma, media, retorno, decay, dato99, datos, incluir_valor_null, sufijo=None):
|
|
513
|
+
chunks = []
|
|
514
|
+
for confidence in [0.95, 0.99]:
|
|
515
|
+
confidence_label = int(confidence * 100)
|
|
516
|
+
z_score = norm.ppf(confidence)
|
|
517
|
+
for usar_media in [True, False]:
|
|
518
|
+
mu = media if usar_media else 0.0
|
|
519
|
+
values = z_score * sigma - mu
|
|
520
|
+
name_parts = [f"var{confidence_label}"]
|
|
521
|
+
if not usar_media:
|
|
522
|
+
name_parts.append("mu0")
|
|
523
|
+
if decay is not None:
|
|
524
|
+
name_parts.append(self._decay_suffix(decay))
|
|
525
|
+
if sufijo is not None:
|
|
526
|
+
name_parts.append(sufijo)
|
|
527
|
+
|
|
528
|
+
chunk = self._formato_resultado(
|
|
529
|
+
values=values,
|
|
530
|
+
retorno=retorno,
|
|
531
|
+
metrica="_".join(name_parts),
|
|
532
|
+
decay=decay,
|
|
533
|
+
dato99=dato99,
|
|
534
|
+
datos=datos,
|
|
535
|
+
incluir_valor_null=incluir_valor_null
|
|
536
|
+
)
|
|
537
|
+
if not chunk.empty:
|
|
538
|
+
chunks.append(chunk)
|
|
539
|
+
|
|
540
|
+
if not chunks:
|
|
541
|
+
return pd.DataFrame()
|
|
542
|
+
return pd.concat(chunks, ignore_index=True)
|
|
543
|
+
|
|
544
|
+
def _value_at_risk(self, returns, retorno):
|
|
545
|
+
datos = returns.notna().cumsum()
|
|
546
|
+
sigma = returns.expanding(min_periods=2).std(ddof=1)
|
|
547
|
+
media = returns.expanding(min_periods=2).mean()
|
|
548
|
+
return self._metricas_var(
|
|
549
|
+
sigma=sigma,
|
|
550
|
+
media=media,
|
|
551
|
+
retorno=retorno,
|
|
552
|
+
decay=None,
|
|
553
|
+
dato99=None,
|
|
554
|
+
datos=datos,
|
|
555
|
+
incluir_valor_null=False
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
def _value_at_risk_decay(self, returns, retorno, decay):
|
|
559
|
+
datos = returns.notna().cumsum()
|
|
560
|
+
dato99 = self._dato99(decay)
|
|
561
|
+
varianza = self._varianza_decay_acumulada(returns, decay)
|
|
562
|
+
sigma = np.sqrt(varianza)
|
|
563
|
+
media = self._media_decay_acumulada(returns, decay)
|
|
564
|
+
sigma = sigma.mask(datos < dato99)
|
|
565
|
+
media = media.mask(datos < dato99)
|
|
566
|
+
return self._metricas_var(
|
|
567
|
+
sigma=sigma,
|
|
568
|
+
media=media,
|
|
569
|
+
retorno=retorno,
|
|
570
|
+
decay=decay,
|
|
571
|
+
dato99=dato99,
|
|
572
|
+
datos=datos,
|
|
573
|
+
incluir_valor_null=True
|
|
574
|
+
)
|
|
575
|
+
|
|
576
|
+
def _covarianza_usd(self, returns, retorno):
|
|
577
|
+
if "USDCLP" not in returns.columns:
|
|
578
|
+
return pd.DataFrame()
|
|
579
|
+
|
|
580
|
+
usd_runs = self._runs_usd(returns)
|
|
581
|
+
if not usd_runs:
|
|
582
|
+
return pd.DataFrame()
|
|
583
|
+
|
|
584
|
+
values, datos = self._covarianza_acumulada(returns[usd_runs], returns["USDCLP"])
|
|
585
|
+
return self._formato_resultado(
|
|
586
|
+
values=values,
|
|
587
|
+
retorno=retorno,
|
|
588
|
+
metrica="covarianza_usd",
|
|
589
|
+
decay=None,
|
|
590
|
+
dato99=None,
|
|
591
|
+
datos=datos
|
|
592
|
+
)
|
|
593
|
+
|
|
594
|
+
def _covarianza_usd_decay(self, returns, retorno, decay):
|
|
595
|
+
if "USDCLP" not in returns.columns:
|
|
596
|
+
return pd.DataFrame()
|
|
597
|
+
|
|
598
|
+
usd_runs = self._runs_usd(returns)
|
|
599
|
+
if not usd_runs:
|
|
600
|
+
return pd.DataFrame()
|
|
601
|
+
|
|
602
|
+
dato99 = self._dato99(decay)
|
|
603
|
+
values, datos = self._covarianza_decay_acumulada(returns[usd_runs], returns["USDCLP"], decay)
|
|
604
|
+
values = values.mask(datos < dato99)
|
|
605
|
+
return self._formato_resultado(
|
|
606
|
+
values=values,
|
|
607
|
+
retorno=retorno,
|
|
608
|
+
metrica="covarianza_usd",
|
|
609
|
+
decay=decay,
|
|
610
|
+
dato99=dato99,
|
|
611
|
+
datos=datos,
|
|
612
|
+
incluir_valor_null=True
|
|
613
|
+
)
|
|
614
|
+
|
|
615
|
+
@staticmethod
|
|
616
|
+
def _expandir_serie(serie: pd.Series, columnas: pd.Index):
|
|
617
|
+
return pd.DataFrame(
|
|
618
|
+
np.repeat(serie.to_numpy()[:, None], len(columnas), axis=1),
|
|
619
|
+
index=serie.index,
|
|
620
|
+
columns=columnas
|
|
621
|
+
)
|
|
622
|
+
|
|
623
|
+
def _metricas_clp(self, returns, retorno):
|
|
624
|
+
if "USDCLP" not in returns.columns:
|
|
625
|
+
return pd.DataFrame()
|
|
626
|
+
|
|
627
|
+
usd_runs = self._runs_usd(returns)
|
|
628
|
+
if not usd_runs:
|
|
629
|
+
return pd.DataFrame()
|
|
630
|
+
|
|
631
|
+
fund_returns = returns[usd_runs]
|
|
632
|
+
usd_return = returns["USDCLP"]
|
|
633
|
+
covarianza, datos = self._covarianza_acumulada(fund_returns, usd_return)
|
|
634
|
+
varianza_run = fund_returns.expanding(min_periods=2).var(ddof=1)
|
|
635
|
+
varianza_usd = self._expandir_serie(usd_return.expanding(min_periods=2).var(ddof=1), fund_returns.columns)
|
|
636
|
+
varianza_clp = varianza_run + varianza_usd + 2 * covarianza
|
|
637
|
+
sigma_clp = np.sqrt(varianza_clp)
|
|
638
|
+
media_run = fund_returns.expanding(min_periods=2).mean()
|
|
639
|
+
media_usd = self._expandir_serie(usd_return.expanding(min_periods=2).mean(), fund_returns.columns)
|
|
640
|
+
media_clp = (1 + media_run) * (1 + media_usd) - 1
|
|
641
|
+
|
|
642
|
+
chunks = []
|
|
643
|
+
for values, metrica in [
|
|
644
|
+
(varianza_clp, "varianza_clp"),
|
|
645
|
+
(sigma_clp, "sigma_clp"),
|
|
646
|
+
]:
|
|
647
|
+
chunk = self._formato_resultado(
|
|
648
|
+
values=values,
|
|
649
|
+
retorno=retorno,
|
|
650
|
+
metrica=metrica,
|
|
651
|
+
decay=None,
|
|
652
|
+
dato99=None,
|
|
653
|
+
datos=datos
|
|
654
|
+
)
|
|
655
|
+
if not chunk.empty:
|
|
656
|
+
chunks.append(chunk)
|
|
657
|
+
|
|
658
|
+
chunk = self._metricas_var(
|
|
659
|
+
sigma=sigma_clp,
|
|
660
|
+
media=media_clp,
|
|
661
|
+
retorno=retorno,
|
|
662
|
+
decay=None,
|
|
663
|
+
dato99=None,
|
|
664
|
+
datos=datos,
|
|
665
|
+
incluir_valor_null=False,
|
|
666
|
+
sufijo="clp"
|
|
667
|
+
)
|
|
668
|
+
if not chunk.empty:
|
|
669
|
+
chunks.append(chunk)
|
|
670
|
+
|
|
671
|
+
if not chunks:
|
|
672
|
+
return pd.DataFrame()
|
|
673
|
+
return pd.concat(chunks, ignore_index=True)
|
|
674
|
+
|
|
675
|
+
def _metricas_clp_decay(self, returns, retorno, decay):
|
|
676
|
+
if "USDCLP" not in returns.columns:
|
|
677
|
+
return pd.DataFrame()
|
|
678
|
+
|
|
679
|
+
usd_runs = self._runs_usd(returns)
|
|
680
|
+
if not usd_runs:
|
|
681
|
+
return pd.DataFrame()
|
|
682
|
+
|
|
683
|
+
dato99 = self._dato99(decay)
|
|
684
|
+
fund_returns = returns[usd_runs]
|
|
685
|
+
usd_return = returns["USDCLP"]
|
|
686
|
+
covarianza, datos = self._covarianza_decay_acumulada(fund_returns, usd_return, decay)
|
|
687
|
+
varianza_run = self._varianza_decay_acumulada(fund_returns, decay)
|
|
688
|
+
varianza_usd = self._expandir_serie(
|
|
689
|
+
self._varianza_decay_acumulada(returns[["USDCLP"]], decay)["USDCLP"],
|
|
690
|
+
fund_returns.columns
|
|
691
|
+
)
|
|
692
|
+
varianza_clp = varianza_run + varianza_usd + 2 * covarianza
|
|
693
|
+
varianza_clp = varianza_clp.mask(datos < dato99)
|
|
694
|
+
sigma_clp = np.sqrt(varianza_clp)
|
|
695
|
+
media_run = self._media_decay_acumulada(fund_returns, decay)
|
|
696
|
+
media_usd = self._expandir_serie(
|
|
697
|
+
self._media_decay_acumulada(returns[["USDCLP"]], decay)["USDCLP"],
|
|
698
|
+
fund_returns.columns
|
|
699
|
+
)
|
|
700
|
+
media_clp = ((1 + media_run) * (1 + media_usd) - 1).mask(datos < dato99)
|
|
701
|
+
|
|
702
|
+
chunks = []
|
|
703
|
+
for values, metrica in [
|
|
704
|
+
(varianza_clp, "varianza_clp"),
|
|
705
|
+
(sigma_clp, "sigma_clp"),
|
|
706
|
+
]:
|
|
707
|
+
chunk = self._formato_resultado(
|
|
708
|
+
values=values,
|
|
709
|
+
retorno=retorno,
|
|
710
|
+
metrica=metrica,
|
|
711
|
+
decay=decay,
|
|
712
|
+
dato99=dato99,
|
|
713
|
+
datos=datos,
|
|
714
|
+
incluir_valor_null=True
|
|
715
|
+
)
|
|
716
|
+
if not chunk.empty:
|
|
717
|
+
chunks.append(chunk)
|
|
718
|
+
|
|
719
|
+
chunk = self._metricas_var(
|
|
720
|
+
sigma=sigma_clp,
|
|
721
|
+
media=media_clp,
|
|
722
|
+
retorno=retorno,
|
|
723
|
+
decay=decay,
|
|
724
|
+
dato99=dato99,
|
|
725
|
+
datos=datos,
|
|
726
|
+
incluir_valor_null=True,
|
|
727
|
+
sufijo="clp"
|
|
728
|
+
)
|
|
729
|
+
if not chunk.empty:
|
|
730
|
+
chunks.append(chunk)
|
|
731
|
+
|
|
732
|
+
if not chunks:
|
|
733
|
+
return pd.DataFrame()
|
|
734
|
+
return pd.concat(chunks, ignore_index=True)
|
|
735
|
+
|
|
736
|
+
def calcular(self):
    """Compute every metric for every return table and stack them in long format.

    For each ``(name, table)`` pair in ``self.data`` the table is normalised,
    the fund return columns are selected, and then:

    * once per decay factor, the decay-weighted variance, volatility,
      value-at-risk, USD covariance and CLP metrics are computed;
    * once per table, the same five metrics are computed without decay.

    Tables that are empty after normalisation, or that yield no fund
    returns, are skipped. Metric helpers that return an empty frame
    contribute nothing to the output.

    Returns:
        A DataFrame with columns FECHA, RUN, MONEDA, METRICA, DECAY, DATO99,
        RETORNO, DATOS and VALOR, sorted by FECHA, RUN, RETORNO, METRICA and
        DECAY (missing values last) with a fresh integer index. When nothing
        was produced, an empty frame with those same columns.
    """
    piezas = []

    def guardar(pieza):
        # Empty frames are dropped here so they never reach pd.concat.
        if not pieza.empty:
            piezas.append(pieza)

    for nombre, tabla in self.data.items():
        returns, decay_cols = self._normalizar_data(tabla)
        fund_returns = self._retornos_fondos(returns)
        retorno = str(nombre).upper()
        if returns.empty or fund_returns.empty:
            continue

        # Decay-weighted flavour: one pass per decay factor.
        for decay in self._decays(decay_cols):
            guardar(self._varianza_decay(fund_returns, retorno, decay))
            guardar(self._volatilidad_decay(fund_returns, retorno, decay))
            guardar(self._value_at_risk_decay(fund_returns, retorno, decay))
            guardar(self._covarianza_usd_decay(returns, retorno, decay))
            guardar(self._metricas_clp_decay(returns, retorno, decay))

        # Unweighted flavour: computed once per table.
        guardar(self._varianza(fund_returns, retorno))
        guardar(self._volatilidad(fund_returns, retorno))
        guardar(self._value_at_risk(fund_returns, retorno))
        guardar(self._covarianza_usd(returns, retorno))
        guardar(self._metricas_clp(returns, retorno))

    if len(piezas) == 0:
        return pd.DataFrame(columns=["FECHA", "RUN", "MONEDA", "METRICA", "DECAY", "DATO99", "RETORNO", "DATOS", "VALOR"])

    combinado = pd.concat(piezas, ignore_index=True)
    ordenado = combinado.sort_values(["FECHA", "RUN", "RETORNO", "METRICA", "DECAY"], na_position="last")
    return ordenado.reset_index(drop=True)
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
@dataclass
class MetricasExpost_Relativas:
    """Ex-post tracking error of each fund against the average of its peers.

    Every target fund listed in ``competidores`` defines a universe made of
    the target plus its competitors. For each member of a universe the active
    return is its own return minus the mean return of the other members. Two
    tracking-error flavours are reported in long format:

    * the expanding sample standard deviation of the active return, and
    * the exponentially weighted root-mean-square of the active return, once
      per decay factor.

    The calculation runs on construction and is stored in ``result``. Only
    rows whose date falls in ``year`` are reported.
    """

    # Calendar year to report; coerced to ``str`` in ``__post_init__``.
    year : str
    # Return tables keyed by return type; the key is upper-cased into RETORNO.
    data : Dict[str, pd.DataFrame] = field(default_factory=dict)
    # Target/competitor pairs; column names are detected by the helpers below.
    competidores : pd.DataFrame = field(default_factory=pd.DataFrame)
    # Fund catalogue used only to look up each RUN's currency.
    fondos_vigentes : pd.DataFrame = field(default_factory=pd.DataFrame)
    # Optional suffix: a ``RUN_<target>`` column is used as the target column.
    target : Optional[str] = None
    # Explicit decay factors (fractions in [0, 1)); overrides DECAY* columns.
    decays : Optional[List[float]] = None
    # Currency reported for RUNs that are missing from ``fondos_vigentes``.
    moneda_default : str = "CLP"
    # Output of ``calcular``; overwritten during ``__post_init__``.
    result : pd.DataFrame = field(default_factory=pd.DataFrame)

    # Column layout shared by every frame this class returns.
    _COLUMNAS = (
        "FECHA", "RUN", "MONEDA", "METRICA", "DECAY",
        "DATO99", "RETORNO", "DATOS", "VALOR",
    )

    def __post_init__(self):
        """Normalise ``year``, build the currency map and run the calculation."""
        self.year = str(self.year)
        self._monedas = self._mapa_monedas()
        self.result = self.calcular()

    def _mapa_monedas(self):
        """Return a ``{RUN (str): currency}`` dict built from ``fondos_vigentes``.

        The currency is read from the first of MONEDA, MONEDA_SERIE or
        MONEDA_CUOTA that exists. Returns an empty dict when the catalogue is
        empty or lacks a RUN or currency column. For a repeated RUN the first
        row wins.
        """
        if self.fondos_vigentes.empty or "RUN" not in self.fondos_vigentes.columns:
            return {}

        moneda_col = next(
            (col for col in ["MONEDA", "MONEDA_SERIE", "MONEDA_CUOTA"] if col in self.fondos_vigentes.columns),
            None
        )
        if moneda_col is None:
            return {}

        monedas = self.fondos_vigentes[["RUN", moneda_col]].dropna().copy()
        monedas["RUN"] = monedas["RUN"].astype(str)
        return monedas.drop_duplicates("RUN").set_index("RUN")[moneda_col].to_dict()

    def _columna_target(self):
        """Return the ``competidores`` column holding the target RUN, or None.

        Preference order: ``RUN_TARGET``; ``RUN_<self.target>``; the first
        other column whose name starts with ``RUN_``.
        """
        if self.competidores.empty:
            return None
        columnas = self.competidores.columns
        if "RUN_TARGET" in columnas:
            return "RUN_TARGET"
        if self.target is not None and f"RUN_{self.target}" in columnas:
            return f"RUN_{self.target}"

        # The generic fallback must not select the competitor column itself
        # (e.g. "RUN_COMPETIDOR"): target and competitor would then be the
        # same column and the grouping downstream would fail.
        competitor_col = self._columna_competidor()
        for col in columnas:
            if col != competitor_col and str(col).upper().startswith("RUN_"):
                return col
        return None

    def _columna_competidor(self):
        """Return the first known competitor column present, or None."""
        # NOTE(review): "CPMETIDORES" looks like a misspelling accepted on
        # purpose for existing inputs — confirm before removing it.
        for col in ["COMPETIDOR", "COMPETIDORES", "CPMETIDORES", "RUN_COMPETIDOR"]:
            if col in self.competidores.columns:
                return col
        return None

    def _grupos_competidores(self):
        """Return ``{target RUN: [target, competitor, ...]}`` with RUNs as strings.

        The target always comes first; competitors follow in first-seen order,
        without duplicates and without repeating the target. Returns an empty
        dict when the target or the competitor column cannot be identified.
        """
        target_col = self._columna_target()
        competitor_col = self._columna_competidor()
        if target_col is None or competitor_col is None:
            return {}

        comp = self.competidores[[target_col, competitor_col]].dropna().copy()
        comp[target_col] = comp[target_col].astype(str)
        comp[competitor_col] = comp[competitor_col].astype(str)

        grupos = {}
        for run_target, group in comp.groupby(target_col):
            competitors = group[competitor_col].drop_duplicates().tolist()
            universo = [str(run_target)] + [run for run in competitors if run != str(run_target)]
            grupos[str(run_target)] = universo
        return grupos

    @staticmethod
    def _es_columna_decay(col):
        """Return True when the column name starts with DECAY (case-insensitive)."""
        return str(col).upper().startswith("DECAY")

    @staticmethod
    def _parse_decay(col):
        """Turn a DECAY column name into a fraction, e.g. ``DECAY97_5`` -> 0.975.

        The DECAY prefix is removed, every underscore becomes a decimal point
        and the resulting percentage is divided by 100.
        """
        value = str(col).upper().replace("DECAY", "").replace("_", ".")
        return float(value) / 100

    @staticmethod
    def _decay_label(decay):
        """Return the decay as a percentage label: ``int`` when whole, else ``float``.

        The percentage is rounded to 10 decimals before the integer check
        because the product carries binary floating-point noise
        (``0.57 * 100 == 56.99999999999999``), which would otherwise leak into
        the DECAY column.
        """
        pct = round(float(decay) * 100, 10)
        if pct.is_integer():
            return int(pct)
        return pct

    @staticmethod
    def _dato99(decay):
        """Return how many observations accumulate 99.9% of the decay weights.

        Weights are ``(1 - decay) * decay ** k`` for ``k = 0, 1, ...``; the
        result is the smallest count whose cumulative weight reaches 0.999.

        Raises:
            ValueError: if ``decay`` is not in ``[0, 1)``. For ``decay >= 1``
                the weights never accumulate to 0.999 and the loop below
                would not terminate.
        """
        if not 0 <= decay < 1:
            raise ValueError(
                f"decay must be a fraction in [0, 1), got {decay!r}"
            )
        acumulado = 0.0
        dato = 0
        while acumulado < 0.999:
            acumulado += (1 - decay) * (decay ** dato)
            dato += 1
        return dato

    def _decays(self, decay_cols):
        """Return the decay factors to use: ``self.decays`` if set, else parsed columns."""
        if self.decays is not None:
            return [float(decay) for decay in self.decays]
        return [self._parse_decay(col) for col in decay_cols]

    def _normalizar_data(self, df: pd.DataFrame):
        """Return ``(returns, decay_cols)`` from a raw return table.

        The table is copied and indexed by date (the FECHA column if present,
        otherwise the existing index converted to datetime) and sorted.
        ``decay_cols`` are the DECAY* column names. ``returns`` holds the
        remaining columns coerced to numeric (unparseable values become NaN),
        without USDCLP and with string column names.
        """
        df = df.copy()
        if "FECHA" in df.columns:
            df["FECHA"] = pd.to_datetime(df["FECHA"])
            df = df.set_index("FECHA")
        else:
            df.index = pd.to_datetime(df.index)

        df = df.sort_index()
        decay_cols = [col for col in df.columns if self._es_columna_decay(col)]
        ret_cols = [col for col in df.columns if col not in decay_cols]
        returns = df[ret_cols].apply(pd.to_numeric, errors="coerce")
        returns = returns[[col for col in returns.columns if str(col).upper() != "USDCLP"]]
        returns.columns = returns.columns.astype(str)
        return returns, decay_cols

    @staticmethod
    def _tracking_error_decay(active_return: pd.Series, decay: float):
        """Return the exponentially weighted RMS of ``active_return``.

        The state follows ``s_t = decay * s_{t-1} + (1 - decay) * r_t ** 2``
        starting from 0, and the output is ``sqrt(s_t)``. A missing
        observation contributes 0 but the state still decays. Positions before
        the first valid observation are NaN.
        """
        values = active_return.to_numpy(dtype=float, copy=True)
        valid = ~np.isnan(values)
        squared = np.where(valid, values * values, 0.0)
        out = np.empty(len(values), dtype=float)
        state = 0.0

        for i, value in enumerate(squared):
            state *= decay
            state += (1 - decay) * value
            out[i] = state

        out[np.cumsum(valid) == 0] = np.nan
        return pd.Series(np.sqrt(out), index=active_return.index)

    @staticmethod
    def _retornos_activos(returns: pd.DataFrame, grupos: Dict[str, List[str]]):
        """Yield ``(key, active_return, count)`` for every member of every group.

        ``key`` is ``"<target>|<run>"``. ``active_return`` is the run's return
        minus the mean return of the other available members. ``count`` is the
        running number of non-missing returns of the run itself. Groups with
        fewer than two members present in ``returns`` are skipped.
        """
        for run_target, universo in grupos.items():
            disponibles = [run for run in universo if run in returns.columns]
            if len(disponibles) < 2:
                continue

            group_returns = returns[disponibles]
            for run in disponibles:
                peers = [peer for peer in disponibles if peer != run]
                benchmark = group_returns[peers].mean(axis=1, skipna=True)
                active_return = group_returns[run] - benchmark
                count = group_returns[run].notna().cumsum()
                yield f"{run_target}|{run}", active_return, count

    def _formato_tracking(self, values: pd.DataFrame, datos: pd.DataFrame, retorno, decay, dato99, incluir_valor_null):
        """Reshape wide tracking-error frames into the long output layout.

        Args:
            values: tracking error per date (rows) and ``"<target>|<run>"`` key.
            datos: observation counts with the same shape as ``values``.
            retorno: return-type label, upper-cased into RETORNO.
            decay: decay factor, or None for the unweighted metric.
            dato99: observation threshold for ``decay``, or None.
            incluir_valor_null: keep rows whose VALOR is missing when True.

        Returns:
            Rows of ``self.year`` with at least one observation, in the
            standard column layout; an empty frame if none qualify.
        """
        year_mask = values.index.year.astype(str) == self.year
        values = values.loc[year_mask]
        datos = datos.loc[year_mask]

        if incluir_valor_null:
            stacked = values.reset_index()
            stacked = stacked.rename(columns={stacked.columns[0]: "FECHA"})
            stacked = stacked.melt(id_vars="FECHA", var_name="RUN_KEY", value_name="VALOR")
        else:
            stacked = values.stack().dropna().rename("VALOR").reset_index()
            stacked.columns = ["FECHA", "RUN_KEY", "VALOR"]

        if stacked.empty:
            return self._empty_result()

        datos_stacked = datos.stack().rename("DATOS").reset_index()
        datos_stacked.columns = ["FECHA", "RUN_KEY", "DATOS"]
        stacked = stacked.merge(datos_stacked, on=["FECHA", "RUN_KEY"], how="left")
        stacked = stacked[stacked["DATOS"] > 0].copy()
        if stacked.empty:
            return self._empty_result()

        # The key is "<target>|<run>"; only the run part is reported.
        keys = stacked["RUN_KEY"].str.split("|", n=1, expand=True)
        stacked["RUN"] = keys[1]
        # Direct lookup with a default avoids filling a float NaN column with
        # a string when no RUN is present in the currency map.
        stacked["MONEDA"] = stacked["RUN"].map(
            lambda run: self._monedas.get(run, self.moneda_default)
        )
        stacked["METRICA"] = "tracking_error"
        stacked["DECAY"] = self._decay_label(decay) if decay is not None else pd.NA
        stacked["DATO99"] = dato99 if dato99 is not None else pd.NA
        stacked["RETORNO"] = str(retorno).upper()
        stacked["DATOS"] = stacked["DATOS"].astype("Int64")
        return stacked[list(self._COLUMNAS)]

    @staticmethod
    def _empty_result():
        """Return an empty frame with the standard output columns."""
        return pd.DataFrame(columns=list(MetricasExpost_Relativas._COLUMNAS))

    def _tracking_grupos(self, returns: pd.DataFrame, grupos: Dict[str, List[str]], retorno):
        """Return the unweighted tracking error for every group member.

        The metric is the expanding sample standard deviation (``ddof=1``, at
        least two observations) of the active return. Rows with a missing
        value are not reported.
        """
        values = {}
        datos = {}
        for key, active_return, count in self._retornos_activos(returns, grupos):
            values[key] = active_return.expanding(min_periods=2).std(ddof=1)
            datos[key] = count

        if not values:
            return self._empty_result()

        return self._formato_tracking(
            values=pd.DataFrame(values, index=returns.index),
            datos=pd.DataFrame(datos, index=returns.index),
            retorno=retorno,
            decay=None,
            dato99=None,
            incluir_valor_null=False
        )

    def _tracking_grupos_decay(self, returns: pd.DataFrame, grupos: Dict[str, List[str]], retorno, decay):
        """Return the decay-weighted tracking error for every group member.

        Values are blanked (NaN) while the run has fewer than ``_dato99(decay)``
        observations; those rows are still reported with a missing VALOR.
        """
        dato99 = self._dato99(decay)
        values = {}
        datos = {}
        for key, active_return, count in self._retornos_activos(returns, grupos):
            values[key] = self._tracking_error_decay(active_return, decay).mask(count < dato99)
            datos[key] = count

        if not values:
            return self._empty_result()

        return self._formato_tracking(
            values=pd.DataFrame(values, index=returns.index),
            datos=pd.DataFrame(datos, index=returns.index),
            retorno=retorno,
            decay=decay,
            dato99=dato99,
            incluir_valor_null=True
        )

    def calcular(self):
        """Compute both tracking-error flavours for every return table.

        Returns:
            A long-format frame sorted by FECHA, RUN, RETORNO, METRICA and
            DECAY (missing values last) with a fresh integer index, or an
            empty frame with the standard columns when there are no groups or
            no results.
        """
        grupos = self._grupos_competidores()
        if not grupos:
            return self._empty_result()

        chunks = []
        for retorno, df in self.data.items():
            returns, decay_cols = self._normalizar_data(df)
            if returns.empty:
                continue

            for decay in self._decays(decay_cols):
                chunk = self._tracking_grupos_decay(returns, grupos, retorno, decay)
                if not chunk.empty:
                    chunks.append(chunk)

            chunk = self._tracking_grupos(returns, grupos, retorno)
            if not chunk.empty:
                chunks.append(chunk)

        if not chunks:
            return self._empty_result()

        result = pd.concat(chunks, ignore_index=True)
        return result.sort_values(
            ["FECHA", "RUN", "RETORNO", "METRICA", "DECAY"],
            na_position="last"
        ).reset_index(drop=True)
|
|
1050
|
+
|
|
1051
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lambda_risk
|
|
3
|
-
Version: 0.0.0.5
|
|
3
|
+
Version: 0.0.0.6
|
|
4
4
|
Description-Content-Type: text/markdown
|
|
5
5
|
Requires-Dist: pandas>=2.2.3
|
|
6
6
|
Requires-Dist: requests>=2.32.3
|
|
@@ -10,6 +10,7 @@ Requires-Dist: PyQuantimClient
|
|
|
10
10
|
Requires-Dist: holidays
|
|
11
11
|
Requires-Dist: pyarrow
|
|
12
12
|
Requires-Dist: fastparquet
|
|
13
|
+
Requires-Dist: bcchapi
|
|
13
14
|
Dynamic: description
|
|
14
15
|
Dynamic: description-content-type
|
|
15
16
|
Dynamic: requires-dist
|
|
@@ -6,7 +6,7 @@ with open("README.md", "r") as f:
|
|
|
6
6
|
|
|
7
7
|
setup(
|
|
8
8
|
name='lambda_risk',
|
|
9
|
-
version='0.0.0.
|
|
9
|
+
version='0.0.0.6',
|
|
10
10
|
packages=find_packages(),
|
|
11
11
|
include_package_data=True,
|
|
12
12
|
install_requires=[
|
|
@@ -17,7 +17,8 @@ setup(
|
|
|
17
17
|
'PyQuantimClient',
|
|
18
18
|
'holidays',
|
|
19
19
|
'pyarrow',
|
|
20
|
-
'fastparquet'
|
|
20
|
+
'fastparquet',
|
|
21
|
+
'bcchapi'
|
|
21
22
|
|
|
22
23
|
],
|
|
23
24
|
long_description=description,
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
import numpy as np
|
|
3
|
-
from scipy.stats import norm
|
|
4
|
-
from datetime import datetime, timedelta
|
|
5
|
-
from dataclasses import dataclass, field
|
|
6
|
-
from typing import List, Dict, Optional
|
|
7
|
-
from .client import Datamart, FrameworkRiesgo
|
|
8
|
-
|
|
9
|
-
@dataclass
class ExpostRisk:
    """Placeholder interface for ex-post risk measures.

    None of the methods is implemented: each one has an empty body and
    therefore returns ``None`` regardless of its ``float`` annotation.
    """

    def ewma_varianza(self,returns:pd.DataFrame,factor:float) -> float:
        """Stub: takes returns and a factor; currently returns ``None``."""

    def decay_varianza(self,returns:pd.DataFrame,factor:float) -> float:
        """Stub: takes returns and a factor; currently returns ``None``."""

    def volatilidad(self,returns:pd.DataFrame,ewma:bool=False, decay:bool=False) -> float:
        """Stub: takes returns and two mode flags; currently returns ``None``."""

    def value_at_risk(self, confidence_level:float, sigma:float, mu:float=0.0) -> float:
        """Stub: takes a confidence level, sigma and mu; currently returns ``None``."""

    def tracking_error(self, ret_target:pd.DataFrame, ret_bmk:pd.DataFrame, ewma:bool=False, decay:bool=False) -> float:
        """Stub: takes target and benchmark returns; currently returns ``None``."""
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@dataclass
class Retornos:
    """Configuration container for a returns calculation.

    Only the fields are defined; ``runs_target`` is a stub with no
    implementation.
    """

    # Input data table.
    data : pd.DataFrame
    # Identifier of the target AGF.  NOTE(review): meaning inferred from the
    # field name only — confirm against the caller.
    agf_target : str
    # Year as a string.
    year : str
    # Benchmark category identifier (meaning inferred from the name — confirm).
    bmk_categ : str
    # Flags presumably selecting day-to-day / week-to-week returns — confirm.
    dtd : bool = False
    wtw : bool = False
    # Optional rolling-window settings.
    rolling_window : Optional[dict] = None
    drop_weekends : bool = True
    len_data : str = '1Y'
    # Defaults to None, so the annotation is Optional rather than a bare Dict.
    result : Optional[Dict[str, pd.DataFrame]] = None

    def runs_target(self):
        """Stub: not implemented; currently returns ``None``."""
        ...
|
|
43
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|