dquant 1.2.1__tar.gz → 1.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dquant-1.2.1/src/DQuant.egg-info → dquant-1.2.3}/PKG-INFO +1 -1
- {dquant-1.2.1 → dquant-1.2.3}/pyproject.toml +1 -1
- {dquant-1.2.1 → dquant-1.2.3/src/DQuant.egg-info}/PKG-INFO +1 -1
- dquant-1.2.3/src/dquant/metrics.py +8 -0
- {dquant-1.2.1 → dquant-1.2.3}/src/dquant/models.py +77 -32
- dquant-1.2.1/src/dquant/metrics.py +0 -10
- {dquant-1.2.1 → dquant-1.2.3}/LICENSE +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/README.md +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/setup.cfg +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/src/DQuant.egg-info/SOURCES.txt +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/src/DQuant.egg-info/dependency_links.txt +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/src/DQuant.egg-info/requires.txt +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/src/DQuant.egg-info/top_level.txt +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/src/__init__.py +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/src/dquant/__init__.py +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/src/dquant/get_data.py +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/src/dquant/visual.py +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/test/test.py +0 -0
- {dquant-1.2.1 → dquant-1.2.3}/test/test_load.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dquant
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.2.3
|
|
4
4
|
Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
|
|
5
5
|
Author: Denis Makarov
|
|
6
6
|
Project-URL: Homepage, https://dquant.space
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dquant
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.2.3
|
|
4
4
|
Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
|
|
5
5
|
Author: Denis Makarov
|
|
6
6
|
Project-URL: Homepage, https://dquant.space
|
|
@@ -12,7 +12,7 @@ import numpy as np
|
|
|
12
12
|
import xgboost
|
|
13
13
|
from sklearn.ensemble import GradientBoostingRegressor
|
|
14
14
|
from sklearn.model_selection import train_test_split
|
|
15
|
-
from sklearn.metrics import mean_squared_error, r2_score
|
|
15
|
+
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
|
|
16
16
|
from .metrics import qlike_score
|
|
17
17
|
from sklearn.preprocessing import StandardScaler
|
|
18
18
|
from typing import Tuple
|
|
@@ -72,9 +72,9 @@ class FichEn:
|
|
|
72
72
|
raw_windows_X = []
|
|
73
73
|
raw_windows_y = []
|
|
74
74
|
|
|
75
|
-
for i in range(window_in + 1, len(data) - window_out +
|
|
75
|
+
for i in range(window_in + 1, len(data) - (window_out + 2)):
|
|
76
76
|
x_window = data.iloc[i - window_in: i]
|
|
77
|
-
y_window = data.iloc[i
|
|
77
|
+
y_window = data.iloc[i: i + window_out+1]
|
|
78
78
|
|
|
79
79
|
raw_windows_X.append(x_window)
|
|
80
80
|
raw_windows_y.append(y_window)
|
|
@@ -563,12 +563,15 @@ class FichEn:
|
|
|
563
563
|
|
|
564
564
|
self.train_errors = []
|
|
565
565
|
self.val_errors = []
|
|
566
|
+
self.train_mae = []
|
|
567
|
+
self.val_mae = []
|
|
566
568
|
self.train_qlike = []
|
|
567
569
|
self.val_qlike = []
|
|
568
570
|
self.train_r2 = []
|
|
569
571
|
self.val_r2 = []
|
|
570
572
|
|
|
571
573
|
self.best_val_error = float('inf')
|
|
574
|
+
self.best_val_mae = float('inf')
|
|
572
575
|
self.best_val_qlike = float('inf')
|
|
573
576
|
self.best_r2 = -float('inf')
|
|
574
577
|
self.patience_counter = 0
|
|
@@ -581,6 +584,8 @@ class FichEn:
|
|
|
581
584
|
self.dquantprint(f'{i} trees')
|
|
582
585
|
t_error = 0
|
|
583
586
|
v_error = 0
|
|
587
|
+
t_mae = 0
|
|
588
|
+
v_mae = 0
|
|
584
589
|
t_qlike = 0
|
|
585
590
|
v_qlike = 0
|
|
586
591
|
t_r2 = 0
|
|
@@ -600,6 +605,7 @@ class FichEn:
|
|
|
600
605
|
valid_mask = ~pd.isna(y_h) if hasattr(y_h, 'isna') else ~np.isnan(y_h)
|
|
601
606
|
X_h = X_scaled[valid_mask]
|
|
602
607
|
y_h_clean = y_h[valid_mask]
|
|
608
|
+
y_h_clean_orig = self.scaler_y.inverse_transform(y_h_clean.reshape(-1, 1)).ravel()
|
|
603
609
|
|
|
604
610
|
if i != 1:
|
|
605
611
|
self.models[h_idx].set_params(n_estimators=i)
|
|
@@ -615,24 +621,39 @@ class FichEn:
|
|
|
615
621
|
valid_mask = ~pd.isna(y_h_v) if hasattr(y_h_v, 'isna') else ~np.isnan(y_h_v)
|
|
616
622
|
X_h_v = X_test_scaled[valid_mask]
|
|
617
623
|
y_h_v_clean = y_h_v[valid_mask]
|
|
624
|
+
y_h_v_clean_orig = self.scaler_y.inverse_transform(y_h_v_clean.reshape(-1, 1)).ravel()
|
|
618
625
|
if i != 1:
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
626
|
+
pred_train = self.models[h_idx].predict(X_h)
|
|
627
|
+
pred_val = self.models[h_idx].predict(X_h_v)
|
|
628
|
+
pred_train_orig = self.scaler_y.inverse_transform(pred_train.reshape(-1, 1)).ravel()
|
|
629
|
+
pred_val_orig = self.scaler_y.inverse_transform(pred_val.reshape(-1, 1)).ravel()
|
|
630
|
+
t_error += mean_squared_error(y_h_clean, pred_train)
|
|
631
|
+
v_error += mean_squared_error(y_h_v_clean, pred_val)
|
|
632
|
+
t_mae += mean_absolute_error(y_h_clean, pred_train)
|
|
633
|
+
v_mae += mean_absolute_error(y_h_v_clean, pred_val)
|
|
634
|
+
t_qlike += qlike_score(y_h_clean_orig, pred_train_orig)
|
|
635
|
+
v_qlike += qlike_score(y_h_v_clean_orig, pred_val_orig)
|
|
636
|
+
t_r2 += r2_score(y_h_clean, pred_train)
|
|
637
|
+
v_r2 += r2_score(y_h_v_clean, pred_val)
|
|
625
638
|
else:
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
639
|
+
pred_train = model.predict(X_h)
|
|
640
|
+
pred_val = model.predict(X_h_v)
|
|
641
|
+
pred_train_orig = self.scaler_y.inverse_transform(pred_train.reshape(-1, 1)).ravel()
|
|
642
|
+
pred_val_orig = self.scaler_y.inverse_transform(pred_val.reshape(-1, 1)).ravel()
|
|
643
|
+
t_error += mean_squared_error(y_h_clean, pred_train)
|
|
644
|
+
v_error += mean_squared_error(y_h_v_clean, pred_val)
|
|
645
|
+
t_mae += mean_absolute_error(y_h_clean, pred_train)
|
|
646
|
+
v_mae += mean_absolute_error(y_h_v_clean, pred_val)
|
|
647
|
+
t_qlike += qlike_score(y_h_clean_orig, pred_train_orig)
|
|
648
|
+
v_qlike += qlike_score(y_h_v_clean_orig, pred_val_orig)
|
|
649
|
+
t_r2 += r2_score(y_h_clean, pred_train)
|
|
650
|
+
v_r2 += r2_score(y_h_v_clean, pred_val)
|
|
632
651
|
|
|
633
652
|
|
|
634
653
|
var_test_error = float(t_error)/horizon
|
|
635
654
|
var_val_error = float(v_error)/horizon
|
|
655
|
+
var_test_mae = float(t_mae) / horizon
|
|
656
|
+
var_val_mae = float(v_mae) / horizon
|
|
636
657
|
var_test_qlike = float(t_qlike) / horizon
|
|
637
658
|
var_val_qlike = float(v_qlike) / horizon
|
|
638
659
|
var_test_r2 = float(t_r2)/horizon
|
|
@@ -640,10 +661,20 @@ class FichEn:
|
|
|
640
661
|
|
|
641
662
|
if self.early_stopping:
|
|
642
663
|
if len(self.val_errors) > 0:
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
664
|
+
if self.loss == "MAE":
|
|
665
|
+
current_min = min(self.val_mae)
|
|
666
|
+
best_so_far = min(self.best_val_mae, current_min)
|
|
667
|
+
no_improvement_count = len(self.val_mae) - self.val_mae.index(best_so_far) - 1
|
|
668
|
+
elif self.loss == "MSE":
|
|
669
|
+
current_min = min(self.val_errors)
|
|
670
|
+
best_so_far = min(self.best_val_error, current_min)
|
|
671
|
+
no_improvement_count = len(self.val_errors) - self.val_errors.index(best_so_far) - 1
|
|
672
|
+
elif self.loss == "QLIKE":
|
|
673
|
+
current_min = min(self.val_qlike)
|
|
674
|
+
best_so_far = min(self.best_val_qlike, current_min)
|
|
675
|
+
no_improvement_count = len(self.val_qlike) - self.val_qlike.index(best_so_far) - 1
|
|
676
|
+
else:
|
|
677
|
+
raise "Unavailable loss function"
|
|
647
678
|
|
|
648
679
|
if no_improvement_count >= self.patience:
|
|
649
680
|
self.dquantprint(f'Early stopping at {i} trees (no improvement for {self.patience} steps)')
|
|
@@ -655,6 +686,8 @@ class FichEn:
|
|
|
655
686
|
|
|
656
687
|
self.train_errors.append(var_test_error)
|
|
657
688
|
self.val_errors.append(var_val_error)
|
|
689
|
+
self.train_mae.append(var_test_mae)
|
|
690
|
+
self.val_mae.append(var_val_mae)
|
|
658
691
|
self.train_qlike.append(var_test_qlike)
|
|
659
692
|
self.val_qlike.append(var_val_qlike)
|
|
660
693
|
self.train_r2.append(var_test_r2)
|
|
@@ -663,6 +696,8 @@ class FichEn:
|
|
|
663
696
|
self.dquantprint('Validation QLIKE: ', var_val_qlike)
|
|
664
697
|
self.dquantprint('Train MSE: ', var_test_error)
|
|
665
698
|
self.dquantprint('Validation MSE: ', var_val_error)
|
|
699
|
+
self.dquantprint('Train MAE: ', var_test_mae)
|
|
700
|
+
self.dquantprint('Validation MAE: ', var_val_mae)
|
|
666
701
|
self.dquantprint('Train r2: ', var_test_r2)
|
|
667
702
|
self.dquantprint('Validation r2: ', var_val_r2)
|
|
668
703
|
self.dquantprint(f"{time.time() - start} seconds spent")
|
|
@@ -1439,7 +1474,8 @@ class FichEn:
|
|
|
1439
1474
|
|
|
1440
1475
|
|
|
1441
1476
|
class VolClustGB(FichEn):
|
|
1442
|
-
def __init__(self, sett, early_stopping=True, output=True):
|
|
1477
|
+
def __init__(self, sett, early_stopping=True, output=True, loss="MAE"):
|
|
1478
|
+
self.loss = loss
|
|
1443
1479
|
self.output = output
|
|
1444
1480
|
self.models = []
|
|
1445
1481
|
self.scaler = StandardScaler()
|
|
@@ -1462,7 +1498,8 @@ class VolClustGB(FichEn):
|
|
|
1462
1498
|
}
|
|
1463
1499
|
self.meta = {
|
|
1464
1500
|
"model_type": "gb",
|
|
1465
|
-
"model_settings": self.default_sett
|
|
1501
|
+
"model_settings": self.default_sett,
|
|
1502
|
+
"model_loss": loss
|
|
1466
1503
|
}
|
|
1467
1504
|
if sett == {}:
|
|
1468
1505
|
self.base_model = GradientBoostingRegressor(**self.default_sett)
|
|
@@ -1599,7 +1636,8 @@ class VolClustGB(FichEn):
|
|
|
1599
1636
|
|
|
1600
1637
|
|
|
1601
1638
|
class VolClustXGB(FichEn):
|
|
1602
|
-
def __init__(self, sett, early_stopping=True, output=True,
|
|
1639
|
+
def __init__(self, sett, early_stopping=True, output=True, loss="QLIKE"):
|
|
1640
|
+
self.loss = loss
|
|
1603
1641
|
self.output = output
|
|
1604
1642
|
self.models = []
|
|
1605
1643
|
self.scaler = StandardScaler()
|
|
@@ -1623,15 +1661,18 @@ class VolClustXGB(FichEn):
|
|
|
1623
1661
|
'device': 'cpu'
|
|
1624
1662
|
}
|
|
1625
1663
|
|
|
1626
|
-
if
|
|
1664
|
+
if loss == "MSE":
|
|
1627
1665
|
self.default_sett['objective'] = 'reg:squarederror'
|
|
1666
|
+
elif loss == "MAE":
|
|
1667
|
+
self.default_sett['objective'] = 'reg:absoluteerror'
|
|
1628
1668
|
|
|
1629
1669
|
self.meta = {
|
|
1630
1670
|
"model_type": "xgb",
|
|
1631
|
-
"model_settings": self.default_sett
|
|
1671
|
+
"model_settings": self.default_sett,
|
|
1672
|
+
"model_loss": loss
|
|
1632
1673
|
}
|
|
1633
1674
|
if sett == {}:
|
|
1634
|
-
if
|
|
1675
|
+
if loss == "QLIKE":
|
|
1635
1676
|
self.base_model = xgboost.XGBRegressor(**self.default_sett, objective=self.qlike_obj)
|
|
1636
1677
|
else:
|
|
1637
1678
|
self.base_model = xgboost.XGBRegressor(**self.default_sett)
|
|
@@ -1640,7 +1681,7 @@ class VolClustXGB(FichEn):
|
|
|
1640
1681
|
if sett['objective']: del sett['objective']
|
|
1641
1682
|
except KeyError:
|
|
1642
1683
|
pass
|
|
1643
|
-
if
|
|
1684
|
+
if loss == "QLIKE":
|
|
1644
1685
|
self.base_model = xgboost.XGBRegressor(**sett, objective=self.qlike_obj)
|
|
1645
1686
|
else:
|
|
1646
1687
|
self.base_model = xgboost.XGBRegressor(**sett)
|
|
@@ -1772,7 +1813,8 @@ class VolClustXGB(FichEn):
|
|
|
1772
1813
|
|
|
1773
1814
|
|
|
1774
1815
|
class VolClustLightGBM(FichEn):
|
|
1775
|
-
def __init__(self, sett, early_stopping=True, output=True,
|
|
1816
|
+
def __init__(self, sett, early_stopping=True, output=True, loss="QLIKE"):
|
|
1817
|
+
self.loss = loss
|
|
1776
1818
|
self.output = output
|
|
1777
1819
|
self.models = []
|
|
1778
1820
|
self.scaler = StandardScaler()
|
|
@@ -1798,15 +1840,18 @@ class VolClustLightGBM(FichEn):
|
|
|
1798
1840
|
'boosting_type': 'gbdt'
|
|
1799
1841
|
}
|
|
1800
1842
|
|
|
1801
|
-
if
|
|
1802
|
-
self.default_sett['objective'] = '
|
|
1843
|
+
if loss == "MSE":
|
|
1844
|
+
self.default_sett['objective'] = 'mse'
|
|
1845
|
+
elif loss == "MAE":
|
|
1846
|
+
self.default_sett['objective'] = 'mae'
|
|
1803
1847
|
|
|
1804
1848
|
self.meta = {
|
|
1805
1849
|
"model_type": "lgbm",
|
|
1806
|
-
"model_settings": self.default_sett
|
|
1850
|
+
"model_settings": self.default_sett,
|
|
1851
|
+
"models_loss": loss
|
|
1807
1852
|
}
|
|
1808
1853
|
if sett == {}:
|
|
1809
|
-
if
|
|
1854
|
+
if loss == "QLIKE":
|
|
1810
1855
|
self.base_model = lgb.LGBMRegressor(**self.default_sett, objective=self.qlike_obj)
|
|
1811
1856
|
else:
|
|
1812
1857
|
self.base_model = lgb.LGBMRegressor(**self.default_sett)
|
|
@@ -1815,7 +1860,7 @@ class VolClustLightGBM(FichEn):
|
|
|
1815
1860
|
if sett['objective']: del sett['objective']
|
|
1816
1861
|
except KeyError:
|
|
1817
1862
|
pass
|
|
1818
|
-
if
|
|
1863
|
+
if loss == "QLIKE":
|
|
1819
1864
|
self.base_model = lgb.LGBMRegressor(**sett, objective=self.qlike_obj)
|
|
1820
1865
|
else:
|
|
1821
1866
|
self.base_model = lgb.LGBMRegressor(**sett)
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def qlike_score(y_true, y_pred):
|
|
5
|
-
y_true = np.asarray(y_true, dtype=np.float64)
|
|
6
|
-
y_pred = np.asarray(y_pred, dtype=np.float64)
|
|
7
|
-
eps = 1e-10
|
|
8
|
-
y_pred = np.clip(y_pred, eps, None)
|
|
9
|
-
loss = np.log(y_pred) + y_true / y_pred
|
|
10
|
-
return np.mean(loss)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|