dquant 1.2.1__tar.gz → 1.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dquant
3
- Version: 1.2.1
3
+ Version: 1.2.3
4
4
  Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
5
5
  Author: Denis Makarov
6
6
  Project-URL: Homepage, https://dquant.space
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dquant"
7
- version = "1.2.1"
7
+ version = "1.2.3"
8
8
  authors = [
9
9
  { name="Denis Makarov" },
10
10
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dquant
3
- Version: 1.2.1
3
+ Version: 1.2.3
4
4
  Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
5
5
  Author: Denis Makarov
6
6
  Project-URL: Homepage, https://dquant.space
@@ -0,0 +1,8 @@
1
+ import numpy as np
2
+
3
+
4
def qlike_score(y_true, y_pred):
    """Return the mean QLIKE loss between observed and predicted values.

    The per-element loss is ``log(pred) + true / pred``; the function
    returns its arithmetic mean over all elements.

    Args:
        y_true: Observed values (array-like). Presumably realized
            variances, going by the ``sigma2`` naming -- confirm with callers.
        y_pred: Predicted values (array-like). Entries below ``1e-10`` are
            raised to ``1e-10`` so the logarithm and the ratio stay finite.

    Returns:
        The mean loss as a NumPy float scalar.
    """
    # Coerce both inputs to plain float64 arrays.  Without this, pandas
    # Series would be combined by index label rather than by position, so
    # two series carrying different indexes would silently yield NaNs or a
    # mean over the wrong pairs.
    sigma2_true = np.asarray(y_true, dtype=np.float64)
    sigma2_pred = np.asarray(y_pred, dtype=np.float64)

    # Floor the predictions: log() and the division are undefined at <= 0.
    sigma2_pred = np.maximum(sigma2_pred, 1e-10)

    return np.mean(np.log(sigma2_pred) + sigma2_true / sigma2_pred)
8
+
@@ -12,7 +12,7 @@ import numpy as np
12
12
  import xgboost
13
13
  from sklearn.ensemble import GradientBoostingRegressor
14
14
  from sklearn.model_selection import train_test_split
15
- from sklearn.metrics import mean_squared_error, r2_score
15
+ from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
16
16
  from .metrics import qlike_score
17
17
  from sklearn.preprocessing import StandardScaler
18
18
  from typing import Tuple
@@ -72,9 +72,9 @@ class FichEn:
72
72
  raw_windows_X = []
73
73
  raw_windows_y = []
74
74
 
75
- for i in range(window_in + 1, len(data) - window_out + 1):
75
+ for i in range(window_in + 1, len(data) - (window_out + 2)):
76
76
  x_window = data.iloc[i - window_in: i]
77
- y_window = data.iloc[i - 1: i + window_out]
77
+ y_window = data.iloc[i: i + window_out+1]
78
78
 
79
79
  raw_windows_X.append(x_window)
80
80
  raw_windows_y.append(y_window)
@@ -563,12 +563,15 @@ class FichEn:
563
563
 
564
564
  self.train_errors = []
565
565
  self.val_errors = []
566
+ self.train_mae = []
567
+ self.val_mae = []
566
568
  self.train_qlike = []
567
569
  self.val_qlike = []
568
570
  self.train_r2 = []
569
571
  self.val_r2 = []
570
572
 
571
573
  self.best_val_error = float('inf')
574
+ self.best_val_mae = float('inf')
572
575
  self.best_val_qlike = float('inf')
573
576
  self.best_r2 = -float('inf')
574
577
  self.patience_counter = 0
@@ -581,6 +584,8 @@ class FichEn:
581
584
  self.dquantprint(f'{i} trees')
582
585
  t_error = 0
583
586
  v_error = 0
587
+ t_mae = 0
588
+ v_mae = 0
584
589
  t_qlike = 0
585
590
  v_qlike = 0
586
591
  t_r2 = 0
@@ -600,6 +605,7 @@ class FichEn:
600
605
  valid_mask = ~pd.isna(y_h) if hasattr(y_h, 'isna') else ~np.isnan(y_h)
601
606
  X_h = X_scaled[valid_mask]
602
607
  y_h_clean = y_h[valid_mask]
608
+ y_h_clean_orig = self.scaler_y.inverse_transform(y_h_clean.reshape(-1, 1)).ravel()
603
609
 
604
610
  if i != 1:
605
611
  self.models[h_idx].set_params(n_estimators=i)
@@ -615,24 +621,39 @@ class FichEn:
615
621
  valid_mask = ~pd.isna(y_h_v) if hasattr(y_h_v, 'isna') else ~np.isnan(y_h_v)
616
622
  X_h_v = X_test_scaled[valid_mask]
617
623
  y_h_v_clean = y_h_v[valid_mask]
624
+ y_h_v_clean_orig = self.scaler_y.inverse_transform(y_h_v_clean.reshape(-1, 1)).ravel()
618
625
  if i != 1:
619
- t_error += mean_squared_error(y_h_clean, self.models[h_idx].predict(X_h))
620
- v_error += mean_squared_error(y_h_v_clean, self.models[h_idx].predict(X_h_v))
621
- t_qlike += qlike_score(y_h_clean, self.models[h_idx].predict(X_h))
622
- v_qlike += qlike_score(y_h_v_clean, self.models[h_idx].predict(X_h_v))
623
- t_r2 += r2_score(y_h_clean, self.models[h_idx].predict(X_h))
624
- v_r2 += r2_score(y_h_v_clean, self.models[h_idx].predict(X_h_v))
626
+ pred_train = self.models[h_idx].predict(X_h)
627
+ pred_val = self.models[h_idx].predict(X_h_v)
628
+ pred_train_orig = self.scaler_y.inverse_transform(pred_train.reshape(-1, 1)).ravel()
629
+ pred_val_orig = self.scaler_y.inverse_transform(pred_val.reshape(-1, 1)).ravel()
630
+ t_error += mean_squared_error(y_h_clean, pred_train)
631
+ v_error += mean_squared_error(y_h_v_clean, pred_val)
632
+ t_mae += mean_absolute_error(y_h_clean, pred_train)
633
+ v_mae += mean_absolute_error(y_h_v_clean, pred_val)
634
+ t_qlike += qlike_score(y_h_clean_orig, pred_train_orig)
635
+ v_qlike += qlike_score(y_h_v_clean_orig, pred_val_orig)
636
+ t_r2 += r2_score(y_h_clean, pred_train)
637
+ v_r2 += r2_score(y_h_v_clean, pred_val)
625
638
  else:
626
- t_error += mean_squared_error(y_h_clean, model.predict(X_h))
627
- v_error += mean_squared_error(y_h_v_clean, model.predict(X_h_v))
628
- t_qlike += qlike_score(y_h_clean, model.predict(X_h))
629
- v_qlike += qlike_score(y_h_v_clean, model.predict(X_h_v))
630
- t_r2 += r2_score(y_h_clean, model.predict(X_h))
631
- v_r2 += r2_score(y_h_v_clean, model.predict(X_h_v))
639
+ pred_train = model.predict(X_h)
640
+ pred_val = model.predict(X_h_v)
641
+ pred_train_orig = self.scaler_y.inverse_transform(pred_train.reshape(-1, 1)).ravel()
642
+ pred_val_orig = self.scaler_y.inverse_transform(pred_val.reshape(-1, 1)).ravel()
643
+ t_error += mean_squared_error(y_h_clean, pred_train)
644
+ v_error += mean_squared_error(y_h_v_clean, pred_val)
645
+ t_mae += mean_absolute_error(y_h_clean, pred_train)
646
+ v_mae += mean_absolute_error(y_h_v_clean, pred_val)
647
+ t_qlike += qlike_score(y_h_clean_orig, pred_train_orig)
648
+ v_qlike += qlike_score(y_h_v_clean_orig, pred_val_orig)
649
+ t_r2 += r2_score(y_h_clean, pred_train)
650
+ v_r2 += r2_score(y_h_v_clean, pred_val)
632
651
 
633
652
 
634
653
  var_test_error = float(t_error)/horizon
635
654
  var_val_error = float(v_error)/horizon
655
+ var_test_mae = float(t_mae) / horizon
656
+ var_val_mae = float(v_mae) / horizon
636
657
  var_test_qlike = float(t_qlike) / horizon
637
658
  var_val_qlike = float(v_qlike) / horizon
638
659
  var_test_r2 = float(t_r2)/horizon
@@ -640,10 +661,20 @@ class FichEn:
640
661
 
641
662
  if self.early_stopping:
642
663
  if len(self.val_errors) > 0:
643
- current_min = min(self.val_errors)
644
- best_so_far = min(self.best_val_error, current_min)
645
-
646
- no_improvement_count = len(self.val_errors) - self.val_errors.index(best_so_far) - 1
664
+ if self.loss == "MAE":
665
+ current_min = min(self.val_mae)
666
+ best_so_far = min(self.best_val_mae, current_min)
667
+ no_improvement_count = len(self.val_mae) - self.val_mae.index(best_so_far) - 1
668
+ elif self.loss == "MSE":
669
+ current_min = min(self.val_errors)
670
+ best_so_far = min(self.best_val_error, current_min)
671
+ no_improvement_count = len(self.val_errors) - self.val_errors.index(best_so_far) - 1
672
+ elif self.loss == "QLIKE":
673
+ current_min = min(self.val_qlike)
674
+ best_so_far = min(self.best_val_qlike, current_min)
675
+ no_improvement_count = len(self.val_qlike) - self.val_qlike.index(best_so_far) - 1
676
+ else:
677
+ raise "Unavailable loss function"
647
678
 
648
679
  if no_improvement_count >= self.patience:
649
680
  self.dquantprint(f'Early stopping at {i} trees (no improvement for {self.patience} steps)')
@@ -655,6 +686,8 @@ class FichEn:
655
686
 
656
687
  self.train_errors.append(var_test_error)
657
688
  self.val_errors.append(var_val_error)
689
+ self.train_mae.append(var_test_mae)
690
+ self.val_mae.append(var_val_mae)
658
691
  self.train_qlike.append(var_test_qlike)
659
692
  self.val_qlike.append(var_val_qlike)
660
693
  self.train_r2.append(var_test_r2)
@@ -663,6 +696,8 @@ class FichEn:
663
696
  self.dquantprint('Validation QLIKE: ', var_val_qlike)
664
697
  self.dquantprint('Train MSE: ', var_test_error)
665
698
  self.dquantprint('Validation MSE: ', var_val_error)
699
+ self.dquantprint('Train MAE: ', var_test_mae)
700
+ self.dquantprint('Validation MAE: ', var_val_mae)
666
701
  self.dquantprint('Train r2: ', var_test_r2)
667
702
  self.dquantprint('Validation r2: ', var_val_r2)
668
703
  self.dquantprint(f"{time.time() - start} seconds spent")
@@ -1439,7 +1474,8 @@ class FichEn:
1439
1474
 
1440
1475
 
1441
1476
  class VolClustGB(FichEn):
1442
- def __init__(self, sett, early_stopping=True, output=True):
1477
+ def __init__(self, sett, early_stopping=True, output=True, loss="MAE"):
1478
+ self.loss = loss
1443
1479
  self.output = output
1444
1480
  self.models = []
1445
1481
  self.scaler = StandardScaler()
@@ -1462,7 +1498,8 @@ class VolClustGB(FichEn):
1462
1498
  }
1463
1499
  self.meta = {
1464
1500
  "model_type": "gb",
1465
- "model_settings": self.default_sett
1501
+ "model_settings": self.default_sett,
1502
+ "model_loss": loss
1466
1503
  }
1467
1504
  if sett == {}:
1468
1505
  self.base_model = GradientBoostingRegressor(**self.default_sett)
@@ -1599,7 +1636,8 @@ class VolClustGB(FichEn):
1599
1636
 
1600
1637
 
1601
1638
  class VolClustXGB(FichEn):
1602
- def __init__(self, sett, early_stopping=True, output=True, qlike=True):
1639
+ def __init__(self, sett, early_stopping=True, output=True, loss="QLIKE"):
1640
+ self.loss = loss
1603
1641
  self.output = output
1604
1642
  self.models = []
1605
1643
  self.scaler = StandardScaler()
@@ -1623,15 +1661,18 @@ class VolClustXGB(FichEn):
1623
1661
  'device': 'cpu'
1624
1662
  }
1625
1663
 
1626
- if qlike == False:
1664
+ if loss == "MSE":
1627
1665
  self.default_sett['objective'] = 'reg:squarederror'
1666
+ elif loss == "MAE":
1667
+ self.default_sett['objective'] = 'reg:absoluteerror'
1628
1668
 
1629
1669
  self.meta = {
1630
1670
  "model_type": "xgb",
1631
- "model_settings": self.default_sett
1671
+ "model_settings": self.default_sett,
1672
+ "model_loss": loss
1632
1673
  }
1633
1674
  if sett == {}:
1634
- if qlike:
1675
+ if loss == "QLIKE":
1635
1676
  self.base_model = xgboost.XGBRegressor(**self.default_sett, objective=self.qlike_obj)
1636
1677
  else:
1637
1678
  self.base_model = xgboost.XGBRegressor(**self.default_sett)
@@ -1640,7 +1681,7 @@ class VolClustXGB(FichEn):
1640
1681
  if sett['objective']: del sett['objective']
1641
1682
  except KeyError:
1642
1683
  pass
1643
- if qlike:
1684
+ if loss == "QLIKE":
1644
1685
  self.base_model = xgboost.XGBRegressor(**sett, objective=self.qlike_obj)
1645
1686
  else:
1646
1687
  self.base_model = xgboost.XGBRegressor(**sett)
@@ -1772,7 +1813,8 @@ class VolClustXGB(FichEn):
1772
1813
 
1773
1814
 
1774
1815
  class VolClustLightGBM(FichEn):
1775
- def __init__(self, sett, early_stopping=True, output=True, qlike=True):
1816
+ def __init__(self, sett, early_stopping=True, output=True, loss="QLIKE"):
1817
+ self.loss = loss
1776
1818
  self.output = output
1777
1819
  self.models = []
1778
1820
  self.scaler = StandardScaler()
@@ -1798,15 +1840,18 @@ class VolClustLightGBM(FichEn):
1798
1840
  'boosting_type': 'gbdt'
1799
1841
  }
1800
1842
 
1801
- if qlike == False:
1802
- self.default_sett['objective'] = 'regression'
1843
+ if loss == "MSE":
1844
+ self.default_sett['objective'] = 'mse'
1845
+ elif loss == "MAE":
1846
+ self.default_sett['objective'] = 'mae'
1803
1847
 
1804
1848
  self.meta = {
1805
1849
  "model_type": "lgbm",
1806
- "model_settings": self.default_sett
1850
+ "model_settings": self.default_sett,
1851
+ "models_loss": loss
1807
1852
  }
1808
1853
  if sett == {}:
1809
- if qlike:
1854
+ if loss == "QLIKE":
1810
1855
  self.base_model = lgb.LGBMRegressor(**self.default_sett, objective=self.qlike_obj)
1811
1856
  else:
1812
1857
  self.base_model = lgb.LGBMRegressor(**self.default_sett)
@@ -1815,7 +1860,7 @@ class VolClustLightGBM(FichEn):
1815
1860
  if sett['objective']: del sett['objective']
1816
1861
  except KeyError:
1817
1862
  pass
1818
- if qlike:
1863
+ if loss == "QLIKE":
1819
1864
  self.base_model = lgb.LGBMRegressor(**sett, objective=self.qlike_obj)
1820
1865
  else:
1821
1866
  self.base_model = lgb.LGBMRegressor(**sett)
@@ -1,10 +0,0 @@
1
- import numpy as np
2
-
3
-
4
def qlike_score(y_true, y_pred):
    """Compute the mean QLIKE loss, ``mean(log(pred) + true / pred)``.

    Both arguments are converted to float64 arrays, and predictions are
    clipped from below at ``1e-10`` before the logarithm and the division.
    """
    floor = 1e-10
    observed = np.asarray(y_true, dtype=np.float64)
    forecast = np.clip(np.asarray(y_pred, dtype=np.float64), floor, None)
    return np.mean(np.log(forecast) + observed / forecast)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes