dquant 1.2.4__tar.gz → 1.3.0b0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dquant-1.2.4/src/DQuant.egg-info → dquant-1.3.0b0}/PKG-INFO +1 -1
- {dquant-1.2.4 → dquant-1.3.0b0}/pyproject.toml +1 -1
- {dquant-1.2.4 → dquant-1.3.0b0/src/DQuant.egg-info}/PKG-INFO +1 -1
- {dquant-1.2.4 → dquant-1.3.0b0}/src/dquant/models.py +60 -307
- {dquant-1.2.4 → dquant-1.3.0b0}/src/dquant/visual.py +4 -28
- {dquant-1.2.4 → dquant-1.3.0b0}/LICENSE +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/README.md +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/setup.cfg +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/src/DQuant.egg-info/SOURCES.txt +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/src/DQuant.egg-info/dependency_links.txt +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/src/DQuant.egg-info/requires.txt +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/src/DQuant.egg-info/top_level.txt +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/src/__init__.py +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/src/dquant/__init__.py +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/src/dquant/get_data.py +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/src/dquant/metrics.py +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/test/test.py +0 -0
- {dquant-1.2.4 → dquant-1.3.0b0}/test/test_load.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dquant
|
|
3
|
-
Version: 1.2.4
|
|
3
|
+
Version: 1.3.0b0
|
|
4
4
|
Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
|
|
5
5
|
Author: Denis Makarov
|
|
6
6
|
Project-URL: Homepage, https://dquant.space
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dquant
|
|
3
|
-
Version: 1.2.4
|
|
3
|
+
Version: 1.3.0b0
|
|
4
4
|
Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
|
|
5
5
|
Author: Denis Makarov
|
|
6
6
|
Project-URL: Homepage, https://dquant.space
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import json
|
|
2
|
-
import joblib
|
|
3
2
|
import re
|
|
4
3
|
import onnxruntime as ort
|
|
5
4
|
import os
|
|
@@ -10,14 +9,12 @@ from .visual import Visualization
|
|
|
10
9
|
import time as time
|
|
11
10
|
import numpy as np
|
|
12
11
|
import xgboost
|
|
13
|
-
from sklearn.
|
|
12
|
+
from sklearn.base import clone
|
|
14
13
|
from sklearn.model_selection import train_test_split
|
|
15
14
|
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
|
|
16
15
|
from .metrics import qlike_score
|
|
17
|
-
from sklearn.preprocessing import StandardScaler
|
|
18
16
|
from typing import Tuple
|
|
19
17
|
import pandas as pd
|
|
20
|
-
from skl2onnx import convert_sklearn
|
|
21
18
|
from skl2onnx.common.data_types import FloatTensorType
|
|
22
19
|
import warnings
|
|
23
20
|
warnings.filterwarnings('ignore', message='X does not have valid feature names')
|
|
@@ -356,7 +353,7 @@ class FichEn:
|
|
|
356
353
|
|
|
357
354
|
return np.array(tr_values)
|
|
358
355
|
|
|
359
|
-
def forward(self, data, feature_list, trees, input_bars, horizon, trees_count, show_results=False, feature_func=None, target_func=None):
|
|
356
|
+
def forward(self, data, feature_list, trees, train_window_size, input_bars, horizon, trees_count, show_results=False, feature_func=None, target_func=None):
|
|
360
357
|
self.input_bars = input_bars
|
|
361
358
|
self.horizon = horizon
|
|
362
359
|
self.trees_count = trees_count
|
|
@@ -365,6 +362,15 @@ class FichEn:
|
|
|
365
362
|
"horizon": self.horizon,
|
|
366
363
|
"trees_count": self.trees_count
|
|
367
364
|
}
|
|
365
|
+
if self.loss == "MAE":
|
|
366
|
+
loss_f = mean_absolute_error
|
|
367
|
+
elif self.loss == "MSE":
|
|
368
|
+
loss_f = mean_squared_error
|
|
369
|
+
elif self.loss == "QLIKE":
|
|
370
|
+
loss_f = qlike_score
|
|
371
|
+
else:
|
|
372
|
+
raise "Unavailable loss function"
|
|
373
|
+
|
|
368
374
|
x, y = self._DataSplitting(data, input_bars, horizon, True)
|
|
369
375
|
XX = []
|
|
370
376
|
YY = []
|
|
@@ -402,7 +408,6 @@ class FichEn:
|
|
|
402
408
|
x = np.array(XX)
|
|
403
409
|
y = np.array(YY)
|
|
404
410
|
|
|
405
|
-
train_window_size = input_bars
|
|
406
411
|
start_val_idx = train_window_size
|
|
407
412
|
|
|
408
413
|
total_iterations = len(x) - start_val_idx
|
|
@@ -415,8 +420,6 @@ class FichEn:
|
|
|
415
420
|
|
|
416
421
|
all_train_errors = []
|
|
417
422
|
all_val_errors = []
|
|
418
|
-
all_train_r2 = []
|
|
419
|
-
all_val_r2 = []
|
|
420
423
|
|
|
421
424
|
if isinstance(horizon, int):
|
|
422
425
|
horizon_list = list(range(horizon))
|
|
@@ -435,74 +438,53 @@ class FichEn:
|
|
|
435
438
|
X_val = x[val_idx:val_idx + 1] # form (1, n_features)
|
|
436
439
|
y_val_true = y[val_idx] # form (horizon,)
|
|
437
440
|
|
|
438
|
-
# === Normalization ===
|
|
439
|
-
scaler_X = StandardScaler()
|
|
440
|
-
scaler_y_local = StandardScaler()
|
|
441
|
-
|
|
442
|
-
X_train_scaled = scaler_X.fit_transform(X_train)
|
|
443
|
-
y_train_scaled = scaler_y_local.fit_transform(y_train)
|
|
444
|
-
|
|
445
|
-
X_val_scaled = scaler_X.transform(X_val)
|
|
446
|
-
|
|
447
441
|
# === training for each horizon ===
|
|
448
442
|
model_ex = self.base_model.__class__(**self.base_model.get_params())
|
|
449
443
|
model_ex.set_params(n_estimators=trees)
|
|
450
444
|
models_temp = []
|
|
451
445
|
for h_idx in horizon_list:
|
|
452
|
-
if h_idx >=
|
|
446
|
+
if h_idx >= y_train.shape[1]:
|
|
453
447
|
continue
|
|
454
448
|
|
|
455
|
-
y_h =
|
|
449
|
+
y_h = y_train[:, h_idx]
|
|
456
450
|
|
|
457
|
-
model = model_ex
|
|
458
|
-
model.fit(
|
|
451
|
+
model = clone(model_ex)
|
|
452
|
+
model.fit(X_train, y_h)
|
|
459
453
|
models_temp.append(model)
|
|
460
454
|
|
|
461
455
|
# === Foracesting train ===
|
|
462
456
|
train_preds_list = []
|
|
463
457
|
for model in models_temp:
|
|
464
|
-
train_preds_list.append(model.predict(
|
|
458
|
+
train_preds_list.append(model.predict(X_train))
|
|
465
459
|
train_preds = np.column_stack(train_preds_list) # (train_windows, horizon)
|
|
466
460
|
|
|
467
461
|
# Forecasting on validation data
|
|
468
462
|
val_preds_list = []
|
|
469
463
|
for model in models_temp:
|
|
470
|
-
val_preds_list.append(model.predict(
|
|
464
|
+
val_preds_list.append(model.predict(X_val))
|
|
471
465
|
val_preds = np.array(val_preds_list).flatten() # (horizon,)
|
|
472
466
|
|
|
473
|
-
|
|
474
|
-
y_train_inv = scaler_y_local.inverse_transform(y_train_scaled)
|
|
475
|
-
train_preds_inv = scaler_y_local.inverse_transform(train_preds)
|
|
476
|
-
|
|
477
|
-
y_val_true_inv = y_val_true
|
|
478
|
-
val_preds_inv = scaler_y_local.inverse_transform(val_preds.reshape(1, -1)).flatten()
|
|
479
|
-
|
|
480
467
|
# === Metrics ===
|
|
481
|
-
train_error =
|
|
482
|
-
val_error =
|
|
483
|
-
train_r2 = r2_score(y_train_inv.flatten(), train_preds_inv.flatten())
|
|
484
|
-
val_r2 = r2_score(y_val_true_inv, val_preds_inv)
|
|
468
|
+
train_error = loss_f(y_train.flatten(), train_preds.flatten())
|
|
469
|
+
val_error = loss_f(y_val_true, val_preds)
|
|
485
470
|
|
|
486
471
|
all_train_errors.append(train_error)
|
|
487
472
|
all_val_errors.append(val_error)
|
|
488
|
-
all_train_r2.append(train_r2)
|
|
489
|
-
all_val_r2.append(val_r2)
|
|
490
473
|
|
|
491
474
|
# === Progress bar ===
|
|
492
475
|
percent = (iter_num / total_iterations) * 100
|
|
493
476
|
filled = int(percent / 2)
|
|
494
477
|
bar = '█' * filled + '░' * (50 - filled)
|
|
495
478
|
self.dquantprint(
|
|
496
|
-
f'\rWalk-Forward: |{bar}| {percent:.1f}% - Iteration {iter_num}/{total_iterations} - Val
|
|
479
|
+
f'\rWalk-Forward: |{bar}| {percent:.1f}% - Iteration {iter_num}/{total_iterations} - Val {self.loss}: {val_error:.6f} - need time: {(time.time()-start_it)*(total_iterations-iter_num)} seconds',
|
|
497
480
|
end='', flush=True)
|
|
498
481
|
|
|
499
482
|
|
|
500
|
-
self.dquantprint(f"Mean validation error (
|
|
501
|
-
self.dquantprint(f"Mean validation R²: {np.mean(all_val_r2):.4f} +/- {np.std(all_val_r2):.4f}")
|
|
483
|
+
self.dquantprint(f"Mean validation error ({self.loss}): {np.mean(all_val_errors):.6f} +/- {np.std(all_val_errors):.6f}")
|
|
502
484
|
self.dquantprint(f"Maximum validation error: {np.max(all_val_errors):.6f}")
|
|
503
485
|
self.dquantprint(f"Minimum validation error: {np.min(all_val_errors):.6f}")
|
|
504
486
|
if show_results:
|
|
505
|
-
self.V.forward_validation_errors(all_val_errors
|
|
487
|
+
self.V.forward_validation_errors(all_val_errors)
|
|
506
488
|
return
|
|
507
489
|
|
|
508
490
|
def fit(self, data, feature_list, input_bars, horizon, trees_count, show_results=False, feature_func=None, target_func=None):
|
|
@@ -514,6 +496,15 @@ class FichEn:
|
|
|
514
496
|
"horizon": self.horizon,
|
|
515
497
|
"trees_count": self.trees_count
|
|
516
498
|
}
|
|
499
|
+
if self.loss == "MAE":
|
|
500
|
+
loss_f = mean_absolute_error
|
|
501
|
+
elif self.loss == "MSE":
|
|
502
|
+
loss_f = mean_squared_error
|
|
503
|
+
elif self.loss == "QLIKE":
|
|
504
|
+
loss_f = qlike_score
|
|
505
|
+
else:
|
|
506
|
+
raise "Unavailable loss function"
|
|
507
|
+
|
|
517
508
|
x, y = self._DataSplitting(data, input_bars, horizon, True)
|
|
518
509
|
XX = []
|
|
519
510
|
YY = []
|
|
@@ -553,33 +544,14 @@ class FichEn:
|
|
|
553
544
|
x = np.array(XX)
|
|
554
545
|
y = np.array(YY)
|
|
555
546
|
|
|
556
|
-
|
|
557
547
|
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False, random_state=42)
|
|
558
|
-
X_scaled = self.scaler.fit_transform(X_train)
|
|
559
|
-
X_test_scaled = self.scaler.transform(X_test)
|
|
560
|
-
#Y_scaled = self.scaler_y.fit_transform(y_train)
|
|
561
|
-
#Y_test_scaled = self.scaler_y.transform(y_test)
|
|
562
|
-
"""if hasattr(y, 'ndim') and y.ndim == 1:
|
|
563
|
-
self.X_shape = 1
|
|
564
|
-
#y_2d = y.values.reshape(-1, 1) if hasattr(y, 'values') else y.reshape(-1, 1)
|
|
565
|
-
else:
|
|
566
|
-
self.X_shape = y.shape[1]"""
|
|
567
548
|
|
|
568
549
|
self.X_shape = x.shape[1]
|
|
569
550
|
|
|
570
551
|
self.train_errors = []
|
|
571
552
|
self.val_errors = []
|
|
572
|
-
self.train_mae = []
|
|
573
|
-
self.val_mae = []
|
|
574
|
-
self.train_qlike = []
|
|
575
|
-
self.val_qlike = []
|
|
576
|
-
self.train_r2 = []
|
|
577
|
-
self.val_r2 = []
|
|
578
553
|
|
|
579
554
|
self.best_val_error = float('inf')
|
|
580
|
-
self.best_val_mae = float('inf')
|
|
581
|
-
self.best_val_qlike = float('inf')
|
|
582
|
-
self.best_r2 = -float('inf')
|
|
583
555
|
self.patience_counter = 0
|
|
584
556
|
self.patience = 3
|
|
585
557
|
|
|
@@ -590,12 +562,6 @@ class FichEn:
|
|
|
590
562
|
self.dquantprint(f'{i} trees')
|
|
591
563
|
t_error = 0
|
|
592
564
|
v_error = 0
|
|
593
|
-
t_mae = 0
|
|
594
|
-
v_mae = 0
|
|
595
|
-
t_qlike = 0
|
|
596
|
-
v_qlike = 0
|
|
597
|
-
t_r2 = 0
|
|
598
|
-
v_r2 = 0
|
|
599
565
|
if isinstance(horizon, int):
|
|
600
566
|
horizon_list = list(range(horizon))
|
|
601
567
|
else:
|
|
@@ -609,9 +575,8 @@ class FichEn:
|
|
|
609
575
|
y_h = y_train.iloc[:, h_idx] if hasattr(y_train, 'iloc') else y_train[:, h_idx]
|
|
610
576
|
|
|
611
577
|
valid_mask = ~pd.isna(y_h) if hasattr(y_h, 'isna') else ~np.isnan(y_h)
|
|
612
|
-
X_h =
|
|
578
|
+
X_h = X_train[valid_mask]
|
|
613
579
|
y_h_clean = y_h[valid_mask]
|
|
614
|
-
#y_h_clean_orig = self.scaler_y.inverse_transform(y_h_clean.reshape(-1, 1)).ravel()
|
|
615
580
|
|
|
616
581
|
if i != 1:
|
|
617
582
|
self.models[h_idx].set_params(n_estimators=i)
|
|
@@ -625,94 +590,49 @@ class FichEn:
|
|
|
625
590
|
y_h_v = y_test.iloc[:, h_idx] if hasattr(y_test, 'iloc') else y_test[:, h_idx]
|
|
626
591
|
|
|
627
592
|
valid_mask = ~pd.isna(y_h_v) if hasattr(y_h_v, 'isna') else ~np.isnan(y_h_v)
|
|
628
|
-
X_h_v =
|
|
593
|
+
X_h_v = X_test[valid_mask]
|
|
629
594
|
y_h_v_clean = y_h_v[valid_mask]
|
|
630
|
-
#y_h_v_clean_orig = self.scaler_y.inverse_transform(y_h_v_clean.reshape(-1, 1)).ravel()
|
|
631
595
|
if i != 1:
|
|
632
596
|
pred_train = self.models[h_idx].predict(X_h)
|
|
633
597
|
pred_val = self.models[h_idx].predict(X_h_v)
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
v_error += mean_squared_error(y_h_v_clean, pred_val)
|
|
638
|
-
t_mae += mean_absolute_error(y_h_clean, pred_train)
|
|
639
|
-
v_mae += mean_absolute_error(y_h_v_clean, pred_val)
|
|
640
|
-
t_qlike += qlike_score(y_h_clean, pred_train)
|
|
641
|
-
v_qlike += qlike_score(y_h_v_clean, pred_val)
|
|
642
|
-
t_r2 += r2_score(y_h_clean, pred_train)
|
|
643
|
-
v_r2 += r2_score(y_h_v_clean, pred_val)
|
|
598
|
+
|
|
599
|
+
t_error += loss_f(y_h_clean, pred_train)
|
|
600
|
+
v_error += loss_f(y_h_v_clean, pred_val)
|
|
644
601
|
else:
|
|
645
602
|
pred_train = model.predict(X_h)
|
|
646
603
|
pred_val = model.predict(X_h_v)
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
v_error += mean_squared_error(y_h_v_clean, pred_val)
|
|
651
|
-
t_mae += mean_absolute_error(y_h_clean, pred_train)
|
|
652
|
-
v_mae += mean_absolute_error(y_h_v_clean, pred_val)
|
|
653
|
-
t_qlike += qlike_score(y_h_clean, pred_train)
|
|
654
|
-
v_qlike += qlike_score(y_h_v_clean, pred_val)
|
|
655
|
-
t_r2 += r2_score(y_h_clean, pred_train)
|
|
656
|
-
v_r2 += r2_score(y_h_v_clean, pred_val)
|
|
604
|
+
|
|
605
|
+
t_error += loss_f(y_h_clean, pred_train)
|
|
606
|
+
v_error += loss_f(y_h_v_clean, pred_val)
|
|
657
607
|
|
|
658
608
|
|
|
659
609
|
var_test_error = float(t_error)/horizon
|
|
660
610
|
var_val_error = float(v_error)/horizon
|
|
661
|
-
var_test_mae = float(t_mae) / horizon
|
|
662
|
-
var_val_mae = float(v_mae) / horizon
|
|
663
|
-
var_test_qlike = float(t_qlike) / horizon
|
|
664
|
-
var_val_qlike = float(v_qlike) / horizon
|
|
665
|
-
var_test_r2 = float(t_r2)/horizon
|
|
666
|
-
var_val_r2 = float(v_r2)/horizon
|
|
667
611
|
|
|
668
612
|
if self.early_stopping:
|
|
669
613
|
if len(self.val_errors) > 0:
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
no_improvement_count = len(self.val_mae) - self.val_mae.index(best_so_far) - 1
|
|
674
|
-
elif self.loss == "MSE":
|
|
675
|
-
current_min = min(self.val_errors)
|
|
676
|
-
best_so_far = min(self.best_val_error, current_min)
|
|
677
|
-
no_improvement_count = len(self.val_errors) - self.val_errors.index(best_so_far) - 1
|
|
678
|
-
elif self.loss == "QLIKE":
|
|
679
|
-
current_min = min(self.val_qlike)
|
|
680
|
-
best_so_far = min(self.best_val_qlike, current_min)
|
|
681
|
-
no_improvement_count = len(self.val_qlike) - self.val_qlike.index(best_so_far) - 1
|
|
682
|
-
else:
|
|
683
|
-
raise "Unavailable loss function"
|
|
614
|
+
current_min = min(self.val_errors)
|
|
615
|
+
best_so_far = min(self.best_val_error, current_min)
|
|
616
|
+
no_improvement_count = len(self.val_errors) - self.val_errors.index(best_so_far) - 1
|
|
684
617
|
|
|
685
618
|
if no_improvement_count >= self.patience:
|
|
686
619
|
self.dquantprint(f'Early stopping at {i} trees (no improvement for {self.patience} steps)')
|
|
687
620
|
if show_results:
|
|
688
|
-
self.V.show_errors(self.train_errors, self.val_errors
|
|
689
|
-
self.train_r2, self.val_r2)
|
|
621
|
+
self.V.show_errors(self.train_errors, self.val_errors)
|
|
690
622
|
self.is_fitted = True
|
|
691
623
|
return
|
|
692
624
|
|
|
693
625
|
self.train_errors.append(var_test_error)
|
|
694
626
|
self.val_errors.append(var_val_error)
|
|
695
|
-
self.
|
|
696
|
-
self.
|
|
697
|
-
self.train_qlike.append(var_test_qlike)
|
|
698
|
-
self.val_qlike.append(var_val_qlike)
|
|
699
|
-
self.train_r2.append(var_test_r2)
|
|
700
|
-
self.val_r2.append(var_val_r2)
|
|
701
|
-
self.dquantprint('Train QLIKE: ', var_test_qlike)
|
|
702
|
-
self.dquantprint('Validation QLIKE: ', var_val_qlike)
|
|
703
|
-
self.dquantprint('Train MSE: ', var_test_error)
|
|
704
|
-
self.dquantprint('Validation MSE: ', var_val_error)
|
|
705
|
-
self.dquantprint('Train MAE: ', var_test_mae)
|
|
706
|
-
self.dquantprint('Validation MAE: ', var_val_mae)
|
|
707
|
-
self.dquantprint('Train r2: ', var_test_r2)
|
|
708
|
-
self.dquantprint('Validation r2: ', var_val_r2)
|
|
627
|
+
self.dquantprint(f'Train {self.loss}: ', var_test_error)
|
|
628
|
+
self.dquantprint(f'Validation {self.loss}: ', var_val_error)
|
|
709
629
|
self.dquantprint(f"{time.time() - start} seconds spent")
|
|
710
630
|
|
|
711
631
|
except KeyboardInterrupt:
|
|
712
632
|
self.dquantprint("\nTraining interrupted by Ctrl+C!")
|
|
713
633
|
|
|
714
634
|
if show_results:
|
|
715
|
-
self.V.show_errors(self.train_errors, self.val_errors
|
|
635
|
+
self.V.show_errors(self.train_errors, self.val_errors)
|
|
716
636
|
|
|
717
637
|
self.dquantprint('model is trained')
|
|
718
638
|
self.is_fitted = True
|
|
@@ -777,11 +697,10 @@ class FichEn:
|
|
|
777
697
|
X = X.astype(np.float32)
|
|
778
698
|
if len(X.shape) == 1:
|
|
779
699
|
X = X.reshape(1, -1)
|
|
780
|
-
X_scaled = self.scaler.transform(X)
|
|
781
700
|
|
|
782
701
|
predictions = [] #jj
|
|
783
702
|
for model in self.models:
|
|
784
|
-
pred = model.predict(
|
|
703
|
+
pred = model.predict(X)
|
|
785
704
|
if len(pred.shape) > 0 and pred.shape[0] > 1:
|
|
786
705
|
predictions.append(pred)
|
|
787
706
|
else:
|
|
@@ -820,7 +739,7 @@ class FichEn:
|
|
|
820
739
|
|
|
821
740
|
|
|
822
741
|
def show_train_results(self):
|
|
823
|
-
self.V.show_errors(self.train_errors, self.val_errors
|
|
742
|
+
self.V.show_errors(self.train_errors, self.val_errors)
|
|
824
743
|
|
|
825
744
|
|
|
826
745
|
def save_mql5(self, name):
|
|
@@ -1479,175 +1398,11 @@ class FichEn:
|
|
|
1479
1398
|
|
|
1480
1399
|
|
|
1481
1400
|
|
|
1482
|
-
class VolClustGB(FichEn):
|
|
1483
|
-
def __init__(self, sett, early_stopping=True, output=True, loss="MAE"):
|
|
1484
|
-
self.loss = loss
|
|
1485
|
-
self.output = output
|
|
1486
|
-
self.models = []
|
|
1487
|
-
self.scaler = StandardScaler()
|
|
1488
|
-
#self.scaler_y = StandardScaler()
|
|
1489
|
-
self.X_shape = 0
|
|
1490
|
-
self.is_fitted = False
|
|
1491
|
-
self.onnx_load = False
|
|
1492
|
-
self.early_stopping = early_stopping
|
|
1493
|
-
self.V = Visualization('dark')
|
|
1494
|
-
self.default_sett = {
|
|
1495
|
-
'loss': 'squared_error',
|
|
1496
|
-
'learning_rate': 0.01,
|
|
1497
|
-
'n_estimators': 1,
|
|
1498
|
-
'max_depth': 3,
|
|
1499
|
-
'min_samples_split': 5,
|
|
1500
|
-
'min_samples_leaf': 2,
|
|
1501
|
-
'subsample': 0.8,
|
|
1502
|
-
'random_state': 42,
|
|
1503
|
-
'warm_start': True
|
|
1504
|
-
}
|
|
1505
|
-
self.meta = {
|
|
1506
|
-
"model_type": "gb",
|
|
1507
|
-
"model_settings": self.default_sett,
|
|
1508
|
-
"model_loss": loss
|
|
1509
|
-
}
|
|
1510
|
-
if sett == {}:
|
|
1511
|
-
self.base_model = GradientBoostingRegressor(**self.default_sett)
|
|
1512
|
-
else:
|
|
1513
|
-
self.base_model = GradientBoostingRegressor(**sett)
|
|
1514
|
-
|
|
1515
|
-
def save(self, name, type_to_save='default'):
|
|
1516
|
-
if type_to_save == 'default':
|
|
1517
|
-
os.makedirs(name, exist_ok=True)
|
|
1518
|
-
initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
|
|
1519
|
-
|
|
1520
|
-
file_path = os.path.join(name, f"{name}_features.json")
|
|
1521
|
-
with open(file_path, 'w', encoding='utf-8') as f:
|
|
1522
|
-
json.dump(self.feature_list, f, ensure_ascii=False, indent=2)
|
|
1523
|
-
|
|
1524
|
-
self.meta = {
|
|
1525
|
-
"model_type": "gb",
|
|
1526
|
-
"model_settings": self.default_sett,
|
|
1527
|
-
"input_bars": self.input_bars,
|
|
1528
|
-
"horizon": self.horizon,
|
|
1529
|
-
"trees_count": self.trees_count,
|
|
1530
|
-
}
|
|
1531
|
-
file_path = os.path.join(name, f"{name}_model_settings.json")
|
|
1532
|
-
with open(file_path, 'w', encoding='utf-8') as f:
|
|
1533
|
-
json.dump(self.meta, f, ensure_ascii=False, indent=2)
|
|
1534
|
-
|
|
1535
|
-
if hasattr(self, 'scaler'):
|
|
1536
|
-
scaler_path = os.path.join(name, f"{name}_scaler.pkl")
|
|
1537
|
-
joblib.dump(self.scaler, scaler_path)
|
|
1538
|
-
|
|
1539
|
-
"""if hasattr(self, 'scaler_y'):
|
|
1540
|
-
scaler_path = os.path.join(name, f"{name}_scaler_y.pkl")
|
|
1541
|
-
joblib.dump(self.scaler_y, scaler_path)"""
|
|
1542
|
-
|
|
1543
|
-
for i in range(len(self.models)):
|
|
1544
|
-
onx = convert_sklearn(self.models[i], initial_types=initial_type, target_opset=12)
|
|
1545
|
-
|
|
1546
|
-
file_path = os.path.join(name, f"{name}_{i}.onnx")
|
|
1547
|
-
|
|
1548
|
-
with open(file_path, "wb") as f:
|
|
1549
|
-
f.write(onx.SerializeToString())
|
|
1550
|
-
elif type_to_save == 'mql5':
|
|
1551
|
-
self.save_mql5(name)
|
|
1552
|
-
onnx_dir = os.path.join(name, f"{name}_onnx")
|
|
1553
|
-
os.makedirs(onnx_dir, exist_ok=True)
|
|
1554
|
-
self.dquantprint(f"Directory for ONNX files created: {onnx_dir}")
|
|
1555
|
-
|
|
1556
|
-
initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
|
|
1557
|
-
|
|
1558
|
-
if hasattr(self, 'scaler') and self.scaler is not None:
|
|
1559
|
-
scaler_path = os.path.join(onnx_dir, f"{name}_scaler.pkl")
|
|
1560
|
-
joblib.dump(self.scaler, scaler_path)
|
|
1561
|
-
self.dquantprint(f"Scaler is saved in {scaler_path}")
|
|
1562
|
-
|
|
1563
|
-
"""if hasattr(self, 'scaler_y') and self.scaler_y is not None:
|
|
1564
|
-
scaler_path = os.path.join(onnx_dir, f"{name}_scaler_y.pkl")
|
|
1565
|
-
joblib.dump(self.scaler_y, scaler_path)
|
|
1566
|
-
self.dquantprint(f"Scalery is saved in {scaler_path}")"""
|
|
1567
|
-
|
|
1568
|
-
for i in range(len(self.models)):
|
|
1569
|
-
onx = convert_sklearn(self.models[i], initial_types=initial_type, target_opset=12)
|
|
1570
|
-
file_path = os.path.join(onnx_dir, f"{name}_{i}.onnx")
|
|
1571
|
-
with open(file_path, "wb") as f:
|
|
1572
|
-
f.write(onx.SerializeToString())
|
|
1573
|
-
self.dquantprint(f"Model {i} is saved in {file_path}")
|
|
1574
|
-
|
|
1575
|
-
self.dquantprint(f"All operations in directory '{name}' completed successfully!")
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
def load(self, name):
|
|
1579
|
-
self.loaded_models = []
|
|
1580
|
-
|
|
1581
|
-
if not os.path.exists(name):
|
|
1582
|
-
raise FileNotFoundError(f"Directory {name} not found")
|
|
1583
|
-
|
|
1584
|
-
try:
|
|
1585
|
-
file_path = os.path.join(name, f"{name}_features.json")
|
|
1586
|
-
with open(file_path, 'r', encoding='utf-8') as f:
|
|
1587
|
-
self.feature_list = json.load(f)
|
|
1588
|
-
except FileNotFoundError:
|
|
1589
|
-
self.dquantprint(f'Model {name} is not valid, file {name}_features.json is not found')
|
|
1590
|
-
return
|
|
1591
|
-
|
|
1592
|
-
try:
|
|
1593
|
-
file_path = os.path.join(name, f"{name}_model_settings.json")
|
|
1594
|
-
with open(file_path, 'r', encoding='utf-8') as f:
|
|
1595
|
-
self.meta = json.load(f)
|
|
1596
|
-
except FileNotFoundError:
|
|
1597
|
-
self.dquantprint(f'Model {name} is not valid, file {name}_model_settings.json is not found')
|
|
1598
|
-
return
|
|
1599
|
-
|
|
1600
|
-
if self.meta['model_type'] != 'gb':
|
|
1601
|
-
raise ValueError(f"Wrong model type, expected gb and not a {self.meta['model_type']}")
|
|
1602
|
-
|
|
1603
|
-
scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
|
|
1604
|
-
if scaler_files:
|
|
1605
|
-
scaler_path = os.path.join(name, scaler_files[0])
|
|
1606
|
-
self.scaler = joblib.load(scaler_path)
|
|
1607
|
-
|
|
1608
|
-
"""scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
|
|
1609
|
-
if scaler_files:
|
|
1610
|
-
scaler_path = os.path.join(name, scaler_files[0])"""
|
|
1611
|
-
#self.scaler_y = joblib.load(scaler_path)
|
|
1612
|
-
|
|
1613
|
-
model_files = [f for f in os.listdir(name) if f.endswith('.onnx')]
|
|
1614
|
-
|
|
1615
|
-
if not model_files:
|
|
1616
|
-
raise FileNotFoundError(f"No .onnx files found in directory {name}")
|
|
1617
|
-
|
|
1618
|
-
model_files.sort()
|
|
1619
|
-
ml = len(model_files)
|
|
1620
|
-
numbers = {}
|
|
1621
|
-
for f in model_files:
|
|
1622
|
-
match = re.search(r'_(\d+)\.onnx$', f)
|
|
1623
|
-
if match:
|
|
1624
|
-
num = int(match.group(1))
|
|
1625
|
-
numbers[num] = f
|
|
1626
|
-
|
|
1627
|
-
model_files = []
|
|
1628
|
-
for i in range(ml):
|
|
1629
|
-
model_files.append(numbers[i])
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
for model_file in model_files:
|
|
1633
|
-
model_path = os.path.join(name, model_file)
|
|
1634
|
-
session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
|
|
1635
|
-
|
|
1636
|
-
input_info = session.get_inputs()[0]
|
|
1637
|
-
self.loaded_models.append(session)
|
|
1638
|
-
|
|
1639
|
-
self.onnx_load = True
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
1401
|
class VolClustXGB(FichEn):
|
|
1645
1402
|
def __init__(self, sett, early_stopping=True, output=True, loss="QLIKE"):
|
|
1646
1403
|
self.loss = loss
|
|
1647
1404
|
self.output = output
|
|
1648
1405
|
self.models = []
|
|
1649
|
-
self.scaler = StandardScaler()
|
|
1650
|
-
#self.scaler_y = StandardScaler()
|
|
1651
1406
|
self.X_shape = 0
|
|
1652
1407
|
self.is_fitted = False
|
|
1653
1408
|
self.onnx_load = False
|
|
@@ -1713,9 +1468,9 @@ class VolClustXGB(FichEn):
|
|
|
1713
1468
|
with open(file_path, 'w', encoding='utf-8') as f:
|
|
1714
1469
|
json.dump(self.meta, f, ensure_ascii=False, indent=2)
|
|
1715
1470
|
|
|
1716
|
-
if hasattr(self, 'scaler'):
|
|
1471
|
+
"""if hasattr(self, 'scaler'):
|
|
1717
1472
|
scaler_path = os.path.join(name, f"{name}_scaler.pkl")
|
|
1718
|
-
joblib.dump(self.scaler, scaler_path)
|
|
1473
|
+
joblib.dump(self.scaler, scaler_path)"""
|
|
1719
1474
|
|
|
1720
1475
|
"""if hasattr(self, 'scaler_y'):
|
|
1721
1476
|
scaler_path = os.path.join(name, f"{name}_scaler_y.pkl")
|
|
@@ -1734,10 +1489,10 @@ class VolClustXGB(FichEn):
|
|
|
1734
1489
|
|
|
1735
1490
|
initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
|
|
1736
1491
|
|
|
1737
|
-
if hasattr(self, 'scaler') and self.scaler is not None:
|
|
1492
|
+
"""if hasattr(self, 'scaler') and self.scaler is not None:
|
|
1738
1493
|
scaler_path = os.path.join(onnx_dir, f"{name}_scaler.pkl")
|
|
1739
1494
|
joblib.dump(self.scaler, scaler_path)
|
|
1740
|
-
self.dquantprint(f"Scaler is saved in {scaler_path}")
|
|
1495
|
+
self.dquantprint(f"Scaler is saved in {scaler_path}")"""
|
|
1741
1496
|
|
|
1742
1497
|
"""if hasattr(self, 'scaler_y') and self.scaler_y is not None:
|
|
1743
1498
|
scaler_path = os.path.join(onnx_dir, f"{name}_scaler_y.pkl")
|
|
@@ -1779,10 +1534,10 @@ class VolClustXGB(FichEn):
|
|
|
1779
1534
|
if self.meta['model_type'] != 'xgb':
|
|
1780
1535
|
raise ValueError(f"Wrong model type, expected xgb and not a {self.meta['model_type']}")
|
|
1781
1536
|
|
|
1782
|
-
scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
|
|
1537
|
+
"""scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
|
|
1783
1538
|
if scaler_files:
|
|
1784
1539
|
scaler_path = os.path.join(name, scaler_files[0])
|
|
1785
|
-
self.scaler = joblib.load(scaler_path)
|
|
1540
|
+
self.scaler = joblib.load(scaler_path)"""
|
|
1786
1541
|
|
|
1787
1542
|
"""scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
|
|
1788
1543
|
if scaler_files:
|
|
@@ -1823,8 +1578,6 @@ class VolClustLightGBM(FichEn):
|
|
|
1823
1578
|
self.loss = loss
|
|
1824
1579
|
self.output = output
|
|
1825
1580
|
self.models = []
|
|
1826
|
-
self.scaler = StandardScaler()
|
|
1827
|
-
#self.scaler_y = StandardScaler()
|
|
1828
1581
|
self.X_shape = 0
|
|
1829
1582
|
self.is_fitted = False
|
|
1830
1583
|
self.onnx_load = False
|
|
@@ -1892,9 +1645,9 @@ class VolClustLightGBM(FichEn):
|
|
|
1892
1645
|
with open(file_path, 'w', encoding='utf-8') as f:
|
|
1893
1646
|
json.dump(self.meta, f, ensure_ascii=False, indent=2)
|
|
1894
1647
|
|
|
1895
|
-
if hasattr(self, 'scaler'):
|
|
1648
|
+
"""if hasattr(self, 'scaler'):
|
|
1896
1649
|
scaler_path = os.path.join(name, f"{name}_scaler.pkl")
|
|
1897
|
-
joblib.dump(self.scaler, scaler_path)
|
|
1650
|
+
joblib.dump(self.scaler, scaler_path)"""
|
|
1898
1651
|
|
|
1899
1652
|
"""if hasattr(self, 'scaler_y'):
|
|
1900
1653
|
scaler_path = os.path.join(name, f"{name}_scaler_y.pkl")
|
|
@@ -1913,10 +1666,10 @@ class VolClustLightGBM(FichEn):
|
|
|
1913
1666
|
|
|
1914
1667
|
initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
|
|
1915
1668
|
|
|
1916
|
-
if hasattr(self, 'scaler') and self.scaler is not None:
|
|
1669
|
+
"""if hasattr(self, 'scaler') and self.scaler is not None:
|
|
1917
1670
|
scaler_path = os.path.join(onnx_dir, f"{name}_scaler.pkl")
|
|
1918
1671
|
joblib.dump(self.scaler, scaler_path)
|
|
1919
|
-
self.dquantprint(f"Scaler is saved in {scaler_path}")
|
|
1672
|
+
self.dquantprint(f"Scaler is saved in {scaler_path}")"""
|
|
1920
1673
|
|
|
1921
1674
|
"""if hasattr(self, 'scaler_y') and self.scaler_y is not None:
|
|
1922
1675
|
scaler_path = os.path.join(onnx_dir, f"{name}_scaler_y.pkl")
|
|
@@ -1958,10 +1711,10 @@ class VolClustLightGBM(FichEn):
|
|
|
1958
1711
|
if self.meta['model_type'] != 'lgbm':
|
|
1959
1712
|
raise ValueError(f"Wrong model type, expected lgbm and not a {self.meta['model_type']}")
|
|
1960
1713
|
|
|
1961
|
-
scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
|
|
1714
|
+
"""scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
|
|
1962
1715
|
if scaler_files:
|
|
1963
1716
|
scaler_path = os.path.join(name, scaler_files[0])
|
|
1964
|
-
self.scaler = joblib.load(scaler_path)
|
|
1717
|
+
self.scaler = joblib.load(scaler_path)"""
|
|
1965
1718
|
|
|
1966
1719
|
"""scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
|
|
1967
1720
|
if scaler_files:
|
|
@@ -314,8 +314,8 @@ class Visualization:
|
|
|
314
314
|
|
|
315
315
|
plt.show()
|
|
316
316
|
|
|
317
|
-
def forward_validation_errors(self, mse_errors,
|
|
318
|
-
fig, (ax1
|
|
317
|
+
def forward_validation_errors(self, mse_errors, save_path=None):
|
|
318
|
+
fig, (ax1) = plt.subplots(1, 1, figsize=(15, 6))
|
|
319
319
|
|
|
320
320
|
ax1.plot(list(mse_errors), label='Train Loss',
|
|
321
321
|
color=self.config['colors']['primary'])
|
|
@@ -327,17 +327,6 @@ class Visualization:
|
|
|
327
327
|
self.__style_axes(ax1)
|
|
328
328
|
self.__style_legend(ax1)
|
|
329
329
|
|
|
330
|
-
ax2.plot(list(r2_errors), label='Train R²',
|
|
331
|
-
color=self.config['colors']['primary'])
|
|
332
|
-
|
|
333
|
-
ax2.set_xlabel('Trees')
|
|
334
|
-
ax2.set_ylabel('R² Score')
|
|
335
|
-
ax2.set_title('R² Score over Trees')
|
|
336
|
-
ax2.grid(True)
|
|
337
|
-
|
|
338
|
-
self.__style_axes(ax2)
|
|
339
|
-
self.__style_legend(ax2)
|
|
340
|
-
|
|
341
330
|
plt.tight_layout()
|
|
342
331
|
|
|
343
332
|
if save_path:
|
|
@@ -346,8 +335,8 @@ class Visualization:
|
|
|
346
335
|
plt.show()
|
|
347
336
|
|
|
348
337
|
|
|
349
|
-
def show_errors(self, train_errors, val_errors,
|
|
350
|
-
fig, (ax1
|
|
338
|
+
def show_errors(self, train_errors, val_errors, save_path=None):
|
|
339
|
+
fig, (ax1) = plt.subplots(1, 1, figsize=(15, 6))
|
|
351
340
|
|
|
352
341
|
ax1.plot(list(train_errors), label='Train Loss',
|
|
353
342
|
color=self.config['colors']['primary'])
|
|
@@ -360,19 +349,6 @@ class Visualization:
|
|
|
360
349
|
|
|
361
350
|
self.__style_axes(ax1)
|
|
362
351
|
self.__style_legend(ax1)
|
|
363
|
-
|
|
364
|
-
ax2.plot(list(train_r2), label='Train R²',
|
|
365
|
-
color=self.config['colors']['primary'])
|
|
366
|
-
ax2.plot(list(val_r2), label='Validation R²',
|
|
367
|
-
color=self.config['colors']['secondary'])
|
|
368
|
-
ax2.set_xlabel('Trees')
|
|
369
|
-
ax2.set_ylabel('R² Score')
|
|
370
|
-
ax2.set_title('R² Score over Trees')
|
|
371
|
-
ax2.grid(True)
|
|
372
|
-
|
|
373
|
-
self.__style_axes(ax2)
|
|
374
|
-
self.__style_legend(ax2)
|
|
375
|
-
|
|
376
352
|
plt.tight_layout()
|
|
377
353
|
|
|
378
354
|
if save_path:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|