dquant 1.2.3__tar.gz → 1.3.0b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dquant
3
- Version: 1.2.3
3
+ Version: 1.3.0b0
4
4
  Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
5
5
  Author: Denis Makarov
6
6
  Project-URL: Homepage, https://dquant.space
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dquant"
7
- version = "1.2.3"
7
+ version = "1.3.0-beta"
8
8
  authors = [
9
9
  { name="Denis Makarov" },
10
10
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dquant
3
- Version: 1.2.3
3
+ Version: 1.3.0b0
4
4
  Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
5
5
  Author: Denis Makarov
6
6
  Project-URL: Homepage, https://dquant.space
@@ -1,5 +1,4 @@
1
1
  import json
2
- import joblib
3
2
  import re
4
3
  import onnxruntime as ort
5
4
  import os
@@ -10,14 +9,12 @@ from .visual import Visualization
10
9
  import time as time
11
10
  import numpy as np
12
11
  import xgboost
13
- from sklearn.ensemble import GradientBoostingRegressor
12
+ from sklearn.base import clone
14
13
  from sklearn.model_selection import train_test_split
15
14
  from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
16
15
  from .metrics import qlike_score
17
- from sklearn.preprocessing import StandardScaler
18
16
  from typing import Tuple
19
17
  import pandas as pd
20
- from skl2onnx import convert_sklearn
21
18
  from skl2onnx.common.data_types import FloatTensorType
22
19
  import warnings
23
20
  warnings.filterwarnings('ignore', message='X does not have valid feature names')
@@ -356,7 +353,7 @@ class FichEn:
356
353
 
357
354
  return np.array(tr_values)
358
355
 
359
- def forward(self, data, feature_list, trees, input_bars, horizon, trees_count, show_results=False, feature_func=None, target_func=None):
356
+ def forward(self, data, feature_list, trees, train_window_size, input_bars, horizon, trees_count, show_results=False, feature_func=None, target_func=None):
360
357
  self.input_bars = input_bars
361
358
  self.horizon = horizon
362
359
  self.trees_count = trees_count
@@ -365,6 +362,15 @@ class FichEn:
365
362
  "horizon": self.horizon,
366
363
  "trees_count": self.trees_count
367
364
  }
365
+ if self.loss == "MAE":
366
+ loss_f = mean_absolute_error
367
+ elif self.loss == "MSE":
368
+ loss_f = mean_squared_error
369
+ elif self.loss == "QLIKE":
370
+ loss_f = qlike_score
371
+ else:
372
+ raise "Unavailable loss function"
373
+
368
374
  x, y = self._DataSplitting(data, input_bars, horizon, True)
369
375
  XX = []
370
376
  YY = []
@@ -402,7 +408,6 @@ class FichEn:
402
408
  x = np.array(XX)
403
409
  y = np.array(YY)
404
410
 
405
- train_window_size = input_bars
406
411
  start_val_idx = train_window_size
407
412
 
408
413
  total_iterations = len(x) - start_val_idx
@@ -415,8 +420,6 @@ class FichEn:
415
420
 
416
421
  all_train_errors = []
417
422
  all_val_errors = []
418
- all_train_r2 = []
419
- all_val_r2 = []
420
423
 
421
424
  if isinstance(horizon, int):
422
425
  horizon_list = list(range(horizon))
@@ -435,74 +438,53 @@ class FichEn:
435
438
  X_val = x[val_idx:val_idx + 1] # form (1, n_features)
436
439
  y_val_true = y[val_idx] # form (horizon,)
437
440
 
438
- # === Normalization ===
439
- scaler_X = StandardScaler()
440
- scaler_y_local = StandardScaler()
441
-
442
- X_train_scaled = scaler_X.fit_transform(X_train)
443
- y_train_scaled = scaler_y_local.fit_transform(y_train)
444
-
445
- X_val_scaled = scaler_X.transform(X_val)
446
-
447
441
  # === training for each horizon ===
448
442
  model_ex = self.base_model.__class__(**self.base_model.get_params())
449
443
  model_ex.set_params(n_estimators=trees)
450
444
  models_temp = []
451
445
  for h_idx in horizon_list:
452
- if h_idx >= y_train_scaled.shape[1]:
446
+ if h_idx >= y_train.shape[1]:
453
447
  continue
454
448
 
455
- y_h = y_train_scaled[:, h_idx]
449
+ y_h = y_train[:, h_idx]
456
450
 
457
- model = model_ex
458
- model.fit(X_train_scaled, y_h)
451
+ model = clone(model_ex)
452
+ model.fit(X_train, y_h)
459
453
  models_temp.append(model)
460
454
 
461
455
  # === Forecasting on training data ===
462
456
  train_preds_list = []
463
457
  for model in models_temp:
464
- train_preds_list.append(model.predict(X_train_scaled))
458
+ train_preds_list.append(model.predict(X_train))
465
459
  train_preds = np.column_stack(train_preds_list) # (train_windows, horizon)
466
460
 
467
461
  # Forecasting on validation data
468
462
  val_preds_list = []
469
463
  for model in models_temp:
470
- val_preds_list.append(model.predict(X_val_scaled))
464
+ val_preds_list.append(model.predict(X_val))
471
465
  val_preds = np.array(val_preds_list).flatten() # (horizon,)
472
466
 
473
-
474
- y_train_inv = scaler_y_local.inverse_transform(y_train_scaled)
475
- train_preds_inv = scaler_y_local.inverse_transform(train_preds)
476
-
477
- y_val_true_inv = y_val_true
478
- val_preds_inv = scaler_y_local.inverse_transform(val_preds.reshape(1, -1)).flatten()
479
-
480
467
  # === Metrics ===
481
- train_error = mean_squared_error(y_train_inv.flatten(), train_preds_inv.flatten())
482
- val_error = mean_squared_error(y_val_true_inv, val_preds_inv)
483
- train_r2 = r2_score(y_train_inv.flatten(), train_preds_inv.flatten())
484
- val_r2 = r2_score(y_val_true_inv, val_preds_inv)
468
+ train_error = loss_f(y_train.flatten(), train_preds.flatten())
469
+ val_error = loss_f(y_val_true, val_preds)
485
470
 
486
471
  all_train_errors.append(train_error)
487
472
  all_val_errors.append(val_error)
488
- all_train_r2.append(train_r2)
489
- all_val_r2.append(val_r2)
490
473
 
491
474
  # === Progress bar ===
492
475
  percent = (iter_num / total_iterations) * 100
493
476
  filled = int(percent / 2)
494
477
  bar = '█' * filled + '░' * (50 - filled)
495
478
  self.dquantprint(
496
- f'\rWalk-Forward: |{bar}| {percent:.1f}% - Iteration {iter_num}/{total_iterations} - Val MSE: {val_error:.6f} - need time: {(time.time()-start_it)*(total_iterations-iter_num)} seconds',
479
+ f'\rWalk-Forward: |{bar}| {percent:.1f}% - Iteration {iter_num}/{total_iterations} - Val {self.loss}: {val_error:.6f} - need time: {(time.time()-start_it)*(total_iterations-iter_num)} seconds',
497
480
  end='', flush=True)
498
481
 
499
482
 
500
- self.dquantprint(f"Mean validation error (MSE): {np.mean(all_val_errors):.6f} +/- {np.std(all_val_errors):.6f}")
501
- self.dquantprint(f"Mean validation R²: {np.mean(all_val_r2):.4f} +/- {np.std(all_val_r2):.4f}")
483
+ self.dquantprint(f"Mean validation error ({self.loss}): {np.mean(all_val_errors):.6f} +/- {np.std(all_val_errors):.6f}")
502
484
  self.dquantprint(f"Maximum validation error: {np.max(all_val_errors):.6f}")
503
485
  self.dquantprint(f"Minimum validation error: {np.min(all_val_errors):.6f}")
504
486
  if show_results:
505
- self.V.forward_validation_errors(all_val_errors, all_val_r2)
487
+ self.V.forward_validation_errors(all_val_errors)
506
488
  return
507
489
 
508
490
  def fit(self, data, feature_list, input_bars, horizon, trees_count, show_results=False, feature_func=None, target_func=None):
@@ -514,6 +496,15 @@ class FichEn:
514
496
  "horizon": self.horizon,
515
497
  "trees_count": self.trees_count
516
498
  }
499
+ if self.loss == "MAE":
500
+ loss_f = mean_absolute_error
501
+ elif self.loss == "MSE":
502
+ loss_f = mean_squared_error
503
+ elif self.loss == "QLIKE":
504
+ loss_f = qlike_score
505
+ else:
506
+ raise "Unavailable loss function"
507
+
517
508
  x, y = self._DataSplitting(data, input_bars, horizon, True)
518
509
  XX = []
519
510
  YY = []
@@ -553,27 +544,14 @@ class FichEn:
553
544
  x = np.array(XX)
554
545
  y = np.array(YY)
555
546
 
556
-
557
547
  X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False, random_state=42)
558
- X_scaled = self.scaler.fit_transform(X_train)
559
- X_test_scaled = self.scaler.transform(X_test)
560
- Y_scaled = self.scaler_y.fit_transform(y_train)
561
- Y_test_scaled = self.scaler_y.transform(y_test)
548
+
562
549
  self.X_shape = x.shape[1]
563
550
 
564
551
  self.train_errors = []
565
552
  self.val_errors = []
566
- self.train_mae = []
567
- self.val_mae = []
568
- self.train_qlike = []
569
- self.val_qlike = []
570
- self.train_r2 = []
571
- self.val_r2 = []
572
553
 
573
554
  self.best_val_error = float('inf')
574
- self.best_val_mae = float('inf')
575
- self.best_val_qlike = float('inf')
576
- self.best_r2 = -float('inf')
577
555
  self.patience_counter = 0
578
556
  self.patience = 3
579
557
 
@@ -584,28 +562,21 @@ class FichEn:
584
562
  self.dquantprint(f'{i} trees')
585
563
  t_error = 0
586
564
  v_error = 0
587
- t_mae = 0
588
- v_mae = 0
589
- t_qlike = 0
590
- v_qlike = 0
591
- t_r2 = 0
592
- v_r2 = 0
593
565
  if isinstance(horizon, int):
594
566
  horizon_list = list(range(horizon))
595
567
  else:
596
568
  horizon_list = horizon
597
- if len(Y_scaled.shape) == 2 and Y_scaled.shape[1] > 0:
569
+ if len(y_train.shape) == 2 and y_train.shape[1] > 0:
598
570
  for h_idx, h in enumerate(horizon_list):
599
- if h_idx >= Y_scaled.shape[1]:
571
+ if h_idx >= y_train.shape[1]:
600
572
  self.dquantprint(f"Warning: horizon {h} extends beyond y, skipping")
601
573
  continue
602
574
 
603
- y_h = Y_scaled.iloc[:, h_idx] if hasattr(Y_scaled, 'iloc') else Y_scaled[:, h_idx]
575
+ y_h = y_train.iloc[:, h_idx] if hasattr(y_train, 'iloc') else y_train[:, h_idx]
604
576
 
605
577
  valid_mask = ~pd.isna(y_h) if hasattr(y_h, 'isna') else ~np.isnan(y_h)
606
- X_h = X_scaled[valid_mask]
578
+ X_h = X_train[valid_mask]
607
579
  y_h_clean = y_h[valid_mask]
608
- y_h_clean_orig = self.scaler_y.inverse_transform(y_h_clean.reshape(-1, 1)).ravel()
609
580
 
610
581
  if i != 1:
611
582
  self.models[h_idx].set_params(n_estimators=i)
@@ -616,97 +587,52 @@ class FichEn:
616
587
  model.fit(X_h, y_h_clean)
617
588
  self.models.append(model)
618
589
 
619
- y_h_v = Y_test_scaled.iloc[:, h_idx] if hasattr(Y_test_scaled, 'iloc') else Y_test_scaled[:, h_idx]
590
+ y_h_v = y_test.iloc[:, h_idx] if hasattr(y_test, 'iloc') else y_test[:, h_idx]
620
591
 
621
592
  valid_mask = ~pd.isna(y_h_v) if hasattr(y_h_v, 'isna') else ~np.isnan(y_h_v)
622
- X_h_v = X_test_scaled[valid_mask]
593
+ X_h_v = X_test[valid_mask]
623
594
  y_h_v_clean = y_h_v[valid_mask]
624
- y_h_v_clean_orig = self.scaler_y.inverse_transform(y_h_v_clean.reshape(-1, 1)).ravel()
625
595
  if i != 1:
626
596
  pred_train = self.models[h_idx].predict(X_h)
627
597
  pred_val = self.models[h_idx].predict(X_h_v)
628
- pred_train_orig = self.scaler_y.inverse_transform(pred_train.reshape(-1, 1)).ravel()
629
- pred_val_orig = self.scaler_y.inverse_transform(pred_val.reshape(-1, 1)).ravel()
630
- t_error += mean_squared_error(y_h_clean, pred_train)
631
- v_error += mean_squared_error(y_h_v_clean, pred_val)
632
- t_mae += mean_absolute_error(y_h_clean, pred_train)
633
- v_mae += mean_absolute_error(y_h_v_clean, pred_val)
634
- t_qlike += qlike_score(y_h_clean_orig, pred_train_orig)
635
- v_qlike += qlike_score(y_h_v_clean_orig, pred_val_orig)
636
- t_r2 += r2_score(y_h_clean, pred_train)
637
- v_r2 += r2_score(y_h_v_clean, pred_val)
598
+
599
+ t_error += loss_f(y_h_clean, pred_train)
600
+ v_error += loss_f(y_h_v_clean, pred_val)
638
601
  else:
639
602
  pred_train = model.predict(X_h)
640
603
  pred_val = model.predict(X_h_v)
641
- pred_train_orig = self.scaler_y.inverse_transform(pred_train.reshape(-1, 1)).ravel()
642
- pred_val_orig = self.scaler_y.inverse_transform(pred_val.reshape(-1, 1)).ravel()
643
- t_error += mean_squared_error(y_h_clean, pred_train)
644
- v_error += mean_squared_error(y_h_v_clean, pred_val)
645
- t_mae += mean_absolute_error(y_h_clean, pred_train)
646
- v_mae += mean_absolute_error(y_h_v_clean, pred_val)
647
- t_qlike += qlike_score(y_h_clean_orig, pred_train_orig)
648
- v_qlike += qlike_score(y_h_v_clean_orig, pred_val_orig)
649
- t_r2 += r2_score(y_h_clean, pred_train)
650
- v_r2 += r2_score(y_h_v_clean, pred_val)
604
+
605
+ t_error += loss_f(y_h_clean, pred_train)
606
+ v_error += loss_f(y_h_v_clean, pred_val)
651
607
 
652
608
 
653
609
  var_test_error = float(t_error)/horizon
654
610
  var_val_error = float(v_error)/horizon
655
- var_test_mae = float(t_mae) / horizon
656
- var_val_mae = float(v_mae) / horizon
657
- var_test_qlike = float(t_qlike) / horizon
658
- var_val_qlike = float(v_qlike) / horizon
659
- var_test_r2 = float(t_r2)/horizon
660
- var_val_r2 = float(v_r2)/horizon
661
611
 
662
612
  if self.early_stopping:
663
613
  if len(self.val_errors) > 0:
664
- if self.loss == "MAE":
665
- current_min = min(self.val_mae)
666
- best_so_far = min(self.best_val_mae, current_min)
667
- no_improvement_count = len(self.val_mae) - self.val_mae.index(best_so_far) - 1
668
- elif self.loss == "MSE":
669
- current_min = min(self.val_errors)
670
- best_so_far = min(self.best_val_error, current_min)
671
- no_improvement_count = len(self.val_errors) - self.val_errors.index(best_so_far) - 1
672
- elif self.loss == "QLIKE":
673
- current_min = min(self.val_qlike)
674
- best_so_far = min(self.best_val_qlike, current_min)
675
- no_improvement_count = len(self.val_qlike) - self.val_qlike.index(best_so_far) - 1
676
- else:
677
- raise "Unavailable loss function"
614
+ current_min = min(self.val_errors)
615
+ best_so_far = min(self.best_val_error, current_min)
616
+ no_improvement_count = len(self.val_errors) - self.val_errors.index(best_so_far) - 1
678
617
 
679
618
  if no_improvement_count >= self.patience:
680
619
  self.dquantprint(f'Early stopping at {i} trees (no improvement for {self.patience} steps)')
681
620
  if show_results:
682
- self.V.show_errors(self.train_errors, self.val_errors,
683
- self.train_r2, self.val_r2)
621
+ self.V.show_errors(self.train_errors, self.val_errors)
684
622
  self.is_fitted = True
685
623
  return
686
624
 
687
625
  self.train_errors.append(var_test_error)
688
626
  self.val_errors.append(var_val_error)
689
- self.train_mae.append(var_test_mae)
690
- self.val_mae.append(var_val_mae)
691
- self.train_qlike.append(var_test_qlike)
692
- self.val_qlike.append(var_val_qlike)
693
- self.train_r2.append(var_test_r2)
694
- self.val_r2.append(var_val_r2)
695
- self.dquantprint('Train QLIKE: ', var_test_qlike)
696
- self.dquantprint('Validation QLIKE: ', var_val_qlike)
697
- self.dquantprint('Train MSE: ', var_test_error)
698
- self.dquantprint('Validation MSE: ', var_val_error)
699
- self.dquantprint('Train MAE: ', var_test_mae)
700
- self.dquantprint('Validation MAE: ', var_val_mae)
701
- self.dquantprint('Train r2: ', var_test_r2)
702
- self.dquantprint('Validation r2: ', var_val_r2)
627
+ self.dquantprint(f'Train {self.loss}: ', var_test_error)
628
+ self.dquantprint(f'Validation {self.loss}: ', var_val_error)
703
629
  self.dquantprint(f"{time.time() - start} seconds spent")
704
630
 
705
631
  except KeyboardInterrupt:
706
632
  self.dquantprint("\nTraining interrupted by Ctrl+C!")
707
633
 
708
634
  if show_results:
709
- self.V.show_errors(self.train_errors, self.val_errors, self.train_r2, self.val_r2)
635
+ self.V.show_errors(self.train_errors, self.val_errors)
710
636
 
711
637
  self.dquantprint('model is trained')
712
638
  self.is_fitted = True
@@ -748,7 +674,7 @@ class FichEn:
748
674
  pred_array = pred_array.T
749
675
  elif pred_array.shape[0] > 1 and pred_array.shape[1] == 30:
750
676
  pred_array = pred_array[0:1, :]
751
- predictions = self.scaler_y.inverse_transform(pred_array).flatten()
677
+ predictions = pred_array.flatten()
752
678
 
753
679
  if show:
754
680
  epsilon = 1e-10
@@ -771,11 +697,10 @@ class FichEn:
771
697
  X = X.astype(np.float32)
772
698
  if len(X.shape) == 1:
773
699
  X = X.reshape(1, -1)
774
- X_scaled = self.scaler.transform(X)
775
700
 
776
701
  predictions = [] #jj
777
702
  for model in self.models:
778
- pred = model.predict(X_scaled)
703
+ pred = model.predict(X)
779
704
  if len(pred.shape) > 0 and pred.shape[0] > 1:
780
705
  predictions.append(pred)
781
706
  else:
@@ -792,7 +717,7 @@ class FichEn:
792
717
  pred_array = pred_array.T
793
718
  elif pred_array.shape[0] > 1 and pred_array.shape[1] == 30:
794
719
  pred_array = pred_array[0:1, :]
795
- predictions = self.scaler_y.inverse_transform(pred_array).flatten()
720
+ predictions = pred_array.flatten()
796
721
 
797
722
  if show:
798
723
  epsilon = 1e-10
@@ -814,7 +739,7 @@ class FichEn:
814
739
 
815
740
 
816
741
  def show_train_results(self):
817
- self.V.show_errors(self.train_errors, self.val_errors, self.train_r2, self.val_r2)
742
+ self.V.show_errors(self.train_errors, self.val_errors)
818
743
 
819
744
 
820
745
  def save_mql5(self, name):
@@ -826,13 +751,13 @@ class FichEn:
826
751
  mean_str = ','.join(str(x) for x in scaler_data['mean'])
827
752
  std_str = ','.join(str(x) for x in scaler_data['std'])
828
753
 
829
- scaler_data_y = {
754
+ """scaler_data_y = {
830
755
  "mean": self.scaler_y.mean_.tolist() if self.scaler_y.mean_ is not None else [],
831
756
  "std": self.scaler_y.scale_.tolist() if self.scaler_y.scale_ is not None else [],
832
757
  "var": self.scaler_y.var_.tolist() if self.scaler_y.var_ is not None else []
833
758
  }
834
759
  mean_str_y = ','.join(str(x) for x in scaler_data_y['mean'])
835
- std_str_y = ','.join(str(x) for x in scaler_data_y['std'])
760
+ std_str_y = ','.join(str(x) for x in scaler_data_y['std'])"""
836
761
 
837
762
 
838
763
  os.makedirs(name, exist_ok=True)
@@ -864,8 +789,8 @@ class FichEn:
864
789
  f.write(f"double mean_[] = {{{mean_str}}};\n\n")
865
790
  f.write(f"double std_[] = {{{std_str}}};\n\n")
866
791
 
867
- f.write(f"double mean_y[] = {{{mean_str_y}}};\n\n")
868
- f.write(f"double std_y[] = {{{std_str_y}}};\n\n")
792
+ #f.write(f"double mean_y[] = {{{mean_str_y}}};\n\n")
793
+ #f.write(f"double std_y[] = {{{std_str_y}}};\n\n")
869
794
 
870
795
  f.write("//--- indicator buffers\n")
871
796
  f.write("double past_vol[];\n")
@@ -1473,175 +1398,11 @@ class FichEn:
1473
1398
 
1474
1399
 
1475
1400
 
1476
- class VolClustGB(FichEn):
1477
- def __init__(self, sett, early_stopping=True, output=True, loss="MAE"):
1478
- self.loss = loss
1479
- self.output = output
1480
- self.models = []
1481
- self.scaler = StandardScaler()
1482
- self.scaler_y = StandardScaler()
1483
- self.X_shape = 0
1484
- self.is_fitted = False
1485
- self.onnx_load = False
1486
- self.early_stopping = early_stopping
1487
- self.V = Visualization('dark')
1488
- self.default_sett = {
1489
- 'loss': 'squared_error',
1490
- 'learning_rate': 0.01,
1491
- 'n_estimators': 1,
1492
- 'max_depth': 3,
1493
- 'min_samples_split': 5,
1494
- 'min_samples_leaf': 2,
1495
- 'subsample': 0.8,
1496
- 'random_state': 42,
1497
- 'warm_start': True
1498
- }
1499
- self.meta = {
1500
- "model_type": "gb",
1501
- "model_settings": self.default_sett,
1502
- "model_loss": loss
1503
- }
1504
- if sett == {}:
1505
- self.base_model = GradientBoostingRegressor(**self.default_sett)
1506
- else:
1507
- self.base_model = GradientBoostingRegressor(**sett)
1508
-
1509
- def save(self, name, type_to_save='default'):
1510
- if type_to_save == 'default':
1511
- os.makedirs(name, exist_ok=True)
1512
- initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
1513
-
1514
- file_path = os.path.join(name, f"{name}_features.json")
1515
- with open(file_path, 'w', encoding='utf-8') as f:
1516
- json.dump(self.feature_list, f, ensure_ascii=False, indent=2)
1517
-
1518
- self.meta = {
1519
- "model_type": "gb",
1520
- "model_settings": self.default_sett,
1521
- "input_bars": self.input_bars,
1522
- "horizon": self.horizon,
1523
- "trees_count": self.trees_count,
1524
- }
1525
- file_path = os.path.join(name, f"{name}_model_settings.json")
1526
- with open(file_path, 'w', encoding='utf-8') as f:
1527
- json.dump(self.meta, f, ensure_ascii=False, indent=2)
1528
-
1529
- if hasattr(self, 'scaler'):
1530
- scaler_path = os.path.join(name, f"{name}_scaler.pkl")
1531
- joblib.dump(self.scaler, scaler_path)
1532
-
1533
- if hasattr(self, 'scaler_y'):
1534
- scaler_path = os.path.join(name, f"{name}_scaler_y.pkl")
1535
- joblib.dump(self.scaler_y, scaler_path)
1536
-
1537
- for i in range(len(self.models)):
1538
- onx = convert_sklearn(self.models[i], initial_types=initial_type, target_opset=12)
1539
-
1540
- file_path = os.path.join(name, f"{name}_{i}.onnx")
1541
-
1542
- with open(file_path, "wb") as f:
1543
- f.write(onx.SerializeToString())
1544
- elif type_to_save == 'mql5':
1545
- self.save_mql5(name)
1546
- onnx_dir = os.path.join(name, f"{name}_onnx")
1547
- os.makedirs(onnx_dir, exist_ok=True)
1548
- self.dquantprint(f"Directory for ONNX files created: {onnx_dir}")
1549
-
1550
- initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
1551
-
1552
- if hasattr(self, 'scaler') and self.scaler is not None:
1553
- scaler_path = os.path.join(onnx_dir, f"{name}_scaler.pkl")
1554
- joblib.dump(self.scaler, scaler_path)
1555
- self.dquantprint(f"Scaler is saved in {scaler_path}")
1556
-
1557
- if hasattr(self, 'scaler_y') and self.scaler_y is not None:
1558
- scaler_path = os.path.join(onnx_dir, f"{name}_scaler_y.pkl")
1559
- joblib.dump(self.scaler_y, scaler_path)
1560
- self.dquantprint(f"Scalery is saved in {scaler_path}")
1561
-
1562
- for i in range(len(self.models)):
1563
- onx = convert_sklearn(self.models[i], initial_types=initial_type, target_opset=12)
1564
- file_path = os.path.join(onnx_dir, f"{name}_{i}.onnx")
1565
- with open(file_path, "wb") as f:
1566
- f.write(onx.SerializeToString())
1567
- self.dquantprint(f"Model {i} is saved in {file_path}")
1568
-
1569
- self.dquantprint(f"All operations in directory '{name}' completed successfully!")
1570
-
1571
-
1572
- def load(self, name):
1573
- self.loaded_models = []
1574
-
1575
- if not os.path.exists(name):
1576
- raise FileNotFoundError(f"Directory {name} not found")
1577
-
1578
- try:
1579
- file_path = os.path.join(name, f"{name}_features.json")
1580
- with open(file_path, 'r', encoding='utf-8') as f:
1581
- self.feature_list = json.load(f)
1582
- except FileNotFoundError:
1583
- self.dquantprint(f'Model {name} is not valid, file {name}_features.json is not found')
1584
- return
1585
-
1586
- try:
1587
- file_path = os.path.join(name, f"{name}_model_settings.json")
1588
- with open(file_path, 'r', encoding='utf-8') as f:
1589
- self.meta = json.load(f)
1590
- except FileNotFoundError:
1591
- self.dquantprint(f'Model {name} is not valid, file {name}_model_settings.json is not found')
1592
- return
1593
-
1594
- if self.meta['model_type'] != 'gb':
1595
- raise ValueError(f"Wrong model type, expected gb and not a {self.meta['model_type']}")
1596
-
1597
- scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1598
- if scaler_files:
1599
- scaler_path = os.path.join(name, scaler_files[0])
1600
- self.scaler = joblib.load(scaler_path)
1601
-
1602
- scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
1603
- if scaler_files:
1604
- scaler_path = os.path.join(name, scaler_files[0])
1605
- self.scaler_y = joblib.load(scaler_path)
1606
-
1607
- model_files = [f for f in os.listdir(name) if f.endswith('.onnx')]
1608
-
1609
- if not model_files:
1610
- raise FileNotFoundError(f"No .onnx files found in directory {name}")
1611
-
1612
- model_files.sort()
1613
- ml = len(model_files)
1614
- numbers = {}
1615
- for f in model_files:
1616
- match = re.search(r'_(\d+)\.onnx$', f)
1617
- if match:
1618
- num = int(match.group(1))
1619
- numbers[num] = f
1620
-
1621
- model_files = []
1622
- for i in range(ml):
1623
- model_files.append(numbers[i])
1624
-
1625
-
1626
- for model_file in model_files:
1627
- model_path = os.path.join(name, model_file)
1628
- session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
1629
-
1630
- input_info = session.get_inputs()[0]
1631
- self.loaded_models.append(session)
1632
-
1633
- self.onnx_load = True
1634
-
1635
-
1636
-
1637
-
1638
1401
  class VolClustXGB(FichEn):
1639
1402
  def __init__(self, sett, early_stopping=True, output=True, loss="QLIKE"):
1640
1403
  self.loss = loss
1641
1404
  self.output = output
1642
1405
  self.models = []
1643
- self.scaler = StandardScaler()
1644
- self.scaler_y = StandardScaler()
1645
1406
  self.X_shape = 0
1646
1407
  self.is_fitted = False
1647
1408
  self.onnx_load = False
@@ -1707,13 +1468,13 @@ class VolClustXGB(FichEn):
1707
1468
  with open(file_path, 'w', encoding='utf-8') as f:
1708
1469
  json.dump(self.meta, f, ensure_ascii=False, indent=2)
1709
1470
 
1710
- if hasattr(self, 'scaler'):
1471
+ """if hasattr(self, 'scaler'):
1711
1472
  scaler_path = os.path.join(name, f"{name}_scaler.pkl")
1712
- joblib.dump(self.scaler, scaler_path)
1473
+ joblib.dump(self.scaler, scaler_path)"""
1713
1474
 
1714
- if hasattr(self, 'scaler_y'):
1475
+ """if hasattr(self, 'scaler_y'):
1715
1476
  scaler_path = os.path.join(name, f"{name}_scaler_y.pkl")
1716
- joblib.dump(self.scaler_y, scaler_path)
1477
+ joblib.dump(self.scaler_y, scaler_path)"""
1717
1478
 
1718
1479
  for i in range(len(self.models)):
1719
1480
  onx = onnxmltools.convert_xgboost(self.models[i], initial_types=initial_type, target_opset=12)
@@ -1728,15 +1489,15 @@ class VolClustXGB(FichEn):
1728
1489
 
1729
1490
  initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
1730
1491
 
1731
- if hasattr(self, 'scaler') and self.scaler is not None:
1492
+ """if hasattr(self, 'scaler') and self.scaler is not None:
1732
1493
  scaler_path = os.path.join(onnx_dir, f"{name}_scaler.pkl")
1733
1494
  joblib.dump(self.scaler, scaler_path)
1734
- self.dquantprint(f"Scaler is saved in {scaler_path}")
1495
+ self.dquantprint(f"Scaler is saved in {scaler_path}")"""
1735
1496
 
1736
- if hasattr(self, 'scaler_y') and self.scaler_y is not None:
1497
+ """if hasattr(self, 'scaler_y') and self.scaler_y is not None:
1737
1498
  scaler_path = os.path.join(onnx_dir, f"{name}_scaler_y.pkl")
1738
1499
  joblib.dump(self.scaler_y, scaler_path)
1739
- self.dquantprint(f"Scalery is saved in {scaler_path}")
1500
+ self.dquantprint(f"Scalery is saved in {scaler_path}")"""
1740
1501
 
1741
1502
  for i in range(len(self.models)):
1742
1503
  onx = onnxmltools.convert_xgboost(self.models[i], initial_types=initial_type, target_opset=9)
@@ -1773,15 +1534,15 @@ class VolClustXGB(FichEn):
1773
1534
  if self.meta['model_type'] != 'xgb':
1774
1535
  raise ValueError(f"Wrong model type, expected xgb and not a {self.meta['model_type']}")
1775
1536
 
1776
- scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1537
+ """scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1777
1538
  if scaler_files:
1778
1539
  scaler_path = os.path.join(name, scaler_files[0])
1779
- self.scaler = joblib.load(scaler_path)
1540
+ self.scaler = joblib.load(scaler_path)"""
1780
1541
 
1781
- scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
1542
+ """scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
1782
1543
  if scaler_files:
1783
1544
  scaler_path = os.path.join(name, scaler_files[0])
1784
- self.scaler_y = joblib.load(scaler_path)
1545
+ #self.scaler_y = joblib.load(scaler_path)"""
1785
1546
 
1786
1547
  model_files = [f for f in os.listdir(name) if f.endswith('.onnx')]
1787
1548
 
@@ -1817,8 +1578,6 @@ class VolClustLightGBM(FichEn):
1817
1578
  self.loss = loss
1818
1579
  self.output = output
1819
1580
  self.models = []
1820
- self.scaler = StandardScaler()
1821
- self.scaler_y = StandardScaler()
1822
1581
  self.X_shape = 0
1823
1582
  self.is_fitted = False
1824
1583
  self.onnx_load = False
@@ -1886,13 +1645,13 @@ class VolClustLightGBM(FichEn):
1886
1645
  with open(file_path, 'w', encoding='utf-8') as f:
1887
1646
  json.dump(self.meta, f, ensure_ascii=False, indent=2)
1888
1647
 
1889
- if hasattr(self, 'scaler'):
1648
+ """if hasattr(self, 'scaler'):
1890
1649
  scaler_path = os.path.join(name, f"{name}_scaler.pkl")
1891
- joblib.dump(self.scaler, scaler_path)
1650
+ joblib.dump(self.scaler, scaler_path)"""
1892
1651
 
1893
- if hasattr(self, 'scaler_y'):
1652
+ """if hasattr(self, 'scaler_y'):
1894
1653
  scaler_path = os.path.join(name, f"{name}_scaler_y.pkl")
1895
- joblib.dump(self.scaler_y, scaler_path)
1654
+ joblib.dump(self.scaler_y, scaler_path)"""
1896
1655
 
1897
1656
  for i in range(len(self.models)):
1898
1657
  onx = onnxmltools.convert_lightgbm(self.models[i], initial_types=initial_type, zipmap=False,
@@ -1907,15 +1666,15 @@ class VolClustLightGBM(FichEn):
1907
1666
 
1908
1667
  initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
1909
1668
 
1910
- if hasattr(self, 'scaler') and self.scaler is not None:
1669
+ """if hasattr(self, 'scaler') and self.scaler is not None:
1911
1670
  scaler_path = os.path.join(onnx_dir, f"{name}_scaler.pkl")
1912
1671
  joblib.dump(self.scaler, scaler_path)
1913
- self.dquantprint(f"Scaler is saved in {scaler_path}")
1672
+ self.dquantprint(f"Scaler is saved in {scaler_path}")"""
1914
1673
 
1915
- if hasattr(self, 'scaler_y') and self.scaler_y is not None:
1674
+ """if hasattr(self, 'scaler_y') and self.scaler_y is not None:
1916
1675
  scaler_path = os.path.join(onnx_dir, f"{name}_scaler_y.pkl")
1917
1676
  joblib.dump(self.scaler_y, scaler_path)
1918
- self.dquantprint(f"Scalery is saved in {scaler_path}")
1677
+ self.dquantprint(f"Scalery is saved in {scaler_path}")"""
1919
1678
 
1920
1679
  for i in range(len(self.models)):
1921
1680
  onx = onnxmltools.convert_lightgbm(self.models[i], initial_types=initial_type, zipmap=False,
@@ -1952,15 +1711,15 @@ class VolClustLightGBM(FichEn):
1952
1711
  if self.meta['model_type'] != 'lgbm':
1953
1712
  raise ValueError(f"Wrong model type, expected lgbm and not a {self.meta['model_type']}")
1954
1713
 
1955
- scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1714
+ """scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1956
1715
  if scaler_files:
1957
1716
  scaler_path = os.path.join(name, scaler_files[0])
1958
- self.scaler = joblib.load(scaler_path)
1717
+ self.scaler = joblib.load(scaler_path)"""
1959
1718
 
1960
- scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
1719
+ """scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
1961
1720
  if scaler_files:
1962
1721
  scaler_path = os.path.join(name, scaler_files[0])
1963
- self.scaler_y = joblib.load(scaler_path)
1722
+ #self.scaler_y = joblib.load(scaler_path)"""
1964
1723
 
1965
1724
  model_files = [f for f in os.listdir(name) if f.endswith('.onnx')]
1966
1725
 
@@ -314,8 +314,8 @@ class Visualization:
314
314
 
315
315
  plt.show()
316
316
 
317
- def forward_validation_errors(self, mse_errors, r2_errors, save_path=None):
318
- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
317
+ def forward_validation_errors(self, mse_errors, save_path=None):
318
+ fig, (ax1) = plt.subplots(1, 1, figsize=(15, 6))
319
319
 
320
320
  ax1.plot(list(mse_errors), label='Train Loss',
321
321
  color=self.config['colors']['primary'])
@@ -327,17 +327,6 @@ class Visualization:
327
327
  self.__style_axes(ax1)
328
328
  self.__style_legend(ax1)
329
329
 
330
- ax2.plot(list(r2_errors), label='Train R²',
331
- color=self.config['colors']['primary'])
332
-
333
- ax2.set_xlabel('Trees')
334
- ax2.set_ylabel('R² Score')
335
- ax2.set_title('R² Score over Trees')
336
- ax2.grid(True)
337
-
338
- self.__style_axes(ax2)
339
- self.__style_legend(ax2)
340
-
341
330
  plt.tight_layout()
342
331
 
343
332
  if save_path:
@@ -346,8 +335,8 @@ class Visualization:
346
335
  plt.show()
347
336
 
348
337
 
349
- def show_errors(self, train_errors, val_errors, train_r2, val_r2, save_path=None):
350
- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
338
+ def show_errors(self, train_errors, val_errors, save_path=None):
339
+ fig, (ax1) = plt.subplots(1, 1, figsize=(15, 6))
351
340
 
352
341
  ax1.plot(list(train_errors), label='Train Loss',
353
342
  color=self.config['colors']['primary'])
@@ -360,19 +349,6 @@ class Visualization:
360
349
 
361
350
  self.__style_axes(ax1)
362
351
  self.__style_legend(ax1)
363
-
364
- ax2.plot(list(train_r2), label='Train R²',
365
- color=self.config['colors']['primary'])
366
- ax2.plot(list(val_r2), label='Validation R²',
367
- color=self.config['colors']['secondary'])
368
- ax2.set_xlabel('Trees')
369
- ax2.set_ylabel('R² Score')
370
- ax2.set_title('R² Score over Trees')
371
- ax2.grid(True)
372
-
373
- self.__style_axes(ax2)
374
- self.__style_legend(ax2)
375
-
376
352
  plt.tight_layout()
377
353
 
378
354
  if save_path:
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes