dquant 1.2.4__tar.gz → 1.3.0b0__tar.gz

This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dquant
3
- Version: 1.2.4
3
+ Version: 1.3.0b0
4
4
  Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
5
5
  Author: Denis Makarov
6
6
  Project-URL: Homepage, https://dquant.space
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dquant"
7
- version = "1.2.4"
7
+ version = "1.3.0-beta"
8
8
  authors = [
9
9
  { name="Denis Makarov" },
10
10
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dquant
3
- Version: 1.2.4
3
+ Version: 1.3.0b0
4
4
  Summary: DQuant is an open-source Python library for automated volatility forecasting of financial time series. It handles all stages of model construction, from raw prices to the final forecast.
5
5
  Author: Denis Makarov
6
6
  Project-URL: Homepage, https://dquant.space
@@ -1,5 +1,4 @@
1
1
  import json
2
- import joblib
3
2
  import re
4
3
  import onnxruntime as ort
5
4
  import os
@@ -10,14 +9,12 @@ from .visual import Visualization
10
9
  import time as time
11
10
  import numpy as np
12
11
  import xgboost
13
- from sklearn.ensemble import GradientBoostingRegressor
12
+ from sklearn.base import clone
14
13
  from sklearn.model_selection import train_test_split
15
14
  from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
16
15
  from .metrics import qlike_score
17
- from sklearn.preprocessing import StandardScaler
18
16
  from typing import Tuple
19
17
  import pandas as pd
20
- from skl2onnx import convert_sklearn
21
18
  from skl2onnx.common.data_types import FloatTensorType
22
19
  import warnings
23
20
  warnings.filterwarnings('ignore', message='X does not have valid feature names')
@@ -356,7 +353,7 @@ class FichEn:
356
353
 
357
354
  return np.array(tr_values)
358
355
 
359
- def forward(self, data, feature_list, trees, input_bars, horizon, trees_count, show_results=False, feature_func=None, target_func=None):
356
+ def forward(self, data, feature_list, trees, train_window_size, input_bars, horizon, trees_count, show_results=False, feature_func=None, target_func=None):
360
357
  self.input_bars = input_bars
361
358
  self.horizon = horizon
362
359
  self.trees_count = trees_count
@@ -365,6 +362,15 @@ class FichEn:
365
362
  "horizon": self.horizon,
366
363
  "trees_count": self.trees_count
367
364
  }
365
+ if self.loss == "MAE":
366
+ loss_f = mean_absolute_error
367
+ elif self.loss == "MSE":
368
+ loss_f = mean_squared_error
369
+ elif self.loss == "QLIKE":
370
+ loss_f = qlike_score
371
+ else:
372
+ raise "Unavailable loss function"
373
+
368
374
  x, y = self._DataSplitting(data, input_bars, horizon, True)
369
375
  XX = []
370
376
  YY = []
@@ -402,7 +408,6 @@ class FichEn:
402
408
  x = np.array(XX)
403
409
  y = np.array(YY)
404
410
 
405
- train_window_size = input_bars
406
411
  start_val_idx = train_window_size
407
412
 
408
413
  total_iterations = len(x) - start_val_idx
@@ -415,8 +420,6 @@ class FichEn:
415
420
 
416
421
  all_train_errors = []
417
422
  all_val_errors = []
418
- all_train_r2 = []
419
- all_val_r2 = []
420
423
 
421
424
  if isinstance(horizon, int):
422
425
  horizon_list = list(range(horizon))
@@ -435,74 +438,53 @@ class FichEn:
435
438
  X_val = x[val_idx:val_idx + 1] # form (1, n_features)
436
439
  y_val_true = y[val_idx] # form (horizon,)
437
440
 
438
- # === Normalization ===
439
- scaler_X = StandardScaler()
440
- scaler_y_local = StandardScaler()
441
-
442
- X_train_scaled = scaler_X.fit_transform(X_train)
443
- y_train_scaled = scaler_y_local.fit_transform(y_train)
444
-
445
- X_val_scaled = scaler_X.transform(X_val)
446
-
447
441
  # === training for each horizon ===
448
442
  model_ex = self.base_model.__class__(**self.base_model.get_params())
449
443
  model_ex.set_params(n_estimators=trees)
450
444
  models_temp = []
451
445
  for h_idx in horizon_list:
452
- if h_idx >= y_train_scaled.shape[1]:
446
+ if h_idx >= y_train.shape[1]:
453
447
  continue
454
448
 
455
- y_h = y_train_scaled[:, h_idx]
449
+ y_h = y_train[:, h_idx]
456
450
 
457
- model = model_ex
458
- model.fit(X_train_scaled, y_h)
451
+ model = clone(model_ex)
452
+ model.fit(X_train, y_h)
459
453
  models_temp.append(model)
460
454
 
461
455
  # === Foracesting train ===
462
456
  train_preds_list = []
463
457
  for model in models_temp:
464
- train_preds_list.append(model.predict(X_train_scaled))
458
+ train_preds_list.append(model.predict(X_train))
465
459
  train_preds = np.column_stack(train_preds_list) # (train_windows, horizon)
466
460
 
467
461
  # Forecasting on validation data
468
462
  val_preds_list = []
469
463
  for model in models_temp:
470
- val_preds_list.append(model.predict(X_val_scaled))
464
+ val_preds_list.append(model.predict(X_val))
471
465
  val_preds = np.array(val_preds_list).flatten() # (horizon,)
472
466
 
473
-
474
- y_train_inv = scaler_y_local.inverse_transform(y_train_scaled)
475
- train_preds_inv = scaler_y_local.inverse_transform(train_preds)
476
-
477
- y_val_true_inv = y_val_true
478
- val_preds_inv = scaler_y_local.inverse_transform(val_preds.reshape(1, -1)).flatten()
479
-
480
467
  # === Metrics ===
481
- train_error = mean_squared_error(y_train_inv.flatten(), train_preds_inv.flatten())
482
- val_error = mean_squared_error(y_val_true_inv, val_preds_inv)
483
- train_r2 = r2_score(y_train_inv.flatten(), train_preds_inv.flatten())
484
- val_r2 = r2_score(y_val_true_inv, val_preds_inv)
468
+ train_error = loss_f(y_train.flatten(), train_preds.flatten())
469
+ val_error = loss_f(y_val_true, val_preds)
485
470
 
486
471
  all_train_errors.append(train_error)
487
472
  all_val_errors.append(val_error)
488
- all_train_r2.append(train_r2)
489
- all_val_r2.append(val_r2)
490
473
 
491
474
  # === Progress bar ===
492
475
  percent = (iter_num / total_iterations) * 100
493
476
  filled = int(percent / 2)
494
477
  bar = '█' * filled + '░' * (50 - filled)
495
478
  self.dquantprint(
496
- f'\rWalk-Forward: |{bar}| {percent:.1f}% - Iteration {iter_num}/{total_iterations} - Val MSE: {val_error:.6f} - need time: {(time.time()-start_it)*(total_iterations-iter_num)} seconds',
479
+ f'\rWalk-Forward: |{bar}| {percent:.1f}% - Iteration {iter_num}/{total_iterations} - Val {self.loss}: {val_error:.6f} - need time: {(time.time()-start_it)*(total_iterations-iter_num)} seconds',
497
480
  end='', flush=True)
498
481
 
499
482
 
500
- self.dquantprint(f"Mean validation error (MSE): {np.mean(all_val_errors):.6f} +/- {np.std(all_val_errors):.6f}")
501
- self.dquantprint(f"Mean validation R²: {np.mean(all_val_r2):.4f} +/- {np.std(all_val_r2):.4f}")
483
+ self.dquantprint(f"Mean validation error ({self.loss}): {np.mean(all_val_errors):.6f} +/- {np.std(all_val_errors):.6f}")
502
484
  self.dquantprint(f"Maximum validation error: {np.max(all_val_errors):.6f}")
503
485
  self.dquantprint(f"Minimum validation error: {np.min(all_val_errors):.6f}")
504
486
  if show_results:
505
- self.V.forward_validation_errors(all_val_errors, all_val_r2)
487
+ self.V.forward_validation_errors(all_val_errors)
506
488
  return
507
489
 
508
490
  def fit(self, data, feature_list, input_bars, horizon, trees_count, show_results=False, feature_func=None, target_func=None):
@@ -514,6 +496,15 @@ class FichEn:
514
496
  "horizon": self.horizon,
515
497
  "trees_count": self.trees_count
516
498
  }
499
+ if self.loss == "MAE":
500
+ loss_f = mean_absolute_error
501
+ elif self.loss == "MSE":
502
+ loss_f = mean_squared_error
503
+ elif self.loss == "QLIKE":
504
+ loss_f = qlike_score
505
+ else:
506
+ raise "Unavailable loss function"
507
+
517
508
  x, y = self._DataSplitting(data, input_bars, horizon, True)
518
509
  XX = []
519
510
  YY = []
@@ -553,33 +544,14 @@ class FichEn:
553
544
  x = np.array(XX)
554
545
  y = np.array(YY)
555
546
 
556
-
557
547
  X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False, random_state=42)
558
- X_scaled = self.scaler.fit_transform(X_train)
559
- X_test_scaled = self.scaler.transform(X_test)
560
- #Y_scaled = self.scaler_y.fit_transform(y_train)
561
- #Y_test_scaled = self.scaler_y.transform(y_test)
562
- """if hasattr(y, 'ndim') and y.ndim == 1:
563
- self.X_shape = 1
564
- #y_2d = y.values.reshape(-1, 1) if hasattr(y, 'values') else y.reshape(-1, 1)
565
- else:
566
- self.X_shape = y.shape[1]"""
567
548
 
568
549
  self.X_shape = x.shape[1]
569
550
 
570
551
  self.train_errors = []
571
552
  self.val_errors = []
572
- self.train_mae = []
573
- self.val_mae = []
574
- self.train_qlike = []
575
- self.val_qlike = []
576
- self.train_r2 = []
577
- self.val_r2 = []
578
553
 
579
554
  self.best_val_error = float('inf')
580
- self.best_val_mae = float('inf')
581
- self.best_val_qlike = float('inf')
582
- self.best_r2 = -float('inf')
583
555
  self.patience_counter = 0
584
556
  self.patience = 3
585
557
 
@@ -590,12 +562,6 @@ class FichEn:
590
562
  self.dquantprint(f'{i} trees')
591
563
  t_error = 0
592
564
  v_error = 0
593
- t_mae = 0
594
- v_mae = 0
595
- t_qlike = 0
596
- v_qlike = 0
597
- t_r2 = 0
598
- v_r2 = 0
599
565
  if isinstance(horizon, int):
600
566
  horizon_list = list(range(horizon))
601
567
  else:
@@ -609,9 +575,8 @@ class FichEn:
609
575
  y_h = y_train.iloc[:, h_idx] if hasattr(y_train, 'iloc') else y_train[:, h_idx]
610
576
 
611
577
  valid_mask = ~pd.isna(y_h) if hasattr(y_h, 'isna') else ~np.isnan(y_h)
612
- X_h = X_scaled[valid_mask]
578
+ X_h = X_train[valid_mask]
613
579
  y_h_clean = y_h[valid_mask]
614
- #y_h_clean_orig = self.scaler_y.inverse_transform(y_h_clean.reshape(-1, 1)).ravel()
615
580
 
616
581
  if i != 1:
617
582
  self.models[h_idx].set_params(n_estimators=i)
@@ -625,94 +590,49 @@ class FichEn:
625
590
  y_h_v = y_test.iloc[:, h_idx] if hasattr(y_test, 'iloc') else y_test[:, h_idx]
626
591
 
627
592
  valid_mask = ~pd.isna(y_h_v) if hasattr(y_h_v, 'isna') else ~np.isnan(y_h_v)
628
- X_h_v = X_test_scaled[valid_mask]
593
+ X_h_v = X_test[valid_mask]
629
594
  y_h_v_clean = y_h_v[valid_mask]
630
- #y_h_v_clean_orig = self.scaler_y.inverse_transform(y_h_v_clean.reshape(-1, 1)).ravel()
631
595
  if i != 1:
632
596
  pred_train = self.models[h_idx].predict(X_h)
633
597
  pred_val = self.models[h_idx].predict(X_h_v)
634
- #pred_train_orig = self.scaler_y.inverse_transform(pred_train.reshape(-1, 1)).ravel()
635
- #pred_val_orig = self.scaler_y.inverse_transform(pred_val.reshape(-1, 1)).ravel()
636
- t_error += mean_squared_error(y_h_clean, pred_train)
637
- v_error += mean_squared_error(y_h_v_clean, pred_val)
638
- t_mae += mean_absolute_error(y_h_clean, pred_train)
639
- v_mae += mean_absolute_error(y_h_v_clean, pred_val)
640
- t_qlike += qlike_score(y_h_clean, pred_train)
641
- v_qlike += qlike_score(y_h_v_clean, pred_val)
642
- t_r2 += r2_score(y_h_clean, pred_train)
643
- v_r2 += r2_score(y_h_v_clean, pred_val)
598
+
599
+ t_error += loss_f(y_h_clean, pred_train)
600
+ v_error += loss_f(y_h_v_clean, pred_val)
644
601
  else:
645
602
  pred_train = model.predict(X_h)
646
603
  pred_val = model.predict(X_h_v)
647
- #pred_train_orig = self.scaler_y.inverse_transform(pred_train.reshape(-1, 1)).ravel()
648
- #pred_val_orig = self.scaler_y.inverse_transform(pred_val.reshape(-1, 1)).ravel()
649
- t_error += mean_squared_error(y_h_clean, pred_train)
650
- v_error += mean_squared_error(y_h_v_clean, pred_val)
651
- t_mae += mean_absolute_error(y_h_clean, pred_train)
652
- v_mae += mean_absolute_error(y_h_v_clean, pred_val)
653
- t_qlike += qlike_score(y_h_clean, pred_train)
654
- v_qlike += qlike_score(y_h_v_clean, pred_val)
655
- t_r2 += r2_score(y_h_clean, pred_train)
656
- v_r2 += r2_score(y_h_v_clean, pred_val)
604
+
605
+ t_error += loss_f(y_h_clean, pred_train)
606
+ v_error += loss_f(y_h_v_clean, pred_val)
657
607
 
658
608
 
659
609
  var_test_error = float(t_error)/horizon
660
610
  var_val_error = float(v_error)/horizon
661
- var_test_mae = float(t_mae) / horizon
662
- var_val_mae = float(v_mae) / horizon
663
- var_test_qlike = float(t_qlike) / horizon
664
- var_val_qlike = float(v_qlike) / horizon
665
- var_test_r2 = float(t_r2)/horizon
666
- var_val_r2 = float(v_r2)/horizon
667
611
 
668
612
  if self.early_stopping:
669
613
  if len(self.val_errors) > 0:
670
- if self.loss == "MAE":
671
- current_min = min(self.val_mae)
672
- best_so_far = min(self.best_val_mae, current_min)
673
- no_improvement_count = len(self.val_mae) - self.val_mae.index(best_so_far) - 1
674
- elif self.loss == "MSE":
675
- current_min = min(self.val_errors)
676
- best_so_far = min(self.best_val_error, current_min)
677
- no_improvement_count = len(self.val_errors) - self.val_errors.index(best_so_far) - 1
678
- elif self.loss == "QLIKE":
679
- current_min = min(self.val_qlike)
680
- best_so_far = min(self.best_val_qlike, current_min)
681
- no_improvement_count = len(self.val_qlike) - self.val_qlike.index(best_so_far) - 1
682
- else:
683
- raise "Unavailable loss function"
614
+ current_min = min(self.val_errors)
615
+ best_so_far = min(self.best_val_error, current_min)
616
+ no_improvement_count = len(self.val_errors) - self.val_errors.index(best_so_far) - 1
684
617
 
685
618
  if no_improvement_count >= self.patience:
686
619
  self.dquantprint(f'Early stopping at {i} trees (no improvement for {self.patience} steps)')
687
620
  if show_results:
688
- self.V.show_errors(self.train_errors, self.val_errors,
689
- self.train_r2, self.val_r2)
621
+ self.V.show_errors(self.train_errors, self.val_errors)
690
622
  self.is_fitted = True
691
623
  return
692
624
 
693
625
  self.train_errors.append(var_test_error)
694
626
  self.val_errors.append(var_val_error)
695
- self.train_mae.append(var_test_mae)
696
- self.val_mae.append(var_val_mae)
697
- self.train_qlike.append(var_test_qlike)
698
- self.val_qlike.append(var_val_qlike)
699
- self.train_r2.append(var_test_r2)
700
- self.val_r2.append(var_val_r2)
701
- self.dquantprint('Train QLIKE: ', var_test_qlike)
702
- self.dquantprint('Validation QLIKE: ', var_val_qlike)
703
- self.dquantprint('Train MSE: ', var_test_error)
704
- self.dquantprint('Validation MSE: ', var_val_error)
705
- self.dquantprint('Train MAE: ', var_test_mae)
706
- self.dquantprint('Validation MAE: ', var_val_mae)
707
- self.dquantprint('Train r2: ', var_test_r2)
708
- self.dquantprint('Validation r2: ', var_val_r2)
627
+ self.dquantprint(f'Train {self.loss}: ', var_test_error)
628
+ self.dquantprint(f'Validation {self.loss}: ', var_val_error)
709
629
  self.dquantprint(f"{time.time() - start} seconds spent")
710
630
 
711
631
  except KeyboardInterrupt:
712
632
  self.dquantprint("\nTraining interrupted by Ctrl+C!")
713
633
 
714
634
  if show_results:
715
- self.V.show_errors(self.train_errors, self.val_errors, self.train_r2, self.val_r2)
635
+ self.V.show_errors(self.train_errors, self.val_errors)
716
636
 
717
637
  self.dquantprint('model is trained')
718
638
  self.is_fitted = True
@@ -777,11 +697,10 @@ class FichEn:
777
697
  X = X.astype(np.float32)
778
698
  if len(X.shape) == 1:
779
699
  X = X.reshape(1, -1)
780
- X_scaled = self.scaler.transform(X)
781
700
 
782
701
  predictions = [] #jj
783
702
  for model in self.models:
784
- pred = model.predict(X_scaled)
703
+ pred = model.predict(X)
785
704
  if len(pred.shape) > 0 and pred.shape[0] > 1:
786
705
  predictions.append(pred)
787
706
  else:
@@ -820,7 +739,7 @@ class FichEn:
820
739
 
821
740
 
822
741
  def show_train_results(self):
823
- self.V.show_errors(self.train_errors, self.val_errors, self.train_r2, self.val_r2)
742
+ self.V.show_errors(self.train_errors, self.val_errors)
824
743
 
825
744
 
826
745
  def save_mql5(self, name):
@@ -1479,175 +1398,11 @@ class FichEn:
1479
1398
 
1480
1399
 
1481
1400
 
1482
- class VolClustGB(FichEn):
1483
- def __init__(self, sett, early_stopping=True, output=True, loss="MAE"):
1484
- self.loss = loss
1485
- self.output = output
1486
- self.models = []
1487
- self.scaler = StandardScaler()
1488
- #self.scaler_y = StandardScaler()
1489
- self.X_shape = 0
1490
- self.is_fitted = False
1491
- self.onnx_load = False
1492
- self.early_stopping = early_stopping
1493
- self.V = Visualization('dark')
1494
- self.default_sett = {
1495
- 'loss': 'squared_error',
1496
- 'learning_rate': 0.01,
1497
- 'n_estimators': 1,
1498
- 'max_depth': 3,
1499
- 'min_samples_split': 5,
1500
- 'min_samples_leaf': 2,
1501
- 'subsample': 0.8,
1502
- 'random_state': 42,
1503
- 'warm_start': True
1504
- }
1505
- self.meta = {
1506
- "model_type": "gb",
1507
- "model_settings": self.default_sett,
1508
- "model_loss": loss
1509
- }
1510
- if sett == {}:
1511
- self.base_model = GradientBoostingRegressor(**self.default_sett)
1512
- else:
1513
- self.base_model = GradientBoostingRegressor(**sett)
1514
-
1515
- def save(self, name, type_to_save='default'):
1516
- if type_to_save == 'default':
1517
- os.makedirs(name, exist_ok=True)
1518
- initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
1519
-
1520
- file_path = os.path.join(name, f"{name}_features.json")
1521
- with open(file_path, 'w', encoding='utf-8') as f:
1522
- json.dump(self.feature_list, f, ensure_ascii=False, indent=2)
1523
-
1524
- self.meta = {
1525
- "model_type": "gb",
1526
- "model_settings": self.default_sett,
1527
- "input_bars": self.input_bars,
1528
- "horizon": self.horizon,
1529
- "trees_count": self.trees_count,
1530
- }
1531
- file_path = os.path.join(name, f"{name}_model_settings.json")
1532
- with open(file_path, 'w', encoding='utf-8') as f:
1533
- json.dump(self.meta, f, ensure_ascii=False, indent=2)
1534
-
1535
- if hasattr(self, 'scaler'):
1536
- scaler_path = os.path.join(name, f"{name}_scaler.pkl")
1537
- joblib.dump(self.scaler, scaler_path)
1538
-
1539
- """if hasattr(self, 'scaler_y'):
1540
- scaler_path = os.path.join(name, f"{name}_scaler_y.pkl")
1541
- joblib.dump(self.scaler_y, scaler_path)"""
1542
-
1543
- for i in range(len(self.models)):
1544
- onx = convert_sklearn(self.models[i], initial_types=initial_type, target_opset=12)
1545
-
1546
- file_path = os.path.join(name, f"{name}_{i}.onnx")
1547
-
1548
- with open(file_path, "wb") as f:
1549
- f.write(onx.SerializeToString())
1550
- elif type_to_save == 'mql5':
1551
- self.save_mql5(name)
1552
- onnx_dir = os.path.join(name, f"{name}_onnx")
1553
- os.makedirs(onnx_dir, exist_ok=True)
1554
- self.dquantprint(f"Directory for ONNX files created: {onnx_dir}")
1555
-
1556
- initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
1557
-
1558
- if hasattr(self, 'scaler') and self.scaler is not None:
1559
- scaler_path = os.path.join(onnx_dir, f"{name}_scaler.pkl")
1560
- joblib.dump(self.scaler, scaler_path)
1561
- self.dquantprint(f"Scaler is saved in {scaler_path}")
1562
-
1563
- """if hasattr(self, 'scaler_y') and self.scaler_y is not None:
1564
- scaler_path = os.path.join(onnx_dir, f"{name}_scaler_y.pkl")
1565
- joblib.dump(self.scaler_y, scaler_path)
1566
- self.dquantprint(f"Scalery is saved in {scaler_path}")"""
1567
-
1568
- for i in range(len(self.models)):
1569
- onx = convert_sklearn(self.models[i], initial_types=initial_type, target_opset=12)
1570
- file_path = os.path.join(onnx_dir, f"{name}_{i}.onnx")
1571
- with open(file_path, "wb") as f:
1572
- f.write(onx.SerializeToString())
1573
- self.dquantprint(f"Model {i} is saved in {file_path}")
1574
-
1575
- self.dquantprint(f"All operations in directory '{name}' completed successfully!")
1576
-
1577
-
1578
- def load(self, name):
1579
- self.loaded_models = []
1580
-
1581
- if not os.path.exists(name):
1582
- raise FileNotFoundError(f"Directory {name} not found")
1583
-
1584
- try:
1585
- file_path = os.path.join(name, f"{name}_features.json")
1586
- with open(file_path, 'r', encoding='utf-8') as f:
1587
- self.feature_list = json.load(f)
1588
- except FileNotFoundError:
1589
- self.dquantprint(f'Model {name} is not valid, file {name}_features.json is not found')
1590
- return
1591
-
1592
- try:
1593
- file_path = os.path.join(name, f"{name}_model_settings.json")
1594
- with open(file_path, 'r', encoding='utf-8') as f:
1595
- self.meta = json.load(f)
1596
- except FileNotFoundError:
1597
- self.dquantprint(f'Model {name} is not valid, file {name}_model_settings.json is not found')
1598
- return
1599
-
1600
- if self.meta['model_type'] != 'gb':
1601
- raise ValueError(f"Wrong model type, expected gb and not a {self.meta['model_type']}")
1602
-
1603
- scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1604
- if scaler_files:
1605
- scaler_path = os.path.join(name, scaler_files[0])
1606
- self.scaler = joblib.load(scaler_path)
1607
-
1608
- """scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
1609
- if scaler_files:
1610
- scaler_path = os.path.join(name, scaler_files[0])"""
1611
- #self.scaler_y = joblib.load(scaler_path)
1612
-
1613
- model_files = [f for f in os.listdir(name) if f.endswith('.onnx')]
1614
-
1615
- if not model_files:
1616
- raise FileNotFoundError(f"No .onnx files found in directory {name}")
1617
-
1618
- model_files.sort()
1619
- ml = len(model_files)
1620
- numbers = {}
1621
- for f in model_files:
1622
- match = re.search(r'_(\d+)\.onnx$', f)
1623
- if match:
1624
- num = int(match.group(1))
1625
- numbers[num] = f
1626
-
1627
- model_files = []
1628
- for i in range(ml):
1629
- model_files.append(numbers[i])
1630
-
1631
-
1632
- for model_file in model_files:
1633
- model_path = os.path.join(name, model_file)
1634
- session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
1635
-
1636
- input_info = session.get_inputs()[0]
1637
- self.loaded_models.append(session)
1638
-
1639
- self.onnx_load = True
1640
-
1641
-
1642
-
1643
-
1644
1401
  class VolClustXGB(FichEn):
1645
1402
  def __init__(self, sett, early_stopping=True, output=True, loss="QLIKE"):
1646
1403
  self.loss = loss
1647
1404
  self.output = output
1648
1405
  self.models = []
1649
- self.scaler = StandardScaler()
1650
- #self.scaler_y = StandardScaler()
1651
1406
  self.X_shape = 0
1652
1407
  self.is_fitted = False
1653
1408
  self.onnx_load = False
@@ -1713,9 +1468,9 @@ class VolClustXGB(FichEn):
1713
1468
  with open(file_path, 'w', encoding='utf-8') as f:
1714
1469
  json.dump(self.meta, f, ensure_ascii=False, indent=2)
1715
1470
 
1716
- if hasattr(self, 'scaler'):
1471
+ """if hasattr(self, 'scaler'):
1717
1472
  scaler_path = os.path.join(name, f"{name}_scaler.pkl")
1718
- joblib.dump(self.scaler, scaler_path)
1473
+ joblib.dump(self.scaler, scaler_path)"""
1719
1474
 
1720
1475
  """if hasattr(self, 'scaler_y'):
1721
1476
  scaler_path = os.path.join(name, f"{name}_scaler_y.pkl")
@@ -1734,10 +1489,10 @@ class VolClustXGB(FichEn):
1734
1489
 
1735
1490
  initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
1736
1491
 
1737
- if hasattr(self, 'scaler') and self.scaler is not None:
1492
+ """if hasattr(self, 'scaler') and self.scaler is not None:
1738
1493
  scaler_path = os.path.join(onnx_dir, f"{name}_scaler.pkl")
1739
1494
  joblib.dump(self.scaler, scaler_path)
1740
- self.dquantprint(f"Scaler is saved in {scaler_path}")
1495
+ self.dquantprint(f"Scaler is saved in {scaler_path}")"""
1741
1496
 
1742
1497
  """if hasattr(self, 'scaler_y') and self.scaler_y is not None:
1743
1498
  scaler_path = os.path.join(onnx_dir, f"{name}_scaler_y.pkl")
@@ -1779,10 +1534,10 @@ class VolClustXGB(FichEn):
1779
1534
  if self.meta['model_type'] != 'xgb':
1780
1535
  raise ValueError(f"Wrong model type, expected xgb and not a {self.meta['model_type']}")
1781
1536
 
1782
- scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1537
+ """scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1783
1538
  if scaler_files:
1784
1539
  scaler_path = os.path.join(name, scaler_files[0])
1785
- self.scaler = joblib.load(scaler_path)
1540
+ self.scaler = joblib.load(scaler_path)"""
1786
1541
 
1787
1542
  """scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
1788
1543
  if scaler_files:
@@ -1823,8 +1578,6 @@ class VolClustLightGBM(FichEn):
1823
1578
  self.loss = loss
1824
1579
  self.output = output
1825
1580
  self.models = []
1826
- self.scaler = StandardScaler()
1827
- #self.scaler_y = StandardScaler()
1828
1581
  self.X_shape = 0
1829
1582
  self.is_fitted = False
1830
1583
  self.onnx_load = False
@@ -1892,9 +1645,9 @@ class VolClustLightGBM(FichEn):
1892
1645
  with open(file_path, 'w', encoding='utf-8') as f:
1893
1646
  json.dump(self.meta, f, ensure_ascii=False, indent=2)
1894
1647
 
1895
- if hasattr(self, 'scaler'):
1648
+ """if hasattr(self, 'scaler'):
1896
1649
  scaler_path = os.path.join(name, f"{name}_scaler.pkl")
1897
- joblib.dump(self.scaler, scaler_path)
1650
+ joblib.dump(self.scaler, scaler_path)"""
1898
1651
 
1899
1652
  """if hasattr(self, 'scaler_y'):
1900
1653
  scaler_path = os.path.join(name, f"{name}_scaler_y.pkl")
@@ -1913,10 +1666,10 @@ class VolClustLightGBM(FichEn):
1913
1666
 
1914
1667
  initial_type = [('float_input', FloatTensorType([None, self.X_shape]))]
1915
1668
 
1916
- if hasattr(self, 'scaler') and self.scaler is not None:
1669
+ """if hasattr(self, 'scaler') and self.scaler is not None:
1917
1670
  scaler_path = os.path.join(onnx_dir, f"{name}_scaler.pkl")
1918
1671
  joblib.dump(self.scaler, scaler_path)
1919
- self.dquantprint(f"Scaler is saved in {scaler_path}")
1672
+ self.dquantprint(f"Scaler is saved in {scaler_path}")"""
1920
1673
 
1921
1674
  """if hasattr(self, 'scaler_y') and self.scaler_y is not None:
1922
1675
  scaler_path = os.path.join(onnx_dir, f"{name}_scaler_y.pkl")
@@ -1958,10 +1711,10 @@ class VolClustLightGBM(FichEn):
1958
1711
  if self.meta['model_type'] != 'lgbm':
1959
1712
  raise ValueError(f"Wrong model type, expected lgbm and not a {self.meta['model_type']}")
1960
1713
 
1961
- scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1714
+ """scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler.pkl')]
1962
1715
  if scaler_files:
1963
1716
  scaler_path = os.path.join(name, scaler_files[0])
1964
- self.scaler = joblib.load(scaler_path)
1717
+ self.scaler = joblib.load(scaler_path)"""
1965
1718
 
1966
1719
  """scaler_files = [f for f in os.listdir(name) if f.endswith('_scaler_y.pkl')]
1967
1720
  if scaler_files:
@@ -314,8 +314,8 @@ class Visualization:
314
314
 
315
315
  plt.show()
316
316
 
317
- def forward_validation_errors(self, mse_errors, r2_errors, save_path=None):
318
- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
317
+ def forward_validation_errors(self, mse_errors, save_path=None):
318
+ fig, (ax1) = plt.subplots(1, 1, figsize=(15, 6))
319
319
 
320
320
  ax1.plot(list(mse_errors), label='Train Loss',
321
321
  color=self.config['colors']['primary'])
@@ -327,17 +327,6 @@ class Visualization:
327
327
  self.__style_axes(ax1)
328
328
  self.__style_legend(ax1)
329
329
 
330
- ax2.plot(list(r2_errors), label='Train R²',
331
- color=self.config['colors']['primary'])
332
-
333
- ax2.set_xlabel('Trees')
334
- ax2.set_ylabel('R² Score')
335
- ax2.set_title('R² Score over Trees')
336
- ax2.grid(True)
337
-
338
- self.__style_axes(ax2)
339
- self.__style_legend(ax2)
340
-
341
330
  plt.tight_layout()
342
331
 
343
332
  if save_path:
@@ -346,8 +335,8 @@ class Visualization:
346
335
  plt.show()
347
336
 
348
337
 
349
- def show_errors(self, train_errors, val_errors, train_r2, val_r2, save_path=None):
350
- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
338
+ def show_errors(self, train_errors, val_errors, save_path=None):
339
+ fig, (ax1) = plt.subplots(1, 1, figsize=(15, 6))
351
340
 
352
341
  ax1.plot(list(train_errors), label='Train Loss',
353
342
  color=self.config['colors']['primary'])
@@ -360,19 +349,6 @@ class Visualization:
360
349
 
361
350
  self.__style_axes(ax1)
362
351
  self.__style_legend(ax1)
363
-
364
- ax2.plot(list(train_r2), label='Train R²',
365
- color=self.config['colors']['primary'])
366
- ax2.plot(list(val_r2), label='Validation R²',
367
- color=self.config['colors']['secondary'])
368
- ax2.set_xlabel('Trees')
369
- ax2.set_ylabel('R² Score')
370
- ax2.set_title('R² Score over Trees')
371
- ax2.grid(True)
372
-
373
- self.__style_axes(ax2)
374
- self.__style_legend(ax2)
375
-
376
352
  plt.tight_layout()
377
353
 
378
354
  if save_path:
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes