sciml 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sciml
3
- Version: 0.0.10
4
- Summary: draw and basic calculations/conversions
3
+ Version: 0.0.12
4
+ Summary: Machine/deep learning models and toolboxes for geosciences.
5
5
  Home-page: https://github.com/soonyenju/sciml
6
6
  Author: Songyan Zhu
7
7
  Author-email: zhusy93@gmail.com
8
8
  License: MIT Licence
9
- Keywords: Scientific machine learning wrappers
9
+ Keywords: Geospatial scientific ML
10
10
  Platform: any
11
11
  License-File: LICENSE
12
12
 
@@ -0,0 +1,530 @@
1
+ import numpy as np
2
+ import copy
3
+ import itertools
4
+ from scipy import ndimage
5
+ from xgboost import XGBRegressor
6
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
7
+ from sklearn.model_selection import train_test_split
8
+
9
class SmartForest4D:
    """
    Cascade ("deep forest" style) ensemble of XGBoost regressors for 4D input
    shaped (samples, time, spatial, features).

    The raw input is prepared once, before the first layer:

    1. Forget gate (optional): each time step is multiplied by a normalised
       exponential weight; see ``forget_factor``.
    2. Spatial filtering: when there is more than one spatial unit, the
       flattened spatial axis is reshaped to a (spatial_h, spatial_w) grid and
       convolved with ``kernel`` using zero padding.
    3. Spatial max pooling, then flattening to (samples, time * features).

    Every layer holds ``n_estimators_per_layer`` regressors. A layer is trained
    on the prepared input concatenated with the outputs of all previous layers,
    and the prediction is the mean output of the regressors in the last layer.

    Parameters:
    -----------
    n_estimators_per_layer : int
        Number of XGBoost regressors per layer.

    max_layers : int
        Maximum number of layers in the deep forest.

    early_stopping_rounds : int
        Number of consecutive layers without improvement on the validation set
        before training stops. Only used when validation data is given.

    param_grid : dict
        Dictionary of hyperparameter lists to search over for XGBoost. The
        search only happens when validation data is given; without it the last
        combination of the grid is used.

    use_gpu : bool
        Whether to use GPU for training XGBoost models.

    gpu_id : int
        CUDA device ordinal to use if use_gpu is True.

    kernel : np.ndarray
        2D convolution kernel for spatial processing. The default is the
        identity kernel, which leaves the grid unchanged. Example kernels::

            # 0. Do nothing
            identity_kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]])

            # 1. Sobel edge detection
            sobel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
            sobel_y = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]])

            # 2. Gaussian blur (3x3)
            gaussian_kernel = (1/16) * np.array([[1, 2, 1], [2, 4, 2], [1, 2, 1]])

            # 3. Morphological structuring element (3x3 cross),
            #    used in binary dilation/erosion
            morph_kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])

            # 4. Sharpening: enhances edges and contrast
            sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])

            # 5. Embossing: creates a 3D-like shadowed effect
            emboss_kernel = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]])

    spatial_h : int
        The height of the 2D grid for the flattened spatial dimension.

    spatial_w : int
        The width of the 2D grid for the flattened spatial dimension.

    forget_factor : float
        Exponential decay rate applied along the time axis. Higher values mean
        stronger forgetting; 0 disables the gate. The weights grow towards the
        LAST index of the time axis, i.e. the last index is treated as the
        most recent step.
        NOTE(review): confirm callers order the time axis oldest -> newest.

    verbose : int
        Verbosity level for training output (0 is silent).

    eval_metric : str
        Validation metric, case-insensitive: 'rmse', 'nrmse', 'mae', 'mape'
        or 'r2'. 'r2' is maximised, all others are minimised.

    Attributes:
    -----------
    layers : list
        List of trained layers, each containing a list of regressors.
        ``predict`` always runs through ALL of these layers, including layers
        trained after the best validation score was reached.

    best_model : list
        Deep copy of ``layers`` taken when the best validation score was
        reached. Stays None when no validation data is given.

    best_score : float
        The best validation metric seen during training (e.g. lowest RMSE).

    Methods:
    --------
    fit(X, y, X_val=None, y_val=None):
        Train the SmartForest4D model on the given 4D input data.

    predict(X):
        Predict targets for new 4D input data using the trained model.

    get_best_model():
        Return the best set of layers and the corresponding validation score.

    Notes:
    ------
    - The product of spatial_h and spatial_w must equal the size of the
      spatial axis (spatial_h * spatial_w = spatial_size).
    - 2D input (samples, features) and 3D input (samples, time, features) are
      accepted too; the missing axes are inserted with length 1.

    Example:
    --------
    >>> model = SmartForest4D(n_estimators_per_layer=5, max_layers=10, early_stopping_rounds=3,
    ...                       spatial_h=2, spatial_w=2, forget_factor=0.3, verbose=1)
    >>> model.fit(X_train, y_train, X_val, y_val)
    >>> y_pred = model.predict(X_val)
    >>> best_model, best_rmse = model.get_best_model()
    """

    def __init__(self, n_estimators_per_layer=5, max_layers=10, early_stopping_rounds=3, param_grid=None,
                 use_gpu=False, gpu_id=0, kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]]), spatial_h=None, spatial_w=None,
                 forget_factor=0.0, verbose=1, eval_metric='rmse'):
        """
        Store the configuration.

        Raises:
            ValueError: if spatial_h or spatial_w is missing.
        """
        self.n_estimators_per_layer = n_estimators_per_layer
        self.max_layers = max_layers
        self.early_stopping_rounds = early_stopping_rounds
        self.param_grid = param_grid or {
            "objective": ["reg:squarederror"],
            "random_state": [42],
            'n_estimators': [100],
            'max_depth': [6],
            'min_child_weight': [4],
            'subsample': [0.8],
            'colsample_bytree': [0.8],
            'gamma': [0],
            'reg_alpha': [0],
            'reg_lambda': [1],
            'learning_rate': [0.05],
        }
        self.use_gpu = use_gpu
        self.gpu_id = gpu_id
        # Copy so the instance never aliases the shared default array
        # (or an array the caller mutates later).
        self.kernel = np.array(kernel)
        self.spatial_h = spatial_h
        self.spatial_w = spatial_w
        self.forget_factor = forget_factor
        self.layers = []
        self.best_model = None
        self.verbose = verbose
        self.eval_metric = eval_metric.lower()
        self.best_score = self._worst_score()
        if (self.spatial_h is None) or (self.spatial_w is None):
            raise ValueError("Please specify spatial_h and spatial_w")

    def _worst_score(self):
        """Return the starting value that any real score improves upon."""
        return float("-inf") if self.eval_metric == 'r2' else float("inf")

    def _is_improvement(self, score, best):
        """Return True when `score` beats `best` (r2 is maximised, the rest minimised)."""
        if self.eval_metric == 'r2':
            return score > best
        return score < best

    def _evaluate(self, y_true, y_pred):
        """
        Compute the configured metric.

        Both arrays are flattened first so that a column-vector target cannot
        broadcast against a 1D prediction into an (n, n) matrix.

        Raises:
            ValueError: if eval_metric is not a supported name.
        """
        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()
        if self.eval_metric == 'rmse':
            return np.sqrt(mean_squared_error(y_true, y_pred))
        elif self.eval_metric == 'nrmse':
            # normalised by the mean absolute target value
            return np.sqrt(mean_squared_error(y_true, y_pred)) / np.mean(np.abs(y_true))
        elif self.eval_metric == 'mae':
            return mean_absolute_error(y_true, y_pred)
        elif self.eval_metric == 'mape':
            # the denominator is clipped to avoid division by zero
            return np.mean(np.abs((y_true - y_pred) / np.clip(np.abs(y_true), 1e-8, None))) * 100
        elif self.eval_metric == 'r2':
            return r2_score(y_true, y_pred)
        else:
            raise ValueError(f"Unknown evaluation metric: {self.eval_metric}")

    def _get_param_combinations(self):
        """Expand param_grid into a list with one dict per hyperparameter combination."""
        keys, values = zip(*self.param_grid.items())
        return [dict(zip(keys, v)) for v in itertools.product(*values)]

    def _prepare_input(self, X, y=None, apply_forget=False, layer_index=0):
        """
        Turn 2D/3D/4D input into the 2D matrix (samples, time * features) fed to the regressors.

        `y` and `layer_index` are accepted for interface compatibility and are not used.

        Raises:
            ValueError: if X is not 2D/3D/4D, if the kernel is not 2D, or if
                spatial_h * spatial_w does not match the spatial axis.
        """
        X = np.asarray(X)
        if not np.issubdtype(X.dtype, np.floating):
            # integer input would truncate the filtered (fractional) values
            X = X.astype(float)

        if X.ndim == 2:
            X = X[:, np.newaxis, np.newaxis, :]
        elif X.ndim == 3:
            X = X[:, :, np.newaxis, :]
        elif X.ndim != 4:
            raise ValueError("Input must be 2D, 3D, or 4D.")

        n_samples, n_time, n_spatial, n_features = X.shape

        if apply_forget and self.forget_factor > 0:
            # exp(-f * k) reversed: the largest weight lands on the last time index
            decay = np.exp(-self.forget_factor * np.arange(n_time))[::-1]
            decay = decay / decay.sum()
            X = X * decay.reshape(1, n_time, 1, 1)

        if n_spatial != 1:
            if self.spatial_h * self.spatial_w != n_spatial:
                raise ValueError("spatial_h * spatial_w != n_spatial")
            kernel = np.asarray(self.kernel)
            if kernel.ndim != 2:
                raise ValueError("kernel must be a 2D array")
            # One N-D convolution replaces a Python loop over every
            # (sample, time, feature) slice: the kernel has length 1 along the
            # non-spatial axes, so each 2D grid is still filtered independently.
            grid = X.reshape(n_samples, n_time, self.spatial_h, self.spatial_w, n_features)
            weights = kernel.reshape((1, 1) + kernel.shape + (1,))
            grid = ndimage.convolve(grid, weights, mode='constant', cval=0.0)
            X = grid.reshape(n_samples, n_time, n_spatial, n_features)

        # spatial max pooling, then flatten (time, features) per sample
        return X.max(axis=2).reshape(n_samples, n_time * n_features)

    @staticmethod
    def _layer_outputs(layer, X):
        """Stack the predictions of every regressor in `layer` as columns.

        Each regressor only sees the leading columns it was trained on.
        """
        return np.hstack([reg.predict(X[:, :reg.n_features_in_]).reshape(-1, 1) for reg in layer])

    def _fit_layer(self, X, y, X_val=None, y_val=None, layer_index=0):
        """
        Train one layer and return (layer, training-set outputs of that layer).

        With validation data, each regressor is the best of the hyperparameter
        grid according to eval_metric. Without it no selection is possible, so
        only the last combination is fitted (the one that used to be kept
        after fitting all of them).

        Raises:
            ValueError: if param_grid yields no combinations.
        """
        has_val = X_val is not None
        param_combos = self._get_param_combinations()
        if not param_combos:
            raise ValueError("param_grid yields no parameter combinations")
        if not has_val:
            param_combos = param_combos[-1:]

        layer = []
        layer_outputs = []
        for i in range(self.n_estimators_per_layer):
            best_metric = self._worst_score()
            best_model = None

            for combo in param_combos:
                params = dict(combo)  # never mutate the shared combination
                if self.use_gpu:
                    params['tree_method'] = 'hist'
                    params['device'] = f'cuda:{self.gpu_id}'
                params['random_state'] = i  # a different seed per regressor for diversity

                model = XGBRegressor(**params)
                model.fit(X, y)

                if not has_val:
                    best_model = model
                    continue

                metric = self._evaluate(y_val, model.predict(X_val))
                # Always keep a candidate, and replace one that scored NaN,
                # so best_model can never stay None.
                if best_model is None or np.isnan(best_metric) or self._is_improvement(metric, best_metric):
                    best_metric = metric
                    best_model = model

            layer.append(best_model)
            layer_outputs.append(best_model.predict(X).reshape(-1, 1))

        return layer, np.hstack(layer_outputs)

    def fit(self, X, y, X_val=None, y_val=None):
        """
        Train the cascade layer by layer.

        Any state from a previous fit is discarded. With validation data,
        training stops after `early_stopping_rounds` consecutive layers
        without improvement, and the best layers are kept in `best_model`.

        Returns:
            self

        Raises:
            ValueError: if X_val is given without y_val.
        """
        y = np.asarray(y).ravel()
        has_val = X_val is not None
        if has_val:
            if y_val is None:
                raise ValueError("y_val must be provided together with X_val")
            y_val = np.asarray(y_val).ravel()

        # start from scratch so that a second fit does not stack onto old layers
        self.layers = []
        self.best_model = None
        self.best_score = self._worst_score()

        X_current = self._prepare_input(X, apply_forget=True)
        X_val_current = self._prepare_input(X_val, apply_forget=True) if has_val else None

        no_improve_rounds = 0

        for layer_index in range(self.max_layers):
            if self.verbose:
                print(f"Training Layer {layer_index + 1}")

            layer, output = self._fit_layer(X_current, y, X_val_current, y_val, layer_index)
            self.layers.append(layer)
            X_current = np.hstack([X_current, output])

            if not has_val:
                continue

            val_output = self._layer_outputs(layer, X_val_current)
            X_val_current = np.hstack([X_val_current, val_output])

            # The mean of the newest layer's outputs is exactly what predict()
            # returns, so there is no need to re-run the whole cascade.
            score = self._evaluate(y_val, np.mean(val_output, axis=1))
            if self.verbose:
                print(f"Validation {self.eval_metric.upper()}: {score:.4f}")

            if self._is_improvement(score, self.best_score):
                self.best_score = score
                self.best_model = copy.deepcopy(self.layers)
                no_improve_rounds = 0
                if self.verbose:
                    print(f"\u2705 New best {self.eval_metric.upper()}: {self.best_score:.4f}")
            else:
                no_improve_rounds += 1
                if no_improve_rounds >= self.early_stopping_rounds:
                    if self.verbose:
                        print("Early stopping triggered.")
                    break

        return self

    def predict(self, X):
        """
        Predict targets: the mean output of the regressors in the last trained layer.

        Raises:
            RuntimeError: if the model has not been fitted.
        """
        if not self.layers:
            raise RuntimeError("SmartForest4D is not fitted yet; call fit() first.")

        X_current = self._prepare_input(X, apply_forget=True)
        output = None
        for layer in self.layers:
            output = self._layer_outputs(layer, X_current)
            X_current = np.hstack([X_current, output])

        return np.mean(output, axis=1)

    def get_best_model(self):
        """Return (best_model, best_score) recorded during validation."""
        return self.best_model, self.best_score
336
+
337
+ """
338
+ # ============================== Test Example ==============================
339
+ import numpy as np
340
+ import copy
341
+ import itertools
342
+ from scipy import ndimage
343
+ from xgboost import XGBRegressor
344
+ from sklearn.metrics import mean_squared_error
345
+ from sklearn.model_selection import train_test_split
346
+
347
+ # Generate synthetic 4D data: (samples, time, spatial, features)
348
+ # time order is like [t (today), t - 1 (yesterday), t -2, ...]
349
+ n_samples = 200
350
+ n_time = 5
351
+ n_spatial = 4
352
+ n_features = 5
353
+
354
+ np.random.seed(42)
355
+ X = np.random.rand(n_samples, n_time, n_spatial, n_features)
356
+ y = X[:, :3, :2, :4].mean(axis=(1, 2, 3)) + 0.1 * np.random.randn(n_samples)
357
+ y = y.ravel()
358
+
359
+ # Split
360
+ X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
361
+
362
+ # Train model
363
+ model = SmartForest4D(
364
+ n_estimators_per_layer=5,
365
+ max_layers=20,
366
+ early_stopping_rounds=5,
367
+ spatial_h = 2,
368
+ spatial_w = 2,
369
+ forget_factor=0.1,
370
+ verbose=1
371
+ )
372
+ model.fit(X_train, y_train, X_val, y_val)
373
+
374
+ # Predict
375
+ y_pred = model.predict(X_val)
376
+ rmse = np.sqrt(mean_squared_error(y_val, y_pred))
377
+ print("\n✅ Final RMSE on validation set:", rmse)
378
+
379
+
380
+ # Output best model and RMSE
381
+ best_model, best_rmse = model.get_best_model()
382
+ print("\nBest validation RMSE:", best_rmse)
383
+ """
384
+
385
+ # ============================================================================================================================================================
386
+ # Function mode
387
+
388
+ import tensorflow as tf
389
+ from tensorflow import keras
390
+ from tensorflow.keras import layers
391
+ from tensorflow.keras.models import load_model
392
+
393
def srcnn(learning_rate=0.001):
    """
    Build and compile the two-input texture-fusion SRCNN.

    The network has two parallel streams, each made of a 64-filter and a
    128-filter 3x3 convolution:
    - one over the low-resolution input,
    - one over the high-resolution image, extracting texture features.
    The two streams are concatenated, reduced by a 128-filter and a 64-filter
    convolution, and mapped to a single sigmoid channel. Every convolution
    uses "same" padding, so the output keeps the spatial size of the inputs.

    Args:
        learning_rate (float): Learning rate for the Adam optimizer.

    Returns:
        keras.Model: A model compiled with MSE loss, ready for training.
        Its inputs are [low-resolution, high-resolution], each with one channel.
    """
    def conv3x3(filters, tensor, activation="relu"):
        # all convolutions share the 3x3 kernel and "same" padding
        return layers.Conv2D(filters, (3, 3), activation=activation, padding="same")(tensor)

    # Input layers: single-channel images of arbitrary height and width
    lowres_input = layers.Input(shape=(None, None, 1))
    highres_input = layers.Input(shape=(None, None, 1))

    # Texture features from the high-resolution image
    texture = conv3x3(128, conv3x3(64, highres_input))

    # Processing of the low-resolution input
    coarse = conv3x3(128, conv3x3(64, lowres_input))

    # Fusion of both streams
    merged = layers.Concatenate()([coarse, texture])
    merged = conv3x3(64, conv3x3(128, merged))

    # Single-channel output squashed to (0, 1) by the sigmoid
    prediction = conv3x3(1, merged, activation="sigmoid")

    net = keras.Model(inputs=[lowres_input, highres_input], outputs=prediction)
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    net.compile(optimizer=optimizer, loss="mse")
    return net
433
+
434
def print_model(model):
    """Call `model.summary()` and hand back whatever it returns."""
    report = model.summary()
    return report
436
+
437
def train(lowres_data, highres_data, epochs=100, batch_size=1, verbose=1, save_path=None):
    """
    Build a fresh SRCNN with `srcnn()`, train it and return it.

    The network is fitted on the inputs [lowres_data, highres_data] with
    highres_data as the target.

    Args:
        lowres_data: Low-resolution input passed to `model.fit`.
        highres_data: High-resolution input; also used as the training target.
        epochs (int): Number of training epochs.
        batch_size (int): Batch size.
        verbose (int): Keras verbosity level.
        save_path (str, optional): Where to save the complete model. If
            empty or None the model is not saved. Newer Keras versions
            (TensorFlow 2.11+) recommend a '.keras' path, e.g.
            'texture_fusion_model.keras'.

    Returns:
        The trained model. It used to be discarded, which made the training
        result unreachable whenever save_path was not given.
    """
    model = srcnn()
    # Train SRCNN
    model.fit([lowres_data, highres_data], highres_data, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # Save the complete model
    if save_path:
        model.save(save_path)
    return model
444
+
445
def apply(model, lowres_data_app, highres_data):
    """
    Run the model on [lowres_data_app, highres_data] and wrap the result in an
    xarray DataArray named "super_res".

    The prediction is squeezed (all length-1 axes dropped) and labelled with
    the `latitude` / `longitude` coordinates read from `highres_data`.
    # NOTE(review): this requires highres_data to expose .latitude and
    # .longitude (e.g. an xarray object) - confirm against callers.
    """
    prediction = model.predict([lowres_data_app, highres_data])
    grid = prediction.squeeze()
    coords = {"latitude": highres_data.latitude, "longitude": highres_data.longitude}
    return xr.DataArray(
        grid,
        dims=("latitude", "longitude"),
        coords=coords,
        name="super_res",
    )
454
+
455
def load_model(save_path):
    """
    Load a model previously saved with `model.save(save_path)`.

    This function shadows the `load_model` imported from
    tensorflow.keras.models, so the Keras loader is reached explicitly through
    `keras.models`; calling the bare name here would recurse forever.

    Args:
        save_path (str): Path of the saved model.

    Returns:
        The loaded Keras model.
    """
    return keras.models.load_model(save_path)
457
+
458
+ # ------------------------------------------------------------------------------------------------------------------------------------------------------------
459
+ # Class mode
460
+
461
+ import numpy as np
462
+ import xarray as xr
463
+ import tensorflow as tf
464
+ from tensorflow import keras
465
+ from tensorflow.keras import layers
466
+ from tensorflow.keras.callbacks import EarlyStopping
467
+
468
class TextureFusionSRCNN:
    """
    Two-input SRCNN that fuses a low-resolution field with texture features
    extracted from a high-resolution image.

    Args:
        learning_rate (float): Learning rate for the Adam optimizer used when
            the network is built here.
        model (optional): An already built/loaded Keras model to wrap. When
            given, no new network is built; `load` uses this to avoid
            constructing a throwaway network.
    """

    def __init__(self, learning_rate=0.001, model=None):
        self.learning_rate = learning_rate
        self.model = self._build_model() if model is None else model

    def _build_model(self):
        """Build and compile the network (MSE loss, Adam optimizer)."""
        def conv3x3(filters, tensor, activation="relu"):
            # all convolutions share the 3x3 kernel and "same" padding
            return layers.Conv2D(filters, (3, 3), activation=activation, padding="same")(tensor)

        # Input layers: single-channel images of arbitrary height and width
        lowres_input = layers.Input(shape=(None, None, 1))   # Low-resolution input
        highres_input = layers.Input(shape=(None, None, 1))  # High-resolution image

        # Feature extraction from high-resolution image
        highres_features = conv3x3(128, conv3x3(64, highres_input))

        # Processing low-resolution input
        x = conv3x3(128, conv3x3(64, lowres_input))

        # Fusion of high-resolution image textures
        fusion = layers.Concatenate()([x, highres_features])
        fusion = conv3x3(64, conv3x3(128, fusion))

        # Output: one channel squashed to (0, 1) by the sigmoid
        output = conv3x3(1, fusion, activation="sigmoid")

        model = keras.Model(inputs=[lowres_input, highres_input], outputs=output)
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate), loss="mse")
        return model

    def summary(self):
        """Call the wrapped model's `summary()` and return its result."""
        return self.model.summary()

    def train(self, lowres_data, highres_data, epochs=100, batch_size=1, verbose=1, save_path=None):
        """
        Fit the network on [lowres_data, highres_data] with highres_data as target.

        Training stops early when the training loss has not improved for 10
        epochs, and the best weights are restored.

        Args:
            save_path (str, optional): Where to save the model after training.

        Returns:
            The object returned by `model.fit` (the Keras training history).
        """
        early_stop = EarlyStopping(
            monitor='loss',            # You can change to 'val_loss' if you add validation
            patience=10,               # Number of epochs with no improvement after which training will be stopped
            restore_best_weights=True
        )

        history = self.model.fit(
            [lowres_data, highres_data], highres_data,
            epochs=epochs,
            batch_size=batch_size,
            verbose=verbose,
            callbacks=[early_stop]
        )

        if save_path:
            self.model.save(save_path)

        return history

    def apply(self, lowres_data_app, highres_data):
        """Predict from [lowres_data_app, highres_data] and drop all length-1 axes."""
        super_resolved = self.model.predict([lowres_data_app, highres_data]).squeeze()
        return super_resolved

    @classmethod
    def load(cls, save_path):
        """
        Create an instance around a model saved at `save_path`.

        The instance keeps the default `learning_rate` attribute; the loaded
        model carries its own optimizer state.
        """
        return cls(model=keras.models.load_model(save_path))
530
+
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sciml
3
- Version: 0.0.10
4
- Summary: draw and basic calculations/conversions
3
+ Version: 0.0.12
4
+ Summary: Machine/deep learning models and toolboxes for geosciences.
5
5
  Home-page: https://github.com/soonyenju/sciml
6
6
  Author: Songyan Zhu
7
7
  Author-email: zhusy93@gmail.com
8
8
  License: MIT Licence
9
- Keywords: Scientific machine learning wrappers
9
+ Keywords: Geospatial scientific ML
10
10
  Platform: any
11
11
  License-File: LICENSE
12
12
 
@@ -13,9 +13,9 @@ from setuptools import setup, find_packages
13
13
 
14
14
  setup(
15
15
  name = "sciml",
16
- version = "0.0.10",
17
- keywords = ("Scientific machine learning wrappers"),
18
- description = "draw and basic calculations/conversions",
16
+ version = "0.0.12",
17
+ keywords = ("Geospatial scientific ML"),
18
+ description = "Machine/deep learning models and toolboxes for geosciences.",
19
19
  long_description = "coming soon",
20
20
  license = "MIT Licence",
21
21
 
@@ -1,276 +0,0 @@
1
- import numpy as np
2
- import copy
3
- import itertools
4
- import warnings
5
- from xgboost import XGBRegressor
6
- from sklearn.metrics import mean_squared_error
7
- from sklearn.model_selection import train_test_split
8
-
9
- class SmartForest:
10
- """
11
- SmartForest: A deep, intelligent decision forest model for complex sequential and tabular data.
12
-
13
- SmartForest blends ideas from deep forests (cascade forest structures), LSTM-style forget gates,
14
- and ensemble learning using XGBoost. It is especially suited for time series or structured tabular data
15
- where layer-wise feature expansion and memory-inspired filtering can enhance performance.
16
-
17
- Key Features:
18
- -------------
19
- - Deep cascade of XGBoost regressors
20
- - Optional Multi-Grained Scanning (MGS) for local feature extraction
21
- - Forget-gate-inspired mechanism to regulate information flow across layers
22
- - Early stopping to prevent overfitting
23
- - Full retention of best-performing model (lowest validation RMSE)
24
-
25
- Parameters:
26
- -----------
27
- n_estimators_per_layer : int
28
- Number of XGBoost regressors per layer.
29
-
30
- max_layers : int
31
- Maximum number of layers (depth) in the model.
32
-
33
- early_stopping_rounds : int
34
- Number of layers with no improvement before early stopping is triggered.
35
-
36
- param_grid : dict
37
- Grid of XGBoost hyperparameters to search over.
38
-
39
- use_gpu : bool
40
- If True, use GPU-accelerated training (CUDA required).
41
-
42
- gpu_id : int
43
- ID of GPU to use (if use_gpu=True).
44
-
45
- window_sizes : list of int
46
- Enables Multi-Grained Scanning if non-empty, with specified sliding window sizes.
47
-
48
- forget_factor : float in [0, 1]
49
- Simulates LSTM-style forget gate; higher values forget more past information.
50
-
51
- verbose : int
52
- Verbosity level (0 = silent, 1 = progress updates).
53
-
54
- Methods:
55
- --------
56
- fit(X, y, X_val=None, y_val=None):
57
- Train the SmartForest model layer by layer, using optional validation for early stopping.
58
-
59
- predict(X):
60
- Make predictions on new data using the trained cascade structure.
61
-
62
- get_best_model():
63
- Returns a copy of the best model and the corresponding RMSE from validation.
64
-
65
- Example:
66
- --------
67
- >>> model = SmartForest(n_estimators_per_layer=5, max_layers=10, window_sizes=[2, 3], forget_factor=0.2)
68
- >>> model.fit(X_train, y_train, X_val, y_val)
69
- >>> y_pred = model.predict(X_val)
70
- >>> best_model, best_rmse = model.get_best_model()
71
- """
72
- def __init__(self, n_estimators_per_layer = 5, max_layers = 10, early_stopping_rounds = 3, param_grid = None,
73
- use_gpu = False, gpu_id = 0, window_sizes = [], forget_factor = 0, verbose = 1):
74
- self.n_estimators_per_layer = n_estimators_per_layer
75
- self.max_layers = max_layers
76
- self.early_stopping_rounds = early_stopping_rounds
77
- self.param_grid = param_grid or {
78
- "objective": ["reg:squarederror"],
79
- "random_state": [42],
80
- 'seed': [0],
81
- 'n_estimators': [100],
82
- 'max_depth': [6],
83
- 'min_child_weight': [4],
84
- 'subsample': [0.8],
85
- 'colsample_bytree': [0.8],
86
- 'gamma': [0],
87
- 'reg_alpha': [0],
88
- 'reg_lambda': [1],
89
- 'learning_rate': [0.05],
90
- }
91
- self.use_gpu = use_gpu
92
- self.gpu_id = gpu_id
93
- self.window_sizes = window_sizes
94
- self.forget_factor = forget_factor
95
- self.layers = []
96
- self.best_model = None
97
- self.best_rmse = float("inf")
98
- self.verbose = verbose
99
-
100
- def _get_param_combinations(self):
101
- keys, values = zip(*self.param_grid.items())
102
- return [dict(zip(keys, v)) for v in itertools.product(*values)]
103
-
104
- def _multi_grained_scanning(self, X, y):
105
- new_features = []
106
- for window_size in self.window_sizes:
107
- if X.shape[1] < window_size:
108
- continue
109
- for start in range(X.shape[1] - window_size + 1):
110
- window = X[:, start:start + window_size]
111
- if y is None:
112
- new_features.append(window)
113
- continue
114
-
115
- param_combos = self._get_param_combinations()
116
- for params in param_combos:
117
- if self.use_gpu:
118
- params['tree_method'] = 'hist'
119
- params['device'] = 'cuda'
120
- model = XGBRegressor(**params)
121
- model.fit(window, y)
122
- preds = model.predict(window).reshape(-1, 1)
123
- new_features.append(preds)
124
- return np.hstack(new_features) if new_features else X
125
-
126
- def _apply_forget_gate(self, X, layer_index):
127
- forget_weights = np.random.rand(X.shape[1]) * self.forget_factor
128
- return X * (1 - forget_weights)
129
-
130
- def _fit_layer(self, X, y, X_val=None, y_val=None, layer_index=0):
131
- layer = []
132
- layer_outputs = []
133
- param_combos = self._get_param_combinations()
134
- X = self._apply_forget_gate(X, layer_index)
135
-
136
- for i in range(self.n_estimators_per_layer):
137
- best_rmse = float('inf')
138
- best_model = None
139
-
140
- for params in param_combos:
141
- if self.use_gpu:
142
- params['tree_method'] = 'hist'
143
- params['device'] = 'cuda'
144
-
145
- params = params.copy() # Prevent modification from affecting the next loop iteration
146
- params['random_state'] = i # Use a different random seed for each model to enhance diversity
147
-
148
- model = XGBRegressor(**params)
149
- model.fit(X, y)
150
-
151
- if X_val is not None:
152
- preds_val = model.predict(X_val)
153
- rmse = np.sqrt(mean_squared_error(y_val, preds_val))
154
- if rmse < best_rmse:
155
- best_rmse = rmse
156
- best_model = model
157
- else:
158
- best_model = model
159
-
160
- preds = best_model.predict(X).reshape(-1, 1)
161
- layer.append(best_model)
162
- layer_outputs.append(preds)
163
-
164
- output = np.hstack(layer_outputs)
165
- return layer, output
166
-
167
- def fit(self, X, y, X_val=None, y_val=None):
168
- X_current = self._multi_grained_scanning(X, y)
169
- X_val_current = self._multi_grained_scanning(X_val, y_val) if X_val is not None else None
170
- no_improve_rounds = 0
171
-
172
- for layer_index in range(self.max_layers):
173
- if self.verbose: print(f"Training Layer {layer_index + 1}")
174
- layer, output = self._fit_layer(X_current, y, X_val_current, y_val, layer_index)
175
- self.layers.append(layer)
176
- X_current = np.hstack([X_current, output])
177
-
178
- if X_val is not None:
179
- val_outputs = []
180
- for reg in layer:
181
- n_features = reg.n_features_in_
182
- preds = reg.predict(X_val_current[:, :n_features]).reshape(-1, 1)
183
- val_outputs.append(preds)
184
- val_output = np.hstack(val_outputs)
185
- X_val_current = np.hstack([X_val_current, val_output])
186
-
187
- y_pred = self.predict(X_val)
188
- rmse = np.sqrt(mean_squared_error(y_val, y_pred))
189
- if self.verbose: print(f"Validation RMSE: {rmse:.4f}")
190
-
191
- if rmse < self.best_rmse:
192
- self.best_rmse = rmse
193
- self.best_model = copy.deepcopy(self.layers)
194
- no_improve_rounds = 0
195
- if self.verbose: print(f"✅ New best RMSE: {self.best_rmse:.4f}")
196
- else:
197
- no_improve_rounds += 1
198
- if no_improve_rounds >= self.early_stopping_rounds:
199
- if self.verbose: print("Early stopping triggered.")
200
- break
201
-
202
- def predict(self, X):
203
- X_current = self._multi_grained_scanning(X, None)
204
- X_current = self._apply_forget_gate(X_current, layer_index=-1)
205
-
206
- for layer in self.layers:
207
- layer_outputs = []
208
- for reg in layer:
209
- n_features = reg.n_features_in_
210
- preds = reg.predict(X_current[:, :n_features]).reshape(-1, 1)
211
- layer_outputs.append(preds)
212
- output = np.hstack(layer_outputs)
213
- X_current = np.hstack([X_current, output])
214
-
215
- final_outputs = []
216
- for reg in self.layers[-1]:
217
- n_features = reg.n_features_in_
218
- final_outputs.append(reg.predict(X_current[:, :n_features]).reshape(-1, 1))
219
- return np.mean(np.hstack(final_outputs), axis=1)
220
-
221
- def get_best_model(self):
222
- return self.best_model, self.best_rmse
223
-
224
- """
225
- # ============================== Test Example ==============================
226
- import warnings
227
- import numpy as np
228
- from sklearn.datasets import load_diabetes
229
- from sklearn.datasets import fetch_california_housing
230
- from sklearn.model_selection import train_test_split
231
- from sklearn.metrics import mean_squared_error
232
-
233
- # X, y = load_diabetes(return_X_y=True) # Using diabetes dataset
234
- X, y = fetch_california_housing(return_X_y=True) # Using house price dataset
235
- X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
236
-
237
- # Hyperparameter grid
238
- param_grid = {
239
- "objective": ["reg:squarederror"],
240
- "random_state": [42],
241
- 'seed': [0],
242
- 'n_estimators': [100],
243
- 'max_depth': [6],
244
- 'min_child_weight': [4],
245
- 'subsample': [0.8],
246
- 'colsample_bytree': [0.8],
247
- 'gamma': [0],
248
- 'reg_alpha': [0],
249
- 'reg_lambda': [1],
250
- 'learning_rate': [0.05],
251
- }
252
-
253
- # Create the model with Multi-Grained Scanning enabled (with window sizes 2 and 3)
254
- regr = SmartForest(
255
- n_estimators_per_layer = 5,
256
- max_layers = 10,
257
- early_stopping_rounds = 5,
258
- param_grid = param_grid,
259
- use_gpu = False,
260
- gpu_id = 0,
261
- window_sizes = [], # Enables MGS if e.g., [2, 3], else empty disables MGS.
262
- forget_factor = 0., # Set forget factor to simulate forget gate behavior
263
- verbose = 1
264
- )
265
-
266
- regr.fit(X_train, y_train, X_val, y_val)
267
-
268
- # Predict on validation set and evaluate
269
- y_pred = regr.predict(X_val)
270
- rmse = np.sqrt(mean_squared_error(y_val, y_pred))
271
- print("\nFinal RMSE:", rmse)
272
-
273
- # Output best model and RMSE
274
- best_model, best_rmse = regr.get_best_model()
275
- print("\nBest validation RMSE:", best_rmse)
276
- """
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes