sciml 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sciml/__init__.py CHANGED
@@ -1,2 +1,2 @@
  # coding: utf-8
- __all__ = ["utils", "pipelines", "models"]
+ __all__ = ["pipelines", "models", "metrics", "regress2", "ccc"]
sciml/ccc.py ADDED
@@ -0,0 +1,36 @@
+ # https://rowannicholls.github.io/python/statistics/agreement/correlation_coefficients.html#lins-concordance-correlation-coefficient-ccc
+ # Lin LIK (1989). “A concordance correlation coefficient to evaluate reproducibility”. Biometrics. 45 (1): 255-268.
+ import numpy as np
+ import pandas as pd
+
+ def concordance_correlation_coefficient(y_true, y_pred):
+     """Lin's concordance correlation coefficient (CCC)."""
+     # Remove NaNs by pairing the two series in a DataFrame
+     df = pd.DataFrame({
+         'y_true': y_true,
+         'y_pred': y_pred
+     })
+     df = df.dropna()
+     y_true = df['y_true']
+     y_pred = df['y_pred']
+     # Pearson product-moment correlation coefficient
+     cor = np.corrcoef(y_true, y_pred)[0][1]
+     # Means
+     mean_true = np.mean(y_true)
+     mean_pred = np.mean(y_pred)
+     # Variances
+     var_true = np.var(y_true)
+     var_pred = np.var(y_pred)
+     # Standard deviations
+     sd_true = np.std(y_true)
+     sd_pred = np.std(y_pred)
+     # CCC = 2*rho*sd_x*sd_y / (var_x + var_y + (mean_x - mean_y)^2)
+     numerator = 2 * cor * sd_true * sd_pred
+     denominator = var_true + var_pred + (mean_true - mean_pred)**2
+     return numerator / denominator
+
+
+ # y_true = [3, -0.5, 2, 7, np.nan]
+ # y_pred = [2.5, 0.0, 2, 8, 3]
+ # ccc = concordance_correlation_coefficient(y_true, y_pred)
+ # print(ccc)
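
For orientation, a minimal sketch of calling the new function (the `sciml.ccc` import path is inferred from the file layout above; the numbers are illustrative):

    import numpy as np
    from sciml.ccc import concordance_correlation_coefficient

    y_true = [3, -0.5, 2, 7, np.nan]  # the pair containing NaN is dropped first
    y_pred = [2.5, 0.0, 2, 8, 3]
    print(concordance_correlation_coefficient(y_true, y_pred))  # ≈ 0.977 on the four remaining pairs
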
sciml/metrics.py ADDED
@@ -0,0 +1,123 @@
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+ from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, mean_squared_error, mean_squared_log_error, median_absolute_error, r2_score, mean_poisson_deviance, mean_gamma_deviance, mean_tweedie_deviance
+
+ def stats_summary(df):
+     min_ = df.min().to_frame().T
+     Q1 = df.quantile(0.25).to_frame().T
+     median_ = df.quantile(0.5).to_frame().T
+     mean_ = df.mean().to_frame().T
+     Q3 = df.quantile(0.75).to_frame().T
+     max_ = df.max().to_frame().T
+     df_stats = pd.concat([min_, Q1, median_, mean_, Q3, max_])
+     df_stats.index = ["Min", "Q1", "Median", "Mean", "Q3", "Max"]
+     return df_stats
+
+ def stats_measures(x, y, return_dict=False):
+     slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
+     mse = mean_squared_error(x, y)
+     r2 = rvalue ** 2
+     rmse = np.sqrt(mse)
+     mbe = (y - x).mean()
+     if return_dict:
+         return {
+             "R2": r2,
+             "SLOPE": slope,
+             "RMSE": rmse,
+             "MBE": mbe
+         }
+     else:
+         return [r2, slope, rmse, mbe]
+
+ def stats_measures_full(x, y):
+     # from sklearn.metrics import mean_absolute_percentage_error
+     slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
+     mse = mean_squared_error(x, y)
+     r2 = rvalue ** 2
+     rmse = np.sqrt(mse)
+     mbe = (y - x).mean()
+     # ----------------------------------------------------------------
+     pearsonr = stats.pearsonr(x, y)
+     evs = explained_variance_score(x, y)
+     me = max_error(x, y)
+     mae = mean_absolute_error(x, y)
+     msle = mean_squared_log_error(x, y)
+     meae = median_absolute_error(x, y)
+     # Bind to a fresh name: assigning to `r2_score` would shadow the imported
+     # function and raise UnboundLocalError at the call.
+     r2s = r2_score(x, y)
+     mpd = mean_poisson_deviance(x, y)
+     mgd = mean_gamma_deviance(x, y)
+     mtd = mean_tweedie_deviance(x, y)
+     return {
+         "R2": r2,
+         "SLOPE": slope,
+         "RMSE": rmse,
+         "MBE": mbe,
+         "INTERCEPT": intercept,
+         "PVALUE": pvalue,
+         "STDERR": stderr,
+         "PEARSON": pearsonr,
+         "EXPLAINED_VARIANCE": evs,
+         "MAXERR": me,
+         "MAE": mae,
+         "MSLE": msle,
+         "MEDIAN_AE": meae,
+         "R2_SCORE": r2s,
+         "MPD": mpd,
+         "MGD": mgd,
+         "MTD": mtd
+     }
+
+ def stats_measures_df(df, name1, name2, return_dict=False):
+     slope, intercept, rvalue, pvalue, stderr = stats.linregress(df[name1], df[name2])
+     mse = mean_squared_error(df[name1], df[name2])
+     r2 = rvalue ** 2
+     rmse = np.sqrt(mse)
+     mbe = (df[name2] - df[name1]).mean()
+     if return_dict:
+         return {
+             "R2": r2,
+             "SLOPE": slope,
+             "RMSE": rmse,
+             "MBE": mbe
+         }
+     else:
+         return [r2, slope, rmse, mbe]
+
+
+
+ def get_r2(x, y):
+     try:
+         x_bar = x.mean()
+     except AttributeError:
+         # Plain Python sequences have no .mean(); fall back to numpy
+         x_bar = np.mean(x)
+
+     r2 = 1 - np.sum((x - y)**2) / np.sum((x - x_bar)**2)
+     return r2
+
+ def get_rmse(observations, estimates):
+     return np.sqrt(((estimates - observations) ** 2).mean())
+
+ def calculate_R2(y_true, y_pred):
+     """
+     Calculate the R^2 (coefficient of determination).
+
+     Args:
+         y_true (array-like): Actual values of the dependent variable.
+         y_pred (array-like): Predicted values of the dependent variable.
+
+     Returns:
+         float: The R^2 value.
+     """
+     y_true = np.array(y_true)
+     y_pred = np.array(y_pred)
+
+     # Residual sum of squares
+     ss_res = np.sum((y_true - y_pred) ** 2)
+
+     # Total sum of squares
+     ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+
+     # R^2 calculation
+     R2 = 1 - (ss_res / ss_tot)
+     return R2
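
The new helpers accept any array-likes that numpy/pandas understand. A short sketch with synthetic data (names and values are illustrative, not from the package; the import path is inferred from the file layout above):

    import numpy as np
    import pandas as pd
    from sciml.metrics import stats_summary, stats_measures

    rng = np.random.default_rng(0)
    obs = rng.random(100)
    est = obs + 0.05 * rng.standard_normal(100)

    print(stats_summary(pd.DataFrame({"obs": obs, "est": est})))  # Min/Q1/Median/Mean/Q3/Max table
    r2, slope, rmse, mbe = stats_measures(obs, est)               # pass return_dict=True for a dict instead
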
sciml/models.py CHANGED
@@ -223,13 +223,12 @@ class SmartForest:
 
  """
  # ============================== Test Example ==============================
+ import warnings
+ import numpy as np
  from sklearn.datasets import load_diabetes
  from sklearn.datasets import fetch_california_housing
  from sklearn.model_selection import train_test_split
-
-
-
- warnings.simplefilter('ignore')
+ from sklearn.metrics import mean_squared_error
 
  # X, y = load_diabetes(return_X_y=True) # Using diabetes dataset
  X, y = fetch_california_housing(return_X_y=True) # Using house price dataset
@@ -274,4 +273,524 @@ print("\nFinal RMSE:", rmse)
  # Output best model and RMSE
  best_model, best_rmse = regr.get_best_model()
  print("\nBest validation RMSE:", best_rmse)
- """
+ """
+
+ # ============================================================================================================================================================
+
+ import numpy as np
+ import copy
+ import itertools
+ from scipy import ndimage
+ from xgboost import XGBRegressor
+ from sklearn.metrics import mean_squared_error
+ from sklearn.model_selection import train_test_split
+
+ class SmartForest4D:
+     """
+     SmartForest4D is an ensemble learning model designed to handle complex 4D input data
+     (samples, time, spatial, features). It integrates ideas from gradient-boosted decision trees
+     (XGBoost) with LSTM-style forget gates and spatial max pooling.
+
+     The model builds layers of regressors, each layer taking the previous output as part of its
+     input (deep forest style). A forget gate mechanism is applied along the time dimension to
+     emphasize recent temporal information. Spatial max pooling is used to reduce dimensionality
+     across spatial units before flattening and feeding into the regressors.
+
+     Parameters:
+     -----------
+     n_estimators_per_layer : int
+         Number of XGBoost regressors per layer.
+
+     max_layers : int
+         Maximum number of layers in the deep forest.
+
+     early_stopping_rounds : int
+         Number of rounds without improvement on the validation set before early stopping.
+
+     param_grid : dict
+         Dictionary of hyperparameter lists to search over for XGBoost.
+
+     use_gpu : bool
+         Whether to use GPU for training XGBoost models.
+
+     gpu_id : int
+         GPU device ID to use if use_gpu is True.
+
+     kernel : np.ndarray
+         Convolutional kernel for spatial processing. Some common choices:
+
+         # ===============================
+         # 0. Identity (do nothing)
+         # ===============================
+         identity_kernel = np.array([
+             [0, 0, 0],
+             [0, 1, 0],
+             [0, 0, 0]
+         ])
+
+         # ===============================
+         # 1. Sobel edge detection kernels
+         # ===============================
+         sobel_x = np.array([
+             [-1, 0, 1],
+             [-2, 0, 2],
+             [-1, 0, 1]
+         ])
+
+         sobel_y = np.array([
+             [-1, -2, -1],
+             [ 0,  0,  0],
+             [ 1,  2,  1]
+         ])
+
+         # ===============================
+         # 2. Gaussian blur kernel (3x3)
+         # ===============================
+         gaussian_kernel = (1/16) * np.array([
+             [1, 2, 1],
+             [2, 4, 2],
+             [1, 2, 1]
+         ])
+
+         # ===============================
+         # 3. Morphological structuring element (3x3 cross),
+         #    used in binary dilation/erosion
+         # ===============================
+         morph_kernel = np.array([
+             [0, 1, 0],
+             [1, 1, 1],
+             [0, 1, 0]
+         ])
+
+         # ===============================
+         # 4. Sharpening kernel: enhances edges and contrast
+         # ===============================
+         sharpen_kernel = np.array([
+             [ 0, -1,  0],
+             [-1,  5, -1],
+             [ 0, -1,  0]
+         ])
+
+         # ===============================
+         # 5. Embossing kernel: creates a 3D-like shadowed effect
+         # ===============================
+         emboss_kernel = np.array([
+             [-2, -1, 0],
+             [-1,  1, 1],
+             [ 0,  1, 2]
+         ])
+
+     spatial_h : int
+         The height of the 2D grid for the flattened spatial dimension.
+
+     spatial_w : int
+         The width of the 2D grid for the flattened spatial dimension.
+
+     forget_factor : float
+         Exponential decay rate applied along the time axis. Higher values mean stronger forgetting.
+
+     verbose : int
+         Verbosity level for training output.
+
+     Attributes:
+     -----------
+     layers : list
+         List of trained layers, each containing a list of regressors.
+
+     best_model : list
+         The set of layers corresponding to the best validation RMSE seen during training.
+
+     best_rmse : float
+         The lowest RMSE achieved on the validation set.
+
+     Methods:
+     --------
+     fit(X, y, X_val=None, y_val=None):
+         Train the SmartForest4D model on the given 4D input data.
+
+     predict(X):
+         Predict targets for new 4D input data using the trained model.
+
+     get_best_model():
+         Return the best set of layers and corresponding RMSE.
+
+     Notes:
+     ------
+     - The product of spatial_h and spatial_w must equal the spatial size of the input
+       (spatial_h * spatial_w = n_spatial); both are required by __init__.
+
+     Example:
+     --------
+     >>> model = SmartForest4D(n_estimators_per_layer=5, max_layers=10, early_stopping_rounds=3, spatial_h=2, spatial_w=2, forget_factor=0.3, verbose=1)
+     >>> model.fit(X_train, y_train, X_val, y_val)
+     >>> y_pred = model.predict(X_val)
+     >>> best_model, best_rmse = model.get_best_model()
+     """
+     def __init__(self, n_estimators_per_layer=5, max_layers=10, early_stopping_rounds=3, param_grid=None,
+                  use_gpu=False, gpu_id=0, kernel=np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]]), spatial_h=None, spatial_w=None,
+                  forget_factor=0.0, verbose=1):
+         self.n_estimators_per_layer = n_estimators_per_layer
+         self.max_layers = max_layers
+         self.early_stopping_rounds = early_stopping_rounds
+         self.param_grid = param_grid or {
+             "objective": ["reg:squarederror"],
+             "random_state": [42],
+             'n_estimators': [100],
+             'max_depth': [6],
+             'min_child_weight': [4],
+             'subsample': [0.8],
+             'colsample_bytree': [0.8],
+             'gamma': [0],
+             'reg_alpha': [0],
+             'reg_lambda': [1],
+             'learning_rate': [0.05],
+         }
+         self.use_gpu = use_gpu
+         self.gpu_id = gpu_id
+         self.kernel = kernel
+         self.spatial_h = spatial_h
+         self.spatial_w = spatial_w
+         self.forget_factor = forget_factor
+         self.layers = []
+         self.best_model = None
+         self.best_rmse = float("inf")
+         self.verbose = verbose
+         if (self.spatial_h is None) or (self.spatial_w is None):
+             raise ValueError("Please specify spatial_h and spatial_w")
+
+     def _get_param_combinations(self):
+         keys, values = zip(*self.param_grid.items())
+         return [dict(zip(keys, v)) for v in itertools.product(*values)]
+
+     def _prepare_input(self, X, y=None, apply_forget=False, layer_index=0):
+         # Ensure 4D: (samples, time, spatial, features)
+         if X.ndim == 2:
+             X = X[:, np.newaxis, np.newaxis, :]
+         elif X.ndim == 3:
+             X = X[:, :, np.newaxis, :]
+         elif X.ndim == 4:
+             pass
+         else:
+             raise ValueError("Input must be 2D, 3D, or 4D.")
+
+         n_samples, n_time, n_spatial, n_features = X.shape
+
+         if apply_forget and self.forget_factor > 0:
+             decay = np.exp(-self.forget_factor * np.arange(n_time))[::-1]
+             decay = decay / decay.sum()
+             decay = decay.reshape(1, n_time, 1, 1)
+             X = X * decay
+
+         # Apply convolutional kernels:
+         if n_spatial != 1:
+             if self.spatial_h * self.spatial_w != n_spatial:
+                 raise ValueError("spatial_h * spatial_w != n_spatial")
+             X_out = np.zeros_like(X)
+             for sample in range(X.shape[0]):
+                 for t in range(X.shape[1]):
+                     for f in range(X.shape[3]):
+                         spatial_2d = X[sample, t, :, f].reshape(self.spatial_h, self.spatial_w)
+                         # Apply 2D convolution
+                         filtered = ndimage.convolve(spatial_2d, self.kernel, mode='constant', cval=0.0)
+                         # Flatten back to (n_spatial,) and store
+                         X_out[sample, t, :, f] = filtered.reshape(n_spatial)
+             X = X_out
+             del X_out
+         # Max pooling over spatial dim
+         X_pooled = X.max(axis=2)                       # (samples, time, features)
+         X_flattened = X_pooled.reshape(n_samples, -1)  # (samples, time * features)
+         return X_flattened
+
+     def _fit_layer(self, X, y, X_val=None, y_val=None, layer_index=0):
+         layer = []
+         layer_outputs = []
+         param_combos = self._get_param_combinations()
+
+         for i in range(self.n_estimators_per_layer):
+             best_rmse = float('inf')
+             best_model = None
+
+             for params in param_combos:
+                 params = params.copy()  # copy first so the shared grid entries are not mutated
+                 if self.use_gpu:
+                     params['tree_method'] = 'hist'
+                     params['device'] = 'cuda'
+                 params['random_state'] = i
+
+                 model = XGBRegressor(**params)
+                 model.fit(X, y)
+
+                 if X_val is not None:
+                     preds_val = model.predict(X_val)
+                     rmse = np.sqrt(mean_squared_error(y_val, preds_val))
+                     if rmse < best_rmse:
+                         best_rmse = rmse
+                         best_model = model
+                 else:
+                     best_model = model
+
+             preds = best_model.predict(X).reshape(-1, 1)
+             layer.append(best_model)
+             layer_outputs.append(preds)
+
+         output = np.hstack(layer_outputs)
+         return layer, output
+
+     def fit(self, X, y, X_val=None, y_val=None):
+         y = y.ravel()
+         X_current = self._prepare_input(X, apply_forget=True)
+         X_val_current = self._prepare_input(X_val, apply_forget=True) if X_val is not None else None
+
+         no_improve_rounds = 0
+
+         for layer_index in range(self.max_layers):
+             if self.verbose:
+                 print(f"Training Layer {layer_index + 1}")
+
+             layer, output = self._fit_layer(X_current, y, X_val_current, y_val, layer_index)
+             self.layers.append(layer)
+             X_current = np.hstack([X_current, output])
+
+             if X_val is not None:
+                 val_outputs = []
+                 for reg in layer:
+                     n_features = reg.n_features_in_
+                     preds = reg.predict(X_val_current[:, :n_features]).reshape(-1, 1)
+                     val_outputs.append(preds)
+                 val_output = np.hstack(val_outputs)
+                 X_val_current = np.hstack([X_val_current, val_output])
+
+                 y_pred = self.predict(X_val)
+                 rmse = np.sqrt(mean_squared_error(y_val, y_pred))
+                 if self.verbose:
+                     print(f"Validation RMSE: {rmse:.4f}")
+
+                 if rmse < self.best_rmse:
+                     self.best_rmse = rmse
+                     self.best_model = copy.deepcopy(self.layers)
+                     no_improve_rounds = 0
+                     if self.verbose:
+                         print(f"✅ New best RMSE: {self.best_rmse:.4f}")
+                 else:
+                     no_improve_rounds += 1
+                     if no_improve_rounds >= self.early_stopping_rounds:
+                         if self.verbose:
+                             print("Early stopping triggered.")
+                         break
+
+     def predict(self, X):
+         X_current = self._prepare_input(X, apply_forget=True)
+
+         for layer in self.layers:
+             layer_outputs = []
+             for reg in layer:
+                 n_features = reg.n_features_in_
+                 preds = reg.predict(X_current[:, :n_features]).reshape(-1, 1)
+                 layer_outputs.append(preds)
+             output = np.hstack(layer_outputs)
+             X_current = np.hstack([X_current, output])
+
+         final_outputs = []
+         for reg in self.layers[-1]:
+             n_features = reg.n_features_in_
+             final_outputs.append(reg.predict(X_current[:, :n_features]).reshape(-1, 1))
+         return np.mean(np.hstack(final_outputs), axis=1)
+
+     def get_best_model(self):
+         return self.best_model, self.best_rmse
+
+ """
+ # ============================== Test Example ==============================
+ import numpy as np
+ import copy
+ import itertools
+ from scipy import ndimage
+ from xgboost import XGBRegressor
+ from sklearn.metrics import mean_squared_error
+ from sklearn.model_selection import train_test_split
+
+ # Generate synthetic 4D data: (samples, time, spatial, features)
+ # time order is like [t (today), t - 1 (yesterday), t - 2, ...]
+ n_samples = 200
+ n_time = 5
+ n_spatial = 4
+ n_features = 5
+
+ np.random.seed(42)
+ X = np.random.rand(n_samples, n_time, n_spatial, n_features)
+ y = X[:, :3, :2, :4].mean(axis=(1, 2, 3)) + 0.1 * np.random.randn(n_samples)
+ y = y.ravel()
+
+ # Split
+ X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
+
+ # Train model
+ model = SmartForest4D(
+     n_estimators_per_layer=5,
+     max_layers=20,
+     early_stopping_rounds=5,
+     spatial_h=2,
+     spatial_w=2,
+     forget_factor=0.1,
+     verbose=1
+ )
+ model.fit(X_train, y_train, X_val, y_val)
+
+ # Predict
+ y_pred = model.predict(X_val)
+ rmse = np.sqrt(mean_squared_error(y_val, y_pred))
+ print("\n✅ Final RMSE on validation set:", rmse)
+
+
+ # Output best model and RMSE
+ best_model, best_rmse = model.get_best_model()
+ print("\nBest validation RMSE:", best_rmse)
+ """
+
+ # ============================================================================================================================================================
+ # Function mode
+
+ import xarray as xr
+ import tensorflow as tf
+ from tensorflow import keras
+ from tensorflow.keras import layers
+
+ def srcnn(learning_rate=0.001):
+     """
+     Builds and compiles a Super-Resolution Convolutional Neural Network (SRCNN) model
+     that fuses features from both low-resolution and high-resolution images.
+
+     This model uses two parallel input streams:
+     - A low-resolution input which undergoes upscaling through convolutional layers.
+     - A high-resolution input from which texture features are extracted and fused with the low-resolution stream.
+
+     Args:
+         learning_rate (float): Learning rate for the Adam optimizer.
+
+     Returns:
+         keras.Model: A compiled Keras model ready for training.
+     """
+     # Input layers
+     lowres_input = layers.Input(shape=(None, None, 1))   # Low-resolution input
+     highres_input = layers.Input(shape=(None, None, 1))  # High-resolution image
+
+     # Feature extraction from high-resolution image
+     highres_features = layers.Conv2D(64, (3, 3), activation="relu", padding="same")(highres_input)
+     highres_features = layers.Conv2D(128, (3, 3), activation="relu", padding="same")(highres_features)
+
+     # Processing low-resolution input
+     x = layers.Conv2D(64, (3, 3), activation="relu", padding="same")(lowres_input)
+     x = layers.Conv2D(128, (3, 3), activation="relu", padding="same")(x)
+
+     # Fusion of high-resolution image textures
+     fusion = layers.Concatenate()([x, highres_features])
+     fusion = layers.Conv2D(128, (3, 3), activation="relu", padding="same")(fusion)
+     fusion = layers.Conv2D(64, (3, 3), activation="relu", padding="same")(fusion)
+
+     # Output
+     output = layers.Conv2D(1, (3, 3), activation="sigmoid", padding="same")(fusion)
+
+     model = keras.Model(inputs=[lowres_input, highres_input], outputs=output)
+     model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
+
+     return model
+
+ def print_model(model):
+     return model.summary()
+
+ def train(lowres_data, highres_data, epochs=100, batch_size=1, verbose=1, save_path=None):
+     model = srcnn()
+     # Train SRCNN (the original called fit on undefined names modis_data_1/s2_data)
+     model.fit([lowres_data, highres_data], highres_data, epochs=epochs, batch_size=batch_size, verbose=verbose)
+     # Save the complete model
+     # Recommended in newer versions of Keras (TensorFlow 2.11+): e.g., 'texture_fusion_model.keras'
+     if save_path: model.save(save_path)
+     return model  # return the fitted model so apply() can use it
+
+ def apply(model, lowres_data_app, highres_data):
+     super_resolved = model.predict([lowres_data_app, highres_data]).squeeze()
+     super_resolved = xr.DataArray(
+         super_resolved,
+         dims=("latitude", "longitude"),
+         coords={"latitude": highres_data.latitude, "longitude": highres_data.longitude},
+         name="super_res"
+     )
+     return super_resolved
+
+ def load_model(save_path):
+     # Call the Keras loader via its qualified name: re-using the bare name
+     # here would make the function call itself recursively (the original
+     # also ignored save_path in favour of a hard-coded file name).
+     return keras.models.load_model(save_path)
+
+ # ------------------------------------------------------------------------------------------------------------------------------------------------------------
+ # Class mode
+
+ import numpy as np
+ import xarray as xr
+ import tensorflow as tf
+ from tensorflow import keras
+ from tensorflow.keras import layers
+ from tensorflow.keras.callbacks import EarlyStopping
+
+ class TextureFusionSRCNN:
+     def __init__(self, learning_rate=0.001):
+         self.learning_rate = learning_rate
+         self.model = self._build_model()
+
+     def _build_model(self):
+         # Input layers
+         lowres_input = layers.Input(shape=(None, None, 1))   # Low-resolution input
+         highres_input = layers.Input(shape=(None, None, 1))  # High-resolution image
+
+         # Feature extraction from high-resolution image
+         highres_features = layers.Conv2D(64, (3, 3), activation="relu", padding="same")(highres_input)
+         highres_features = layers.Conv2D(128, (3, 3), activation="relu", padding="same")(highres_features)
+
+         # Processing low-resolution input
+         x = layers.Conv2D(64, (3, 3), activation="relu", padding="same")(lowres_input)
+         x = layers.Conv2D(128, (3, 3), activation="relu", padding="same")(x)
+
+         # Fusion of high-resolution image textures
+         fusion = layers.Concatenate()([x, highres_features])
+         fusion = layers.Conv2D(128, (3, 3), activation="relu", padding="same")(fusion)
+         fusion = layers.Conv2D(64, (3, 3), activation="relu", padding="same")(fusion)
+
+         # Output
+         output = layers.Conv2D(1, (3, 3), activation="sigmoid", padding="same")(fusion)
+
+         model = keras.Model(inputs=[lowres_input, highres_input], outputs=output)
+         model.compile(optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate), loss="mse")
+
+         return model
+
+     def summary(self):
+         return self.model.summary()
+
+     def train(self, lowres_data, highres_data, epochs=100, batch_size=1, verbose=1, save_path=None):
+         early_stop = EarlyStopping(
+             monitor='loss',    # change to 'val_loss' if validation data is added
+             patience=10,       # epochs with no improvement before training stops
+             restore_best_weights=True
+         )
+
+         self.model.fit(
+             [lowres_data, highres_data], highres_data,
+             epochs=epochs,
+             batch_size=batch_size,
+             verbose=verbose,
+             callbacks=[early_stop]
+         )
+
+         if save_path:
+             self.model.save(save_path)
+
+     def apply(self, lowres_data_app, highres_data):
+         super_resolved = self.model.predict([lowres_data_app, highres_data]).squeeze()
+         return super_resolved
+
+     @staticmethod
+     def load(save_path):
+         model = keras.models.load_model(save_path)
+         instance = TextureFusionSRCNN()
+         instance.model = model
+         return instance
+
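
The diff leaves TextureFusionSRCNN without a usage example, so here is a hedged sketch with synthetic single-channel arrays (shapes and file name are arbitrary; both inputs must share spatial dimensions because of the Concatenate layer):

    import numpy as np
    from sciml.models import TextureFusionSRCNN

    lowres = np.random.rand(1, 32, 32, 1).astype("float32")   # (batch, H, W, channels)
    highres = np.random.rand(1, 32, 32, 1).astype("float32")

    sr = TextureFusionSRCNN(learning_rate=0.001)
    sr.train(lowres, highres, epochs=2, batch_size=1, verbose=0, save_path="fusion.keras")
    pred = sr.apply(lowres, highres)                          # -> (32, 32) array
    sr2 = TextureFusionSRCNN.load("fusion.keras")             # restore from disk
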
sciml/pipelines.py CHANGED
@@ -155,7 +155,7 @@ try:
      from tensorflow.keras import models
      # from keras.layers import Dropout
      from keras.callbacks import EarlyStopping
-     from scitbx.stutils import *
+     from scitbx.utils import *
  except Exception as e:
      print(e)
 
@@ -173,263 +173,54 @@ def train_lstm(X_train, y_train, nfeature, ntime, verbose = 2, epochs = 200, bat
      model.fit(X_train, y_train, epochs = epochs, batch_size = batch_size, verbose=verbose)
      return model
 
-
- '''
- # ========================================================================================================
- import numpy as np
- from xgboost import XGBRegressor
- from sklearn.metrics import mean_squared_error
-
- class XGBoostDeepForestRegressor:
-     def __init__(self, n_estimators_per_layer=2, max_layers=20, early_stopping_rounds=2):
-         self.n_estimators_per_layer = n_estimators_per_layer
-         self.max_layers = max_layers
-         self.early_stopping_rounds = early_stopping_rounds
-         self.layers = []
-
-     def _fit_layer(self, X, y):
-         layer = []
-         layer_outputs = []
-         for _ in range(self.n_estimators_per_layer):
-             reg = XGBRegressor()
-             reg.fit(X, y)
-             preds = reg.predict(X).reshape(-1, 1)
-             layer.append(reg)
-             layer_outputs.append(preds)
-         output = np.hstack(layer_outputs)
-         return layer, output
-
-     def fit(self, X, y, X_val=None, y_val=None):
-         X_current = X.copy()
-         best_rmse = float("inf")
-         no_improve_rounds = 0
-
-         for layer_index in range(self.max_layers):
-             print(f"Training Layer {layer_index + 1}")
-             layer, output = self._fit_layer(X_current, y)
-             self.layers.append(layer)
-             X_current = np.hstack([X_current, output])
-
-             if X_val is not None:
-                 y_pred = self.predict(X_val)
-                 # rmse = mean_squared_error(y_val, y_pred, squared=False)
-                 rmse = np.sqrt(mean_squared_error(y_val, y_pred))
-                 print(f"Validation RMSE: {rmse:.4f}")
-
-                 if rmse < best_rmse:
-                     best_rmse = rmse
-                     no_improve_rounds = 0
-                 else:
-                     no_improve_rounds += 1
-                     if no_improve_rounds >= self.early_stopping_rounds:
-                         print("Early stopping triggered.")
-                         break
-
-     def predict(self, X):
-         X_current = X.copy()
-         for layer in self.layers:
-             layer_outputs = []
-             for reg in layer:
-                 n_features = reg.n_features_in_
-                 preds = reg.predict(X_current[:, :n_features]).reshape(-1, 1)
-                 layer_outputs.append(preds)
-             output = np.hstack(layer_outputs)
-             X_current = np.hstack([X_current, output])
-
-         # Final prediction = average of last layer regressors
-         final_outputs = []
-         for reg in self.layers[-1]:
-             n_features = reg.n_features_in_
-             final_outputs.append(reg.predict(X_current[:, :n_features]).reshape(-1, 1))
-         return np.mean(np.hstack(final_outputs), axis=1)
-
-
- from sklearn.datasets import load_diabetes
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import mean_squared_error
-
- X, y = load_diabetes(return_X_y=True)
- X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
-
- df_reg = XGBoostDeepForestRegressor(n_estimators_per_layer=2, max_layers=5)
- df_reg.fit(X_train, y_train, X_val, y_val)
-
- y_pred = df_reg.predict(X_val)
- # rmse = mean_squared_error(y_val, y_pred, squared=False)
- rmse = np.sqrt(mean_squared_error(y_val, y_pred))
- print("Final RMSE:", rmse)
-
- # ----------------------------------------------------------------------------------------------------
-
+ # ===============================================================================================================================
+ # Training utils
  import numpy as np
- from xgboost import XGBRegressor
- from sklearn.metrics import mean_squared_error
- import itertools
-
- class XGBoostDeepForestRegressor:
-     def __init__(self, n_estimators_per_layer=2, max_layers=20, early_stopping_rounds=2, param_grid=None, use_gpu=True, gpu_id=0):
-         self.n_estimators_per_layer = n_estimators_per_layer
-         self.max_layers = max_layers
-         self.early_stopping_rounds = early_stopping_rounds
-         self.param_grid = param_grid or {
-             'max_depth': [3],
-             'learning_rate': [0.1],
-             'n_estimators': [100]
-         }
-         self.use_gpu = use_gpu
-         self.gpu_id = gpu_id
-         self.layers = []
-
-     def _get_param_combinations(self):
-         keys, values = zip(*self.param_grid.items())
-         return [dict(zip(keys, v)) for v in itertools.product(*values)]
-
-     def _fit_layer(self, X, y, X_val=None, y_val=None):
-         layer = []
-         layer_outputs = []
-         param_combos = self._get_param_combinations()
-
-         for i in range(self.n_estimators_per_layer):
-             best_rmse = float('inf')
-             best_model = None
-
-             for params in param_combos:
-                 # Set GPU support parameters in XGBRegressor
-                 if self.use_gpu:
-                     params['tree_method'] = 'hist'  # Use hist method
-                     params['device'] = 'cuda'       # Enable CUDA for GPU
-
-                 model = XGBRegressor(**params)
-                 model.fit(X, y)
-
-                 if X_val is not None:
-                     preds_val = model.predict(X_val)
-                     rmse = np.sqrt(mean_squared_error(y_val, preds_val))
-                     if rmse < best_rmse:
-                         best_rmse = rmse
-                         best_model = model
-                 else:
-                     best_model = model
-
-             final_model = best_model
-             preds = final_model.predict(X).reshape(-1, 1)
-             layer.append(final_model)
-             layer_outputs.append(preds)
-
-         output = np.hstack(layer_outputs)
-         return layer, output
-
-     def fit(self, X, y, X_val=None, y_val=None):
-         X_current = X.copy()
-         X_val_current = X_val.copy() if X_val is not None else None
-
-         best_rmse = float("inf")
-         no_improve_rounds = 0
-
-         for layer_index in range(self.max_layers):
-             print(f"Training Layer {layer_index + 1}")
-             layer, output = self._fit_layer(X_current, y, X_val_current, y_val)
-             self.layers.append(layer)
-             X_current = np.hstack([X_current, output])
-
-             if X_val is not None:
-                 val_outputs = []
-                 for reg in layer:
-                     n_features = reg.n_features_in_
-                     preds = reg.predict(X_val_current[:, :n_features]).reshape(-1, 1)
-                     val_outputs.append(preds)
-                 val_output = np.hstack(val_outputs)
-                 X_val_current = np.hstack([X_val_current, val_output])
-
-                 y_pred = self.predict(X_val)
-                 rmse = np.sqrt(mean_squared_error(y_val, y_pred))
-                 print(f"Validation RMSE: {rmse:.4f}")
-
-                 if rmse < best_rmse:
-                     best_rmse = rmse
-                     no_improve_rounds = 0
-                 else:
-                     no_improve_rounds += 1
-                     if no_improve_rounds >= self.early_stopping_rounds:
-                         print("Early stopping triggered.")
-                         break
-
-     def predict(self, X):
-         X_current = X.copy()
-         for layer in self.layers:
-             layer_outputs = []
-             for reg in layer:
-                 n_features = reg.n_features_in_
-                 preds = reg.predict(X_current[:, :n_features]).reshape(-1, 1)
-                 layer_outputs.append(preds)
-             output = np.hstack(layer_outputs)
-             X_current = np.hstack([X_current, output])
-
-         final_outputs = []
-         for reg in self.layers[-1]:
-             n_features = reg.n_features_in_
-             final_outputs.append(reg.predict(X_current[:, :n_features]).reshape(-1, 1))
-         return np.mean(np.hstack(final_outputs), axis=1)
-
-
- from sklearn.datasets import load_diabetes
+ import pandas as pd
+ from sklearn.model_selection import ShuffleSplit
  from sklearn.model_selection import train_test_split
- from sklearn.metrics import mean_squared_error
-
- # Load dataset
- X, y = load_diabetes(return_X_y=True)
- X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
-
- # Hyperparameter grid
- param_grid = {
-     'max_depth': [3, 4],
-     'learning_rate': [0.1, 0.05],
-     'n_estimators': [50, 100]
- }
-
- # Create and fit the model with GPU enabled
- df_reg = XGBoostDeepForestRegressor(
-     n_estimators_per_layer=2,
-     max_layers=5,
-     early_stopping_rounds=2,
-     param_grid=param_grid,
-     use_gpu=True,  # Enable GPU acceleration
-     gpu_id=0       # Default to the first GPU
- )
-
- df_reg.fit(X_train, y_train, X_val, y_val)
-
- # Final evaluation
- y_pred = df_reg.predict(X_val)
- rmse = np.sqrt(mean_squared_error(y_val, y_pred))
- print("Final RMSE:", rmse)
-
- # ----------------------------------------------------------------------------------------------------
-
- xgb_params = {
-     "objective": "reg:squarederror",
-     "random_state": 0,
-     'seed': 0,
-     'n_estimators': 100,
-     'max_depth': 6,
-     'min_child_weight': 4,
-     'subsample': 0.8,
-     'colsample_bytree': 0.8,
-     'gamma': 0,
-     'reg_alpha': 0,
-     'reg_lambda': 1,
-     'learning_rate': 0.05,
- }
-
- from xgboost import XGBRegressor
- regr = XGBRegressor(**xgb_params)
-
- regr.fit(X_train, y_train)
- y_pred = regr.predict(X_val)
-
-
- from scipy import stats
-
- stats.linregress(y_val, y_pred)
-
- '''
+
+ # randomly select sites
+ def random_select(ds, count, num, random_state = 0):
+     np.random.seed(random_state)
+     idxs = np.random.choice(np.delete(np.arange(len(ds)), count), num, replace = False)
+     return np.sort(idxs)
+
+ def split(Xs, ys, return_index = False, test_size = 0.33, random_state = 42):
+     if return_index:
+         sss = ShuffleSplit(n_splits=1, test_size = test_size, random_state = random_state)
+         sss.get_n_splits(Xs, ys)
+         train_index, test_index = next(sss.split(Xs, ys))
+         return (train_index, test_index)
+     else:
+         X_train, X_test, y_train, y_test = train_test_split(
+             Xs, ys,
+             test_size = test_size,
+             random_state = random_state
+         )
+         return (X_train, X_test, y_train, y_test)
+
+ def split_cut(Xs, ys, test_ratio = 0.33):
+     """
+     Split the timeseries into before and after halves
+     """
+     assert ys.ndim == 2, 'ys must be 2D!'
+     assert len(Xs) == len(ys), 'Xs and ys should be equally long!'
+     assert type(Xs) == type(ys), 'Xs and ys should be the same data type!'
+     if not type(Xs) in [pd.core.frame.DataFrame, np.ndarray]: raise Exception('Only accept numpy ndarray or pandas dataframe')
+     anchor = int(np.floor(len(ys) * (1 - test_ratio)))
+
+     if type(Xs) == pd.core.frame.DataFrame:
+         X_train = Xs.iloc[0: anchor, :]
+         X_test = Xs.iloc[anchor::, :]
+         y_train = ys.iloc[0: anchor, :]
+         y_test = ys.iloc[anchor::, :]
+     else:
+         X_train = Xs[0: anchor, :]
+         X_test = Xs[anchor::, :]
+         y_train = ys[0: anchor, :]
+         y_test = ys[anchor::, :]
+
+     assert len(X_train) + len(X_test) == len(Xs), 'The sum of train and test lengths must equal to Xs/ys!'
+
+     return (X_train, X_test, y_train, y_test)
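
A quick sketch of the relocated split helpers (synthetic arrays; note that split_cut expects 2-D ys and splits chronologically rather than randomly):

    import numpy as np
    from sciml.pipelines import split, split_cut

    Xs = np.arange(40).reshape(20, 2)
    ys = np.arange(20).reshape(20, 1)

    X_train, X_test, y_train, y_test = split(Xs, ys)      # random 67/33 split
    X_train, X_test, y_train, y_test = split_cut(Xs, ys)  # first 67% train, last 33% test
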
sciml/regress2.py ADDED
@@ -0,0 +1,217 @@
+ # Model type I and II regression, including RMA (reduced major axis regression)
+
+ """
+ Credit: UMaine MISC Lab; emmanuel.boss@maine.edu
+ http://misclab.umeoce.maine.edu/
+ https://github.com/OceanOptics
+ ------------------------------------------------------------------------------
+ MIT License
+
+ Copyright (c) [year] [fullname]
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ """
+
+ import statsmodels.api as sm
+ import numpy as np
+
+
+ def regress2(_x, _y, _method_type_1="ordinary least square",
+              _method_type_2="reduced major axis",
+              _weight_x=[], _weight_y=[], _need_intercept=True):
+     # Regression Type II based on statsmodels
+     # Type II regressions are recommended when there is variability on both x and y.
+     # It computes the linear regression type I for (x, y) and (y, x)
+     # and then averages the relationship with one of the type II methods.
+     #
+     # INPUT:
+     #   _x <np.array>
+     #   _y <np.array>
+     #   _method_type_1 <str> method to use for regression type I:
+     #     ordinary least square or OLS <default>
+     #     weighted least square or WLS
+     #     robust linear model or RLM
+     #   _method_type_2 <str> method to use for regression type II:
+     #     major axis
+     #     reduced major axis <default> (also known as geometric mean)
+     #     arithmetic mean
+     #   _need_intercept <bool>
+     #     True <default> add a constant to the relation (y = a x + b)
+     #     False force the relation through 0 (y = a x)
+     #   _weight_x <np.array> containing the weight of x
+     #   _weight_y <np.array> containing the weight of y
+     #
+     # OUTPUT:
+     #   slope
+     #   intercept
+     #   r
+     #   std_slope
+     #   std_intercept
+     #   predict
+     #
+     # REQUIRE:
+     #   numpy
+     #   statsmodels
+     #
+     # The code is based on the MATLAB function of MBARI.
+     # AUTHOR: Nils Haentjens
+     # REFERENCE: https://www.mbari.org/products/research-software/matlab-scripts-linear-regressions/
+
+     # Check input
+     if _method_type_2 != "reduced major axis" and _method_type_1 != "ordinary least square":
+         raise ValueError("'" + _method_type_2 + "' only supports '" + _method_type_1 + "' method as type 1.")
+
+     # Set x, y depending on intercept requirement
+     if _need_intercept:
+         x_intercept = sm.add_constant(_x)
+         y_intercept = sm.add_constant(_y)
+
+     # Compute Regression Type I (if type II requires it)
+     if (_method_type_2 == "reduced major axis" or
+             _method_type_2 == "geometric mean"):
+         if _method_type_1 == "OLS" or _method_type_1 == "ordinary least square":
+             if _need_intercept:
+                 [intercept_a, slope_a] = sm.OLS(_y, x_intercept).fit().params
+                 [intercept_b, slope_b] = sm.OLS(_x, y_intercept).fit().params
+             else:
+                 slope_a = sm.OLS(_y, _x).fit().params
+                 slope_b = sm.OLS(_x, _y).fit().params
+         elif _method_type_1 == "WLS" or _method_type_1 == "weighted least square":
+             if _need_intercept:
+                 [intercept_a, slope_a] = sm.WLS(
+                     _y, x_intercept, weights=1. / _weight_y).fit().params
+                 [intercept_b, slope_b] = sm.WLS(
+                     _x, y_intercept, weights=1. / _weight_x).fit().params
+             else:
+                 slope_a = sm.WLS(_y, _x, weights=1. / _weight_y).fit().params
+                 slope_b = sm.WLS(_x, _y, weights=1. / _weight_x).fit().params
+         elif _method_type_1 == "RLM" or _method_type_1 == "robust linear model":
+             if _need_intercept:
+                 [intercept_a, slope_a] = sm.RLM(_y, x_intercept).fit().params
+                 [intercept_b, slope_b] = sm.RLM(_x, y_intercept).fit().params
+             else:
+                 slope_a = sm.RLM(_y, _x).fit().params
+                 slope_b = sm.RLM(_x, _y).fit().params
+         else:
+             raise ValueError("Invalid literal for _method_type_1: " + _method_type_1)
+
+     # Compute Regression Type II
+     if (_method_type_2 == "reduced major axis" or
+             _method_type_2 == "geometric mean"):
+         # Transpose coefficients of the (x, y) regression
+         if _need_intercept:
+             intercept_b = -intercept_b / slope_b
+         slope_b = 1 / slope_b
+         # Check if correlated in same direction
+         if np.sign(slope_a) != np.sign(slope_b):
+             raise RuntimeError('Type I regressions of opposite sign.')
+         # Compute Reduced Major Axis Slope
+         slope = np.sign(slope_a) * np.sqrt(slope_a * slope_b)
+         if _need_intercept:
+             # Compute Intercept (use mean for least square)
+             if _method_type_1 == "OLS" or _method_type_1 == "ordinary least square":
+                 intercept = np.mean(_y) - slope * np.mean(_x)
+             else:
+                 intercept = np.median(_y) - slope * np.median(_x)
+         else:
+             intercept = 0
+         # Compute r
+         r = np.sign(slope_a) * np.sqrt(slope_a / slope_b)
+         # Compute predicted values
+         predict = slope * _x + intercept
+         # Compute standard deviation of the slope and the intercept
+         n = len(_x)
+         diff = _y - predict
+         Sx2 = np.sum(np.multiply(_x, _x))
+         den = n * Sx2 - np.sum(_x) ** 2
+         s2 = np.sum(np.multiply(diff, diff)) / (n - 2)
+         std_slope = np.sqrt(n * s2 / den)
+         if _need_intercept:
+             std_intercept = np.sqrt(Sx2 * s2 / den)
+         else:
+             std_intercept = 0
+     elif (_method_type_2 == "Pearson's major axis" or
+             _method_type_2 == "major axis"):
+         if not _need_intercept:
+             raise ValueError("Invalid value for _need_intercept: " + str(_need_intercept))
+         xm = np.mean(_x)
+         ym = np.mean(_y)
+         xp = _x - xm
+         yp = _y - ym
+         sumx2 = np.sum(np.multiply(xp, xp))
+         sumy2 = np.sum(np.multiply(yp, yp))
+         sumxy = np.sum(np.multiply(xp, yp))
+         slope = ((sumy2 - sumx2 + np.sqrt((sumy2 - sumx2)**2 + 4 * sumxy**2)) /
+                  (2 * sumxy))
+         intercept = ym - slope * xm
+         # Compute r
+         r = sumxy / np.sqrt(sumx2 * sumy2)
+         # Compute standard deviation of the slope and the intercept
+         n = len(_x)
+         std_slope = (slope / r) * np.sqrt((1 - r ** 2) / n)
+         sigx = np.sqrt(sumx2 / (n - 1))
+         sigy = np.sqrt(sumy2 / (n - 1))
+         std_i1 = (sigy - sigx * slope) ** 2
+         std_i2 = (2 * sigx * sigy) + ((xm ** 2 * slope * (1 + r)) / r ** 2)
+         std_intercept = np.sqrt((std_i1 + ((1 - r) * slope * std_i2)) / n)
+         # Compute predicted values
+         predict = slope * _x + intercept
+     elif _method_type_2 == "arithmetic mean":
+         if not _need_intercept:
+             raise ValueError("Invalid value for _need_intercept: " + str(_need_intercept))
+         n = len(_x)
+         sg = int(np.floor(n / 2))  # cast to int so it can be used as a slice index
+         # Sort x and y in order of x
+         sorted_index = sorted(range(len(_x)), key=lambda i: _x[i])
+         x_w = np.array([_x[i] for i in sorted_index])
+         y_w = np.array([_y[i] for i in sorted_index])
+         x1 = x_w[1:sg + 1]
+         x2 = x_w[sg:n]
+         y1 = y_w[1:sg + 1]
+         y2 = y_w[sg:n]
+         x1m = np.mean(x1)
+         x2m = np.mean(x2)
+         y1m = np.mean(y1)
+         y2m = np.mean(y2)
+         xm = (x1m + x2m) / 2
+         ym = (y1m + y2m) / 2
+         # Wald's grouping estimator: slope = delta-y / delta-x
+         # (the original computed the inverted ratio (x2m - x1m) / (y2m - y1m))
+         slope = (y2m - y1m) / (x2m - x1m)
+         intercept = ym - xm * slope
+         # r (to verify)
+         r = []
+         # Compute predicted values
+         predict = slope * _x + intercept
+         # Compute standard deviation of the slope and the intercept
+         std_slope = []
+         std_intercept = []
+
+     # Return all that
+     return {"slope": float(slope), "intercept": intercept, "r": r,
+             "std_slope": std_slope, "std_intercept": std_intercept,
+             "predict": predict}
+
+
+ # if __name__ == '__main__':
+ #     x = np.linspace(0, 10, 100)
+ #     # Add random error on y
+ #     e = np.random.normal(size=len(x))
+ #     y = x + e
+ #     results = regress2(x, y, _method_type_2="reduced major axis",
+ #                        _need_intercept=False)
+ #     # print(results)
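
A small sketch of the type II entry point, restating the commented __main__ block with the default settings; for the OLS-based reduced major axis, the slope should match sign(r) * std(y) / std(x):

    import numpy as np
    from sciml.regress2 import regress2

    x = np.linspace(0, 10, 100)
    y = x + np.random.normal(size=len(x))     # error on y only, for illustration
    res = regress2(x, y)                      # type I = OLS, type II = reduced major axis
    print(res["slope"], res["intercept"])
    print(np.sign(np.corrcoef(x, y)[0, 1]) * y.std() / x.std())  # should agree with res["slope"]
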
{sciml-0.0.9.dist-info → sciml-0.0.11.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sciml
- Version: 0.0.9
+ Version: 0.0.11
  Summary: draw and basic calculations/conversions
  Home-page: https://github.com/soonyenju/sciml
  Author: Songyan Zhu
sciml-0.0.11.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+ sciml/__init__.py,sha256=OglTSUWcPOHOFqTObFkbwIpv-ZStEl-iMHRZG9aT2pU,80
+ sciml/ccc.py,sha256=uQryOK1y2w3iLPhC2AScXFfbcvc5gMXjRAibYD38GkQ,1245
+ sciml/metrics.py,sha256=wLO1bka7GeXEbMT-w3ZZAwFt1TH0A4U3wf1-TkGtDuM,3699
+ sciml/models.py,sha256=UD8wOOJTeVrjzHM_OZmfNIGq32l4g4-6ZZoPwodcMhU,30104
+ sciml/pipelines.py,sha256=vhWbyoOi7-7F7v65ShMFi0aEBj08JhjHh-JDAxdc65c,8606
+ sciml/regress2.py,sha256=GvD3eQPRdzNSvOBhdcKd08NDg56CHlNZSQgwx5aN_bY,9194
+ sciml-0.0.11.dist-info/LICENSE,sha256=hcunSTJmVgRcUNOa1rKl8axtY3Jsy2B4wXDYtQsrAt0,1081
+ sciml-0.0.11.dist-info/METADATA,sha256=1FCJe3TgTnVW2jQLKfP0CRf2u2ghuJF1P-2dPWOjExg,327
+ sciml-0.0.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ sciml-0.0.11.dist-info/top_level.txt,sha256=dS_7aBCZFKQE3myPy5sh4USjQZCZyGg382-YxUUYcdw,6
+ sciml-0.0.11.dist-info/RECORD,,
sciml/utils.py DELETED
@@ -1,46 +0,0 @@
- import numpy as np
- import pandas as pd
- from sklearn.model_selection import ShuffleSplit
- from sklearn.model_selection import train_test_split
-
- # randomly select sites
- def random_select(ds, count, num, random_state = 0):
-     np.random.seed(random_state)
-     idxs = np.random.choice(np.delete(np.arange(len(ds)), count), num, replace = False)
-     return np.sort(idxs)
-
- def split(Xs, ys, return_index = False, test_size = 0.33, random_state = 42):
-     if return_index:
-         sss = ShuffleSplit(n_splits=1, test_size = test_size, random_state = random_state)
-         sss.get_n_splits(Xs, ys)
-         train_index, test_index = next(sss.split(Xs, ys))
-         return (train_index, test_index)
-     else:
-         X_train, X_test, y_train, y_test = train_test_split(
-             Xs, ys,
-             test_size = test_size,
-             random_state = random_state
-         )
-         return (X_train, X_test, y_train, y_test)
-
- def split_cut(Xs, ys, test_ratio = 0.33):
-     assert ys.ndim == 2, 'ys must be 2D!'
-     assert len(Xs) == len(ys), 'Xs and ys should be equally long!'
-     assert type(Xs) == type(ys), 'Xs and ys should be the same data type!'
-     if not type(Xs) in [pd.core.frame.DataFrame, np.ndarray]: raise Exception('Only accept numpy ndarray or pandas dataframe')
-     anchor = int(np.floor(len(ys) * (1 - test_ratio)))
-
-     if type(Xs) == pd.core.frame.DataFrame:
-         X_train = Xs.iloc[0: anchor, :]
-         X_test = Xs.iloc[anchor::, :]
-         y_train = ys.iloc[0: anchor, :]
-         y_test = ys.iloc[anchor::, :]
-     else:
-         X_train = Xs[0: anchor, :]
-         X_test = Xs[anchor::, :]
-         y_train = ys[0: anchor, :]
-         y_test = ys[anchor::, :]
-
-     assert len(X_train) + len(X_test) == len(Xs), 'The sum of train and test lengths must equal to Xs/ys!'
-
-     return (X_train, X_test, y_train, y_test)
sciml-0.0.9.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
- sciml/__init__.py,sha256=wtdlXERN2ik7NT_TQxFdd2gdodBY9vSU1ClSdeJnLm4,59
- sciml/models.py,sha256=BjbliW-KNfzbNdGNgM7nBdJ2SF2z21qCoAvug_v0FEg,10574
- sciml/pipelines.py,sha256=ReNEkQbdFn04D5G2tbxcA7jdSwACy8SnmZ8bFZI_oqE,15702
- sciml/utils.py,sha256=qCdABaTUu3K0R269jI7D_8SO6AqEjphg03CzdxCJR2k,1876
- sciml-0.0.9.dist-info/LICENSE,sha256=hcunSTJmVgRcUNOa1rKl8axtY3Jsy2B4wXDYtQsrAt0,1081
- sciml-0.0.9.dist-info/METADATA,sha256=S5hG3pP3x4yDPe8AJOKn4R-fIuvL-DL1GSKqGqiImSw,326
- sciml-0.0.9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- sciml-0.0.9.dist-info/top_level.txt,sha256=dS_7aBCZFKQE3myPy5sh4USjQZCZyGg382-YxUUYcdw,6
- sciml-0.0.9.dist-info/RECORD,,