autofuzzts 0.1.0-py3-none-any.whl → 0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
autofuzzts/pipeline.py CHANGED
@@ -8,8 +8,11 @@ from autofuzzts.config import get_config
 from autofuzzts.data import data_loader
 from autofuzzts.data_validation.validate import validate_and_clean_input
 from autofuzzts.partition.partition import FuzzyPartition
-from autofuzzts.preprocess.preprocess import preprocess_data,preprocess_data_val
-from autofuzzts.preprocess.prep_for_model import prepare_for_model,prepare_for_model_val_set
+from autofuzzts.preprocess.preprocess import preprocess_data, preprocess_data_val
+from autofuzzts.preprocess.prep_for_model import (
+    prepare_for_model,
+    prepare_for_model_val_set,
+)
 from autofuzzts.models.fuzzy_classifier import FuzzyPipelineModel

 from sklearn.model_selection import ParameterGrid
@@ -17,7 +20,11 @@ from sklearn.calibration import CalibratedClassifierCV


 ## Import RMSE and MAE
-from sklearn.metrics import root_mean_squared_error, mean_absolute_error,mean_squared_error
+from sklearn.metrics import (
+    root_mean_squared_error,
+    mean_absolute_error,
+    mean_squared_error,
+)

 # Example custom configuration
 custom_config = {
@@ -29,100 +36,126 @@ custom_config = {
 selected_config = get_config(custom_config)


-
 def run_pipeline(datasetet_name: str, config: dict = selected_config):
     # Load data

     data = data_loader.load_sample_data(datasetet_name)
     print(data.head(5))
-    print('Evaluated configuration is')
+    print("Evaluated configuration is")
     print(config)

     pass
-

-def train_val_pipeline(train_set:pd.DataFrame,val_set:pd.DataFrame,config:Dict = selected_config, metric:Literal['rmse','mse','mae'] = 'rmse',
-                       diff_type:Literal['perc','abs'] = 'perc', covariates:list[str] = None) -> float:
+
+def train_val_pipeline(
+    train_set: pd.DataFrame,
+    val_set: pd.DataFrame,
+    config: Dict = selected_config,
+    metric: Literal["rmse", "mse", "mae"] = "rmse",
+    diff_type: Literal["perc", "abs"] = "perc",
+    covariates: list[str] = None,
+) -> float:
     train_set = validate_and_clean_input(train_set, covariates)
     val_set = validate_and_clean_input(val_set, covariates)

-    print('train set length:', len(train_set))
+    print("train set length:", len(train_set))

-    if covariates :
+    if covariates:
         train_covariates = train_set[covariates].copy()
-        val_covariates = val_set[covariates].copy()
-
+        val_covariates = val_set[covariates].copy()

     train_set_preprocessed, scaler_train = preprocess_data(train_set, diff_type)
-    val_set_preprocessed = preprocess_data_val(df=val_set, df_train=train_set, diff_type=diff_type, scaler=scaler_train)
-
-
-    fp = FuzzyPartition(fuzzy_function=config['fuzzy_part_func'], n_clusters=config['n_clusters'], sigma=config['sigma'], scaler=scaler_train, verbosity=config['verbosity'])
+    val_set_preprocessed = preprocess_data_val(
+        df=val_set, df_train=train_set, diff_type=diff_type, scaler=scaler_train
+    )
+
+    fp = FuzzyPartition(
+        fuzzy_function=config["fuzzy_part_func"],
+        n_clusters=config["n_clusters"],
+        sigma=config["sigma"],
+        scaler=scaler_train,
+        verbosity=config["verbosity"],
+    )

     # Prepare train and validation fuzzy partitions
-    X_training = train_set_preprocessed['diff_scaled'].values
-    X_validation = val_set_preprocessed['diff_scaled'].values
-
+    X_training = train_set_preprocessed["diff_scaled"].values
+    X_validation = val_set_preprocessed["diff_scaled"].values

-    train_fuzzy_partition,_,_ = fp.fuzzy_partition(X_training)
-    val_fuzzy_partition, _,center_points_unscaled_test_val = fp.fuzzy_partition(X_validation)
-
-    X_train, y_train = prepare_for_model(train_fuzzy_partition.copy(), config['number_of_lags'])
-    X_val, y_val = prepare_for_model_val_set(df_val_fp = val_fuzzy_partition.copy(),df_train_fp = train_fuzzy_partition.copy(),n_lags = config['number_of_lags'])
+    train_fuzzy_partition, _, _ = fp.fuzzy_partition(X_training)
+    val_fuzzy_partition, _, center_points_unscaled_test_val = fp.fuzzy_partition(
+        X_validation
+    )
+
+    X_train, y_train = prepare_for_model(
+        train_fuzzy_partition.copy(), config["number_of_lags"]
+    )
+    X_val, y_val = prepare_for_model_val_set(
+        df_val_fp=val_fuzzy_partition.copy(),
+        df_train_fp=train_fuzzy_partition.copy(),
+        n_lags=config["number_of_lags"],
+    )

-
     if covariates:
         X_train = pd.concat([X_train, train_covariates], axis=1)
         X_val = pd.concat([X_val, val_covariates], axis=1)

-    model = FuzzyPipelineModel(n_clusters=config['n_clusters'], number_of_lags=config['number_of_lags'], verbosity=config['verbosity'])
-
+    model = FuzzyPipelineModel(
+        n_clusters=config["n_clusters"],
+        number_of_lags=config["number_of_lags"],
+        verbosity=config["verbosity"],
+    )

-    model.fit(X_train, y_train, model_type='xgb')
+    model.fit(X_train, y_train, model_type="xgb")

     pred_cluster = model.predict(X_val)

-
     ## Convert prediction to crisp number using center points of clusters
     y_val_pred_center_point = [center_points_unscaled_test_val[i] for i in pred_cluster]

-
-
-
     ## Recalculate percentage difference to actual values
-    y_val_pred= [None] * len(val_set)
+    y_val_pred = [None] * len(val_set)

     # Set the first prediction using the last known value from the train set
-    last_train_value = train_set['Y'].iloc[-1]  # Assuming `df_train` holds the training data
+    last_train_value = train_set["Y"].iloc[
+        -1
+    ]  # Assuming `df_train` holds the training data
     y_val_pred[0] = last_train_value * (1 + y_val_pred_center_point[0])

     # Loop to calculate each subsequent prediction based on the actual previous value in `df_test['Y']`

-    if diff_type == 'perc':
+    if diff_type == "perc":
         for i in range(1, len(val_set)):
-            prev_Y = val_set['Y'].iloc[i-1]  # Use the previous actual value from `df_test`
+            prev_Y = val_set["Y"].iloc[
+                i - 1
+            ]  # Use the previous actual value from `df_test`
             perc_change = y_val_pred_center_point[i]
             y_val_pred[i] = prev_Y * (1 + perc_change)

-    elif diff_type == 'abs':
+    elif diff_type == "abs":
         for i in range(1, len(val_set)):
-            prev_Y = val_set['Y'].iloc[i-1]
+            prev_Y = val_set["Y"].iloc[i - 1]
             y_val_pred[i] = prev_Y + y_val_pred_center_point[i]
-
-
-    if metric == 'rmse':
-        metric_value = root_mean_squared_error(val_set['Y'], y_val_pred)
-    elif metric == 'mse':
-        metric_value = root_mean_squared_error(val_set['Y'], y_val_pred)**2
-    elif metric == 'mae':
-        metric_value = mean_absolute_error(val_set['Y'], y_val_pred)
+
+    if metric == "rmse":
+        metric_value = root_mean_squared_error(val_set["Y"], y_val_pred)
+    elif metric == "mse":
+        metric_value = root_mean_squared_error(val_set["Y"], y_val_pred) ** 2
+    elif metric == "mae":
+        metric_value = mean_absolute_error(val_set["Y"], y_val_pred)
     else:
-        raise ValueError(f"Invalid metric {metric}. Please choose one of 'rmse', 'mse', 'mae'")
-
+        raise ValueError(
+            f"Invalid metric {metric}. Please choose one of 'rmse', 'mse', 'mae'"
+        )
+
     return metric_value

-def train_model(dataset: pd.DataFrame, config: Dict, model_type: Literal['xgb','mlp','tpot'] = 'xgb'):
-    '''
+
+def train_model(
+    dataset: pd.DataFrame,
+    config: Dict,
+    model_type: Literal["xgb", "mlp", "tpot"] = "xgb",
+):
+    """
     Function to train a model on the dataset provided.

     Parameters:
@@ -132,52 +165,81 @@ def train_model(dataset: pd.DataFrame, config: Dict, model_type: Literal['xgb',
         The configuration dictionary for the model.
     model_type: str
         The type of model to train. Default is 'xgb'.
-
-    '''
+
+    """
     config = get_config(config)

     df = validate_and_clean_input(dataset)
-
-    df_preprocessed, scaler_train = preprocess_data(df, diff_type='perc')

-
-    fp = FuzzyPartition(fuzzy_function=config['fuzzy_part_func'], n_clusters=config['n_clusters'], sigma=config['sigma'], scaler=scaler_train, verbosity=config['verbosity'])
+    df_preprocessed, scaler_train = preprocess_data(df, diff_type="perc")
+
+    fp = FuzzyPartition(
+        fuzzy_function=config["fuzzy_part_func"],
+        n_clusters=config["n_clusters"],
+        sigma=config["sigma"],
+        scaler=scaler_train,
+        verbosity=config["verbosity"],
+    )

-    X_training = df_preprocessed['diff_scaled'].values
+    X_training = df_preprocessed["diff_scaled"].values

-    train_fuzzy_partition,_,_ = fp.fuzzy_partition(X_training)
+    train_fuzzy_partition, _, _ = fp.fuzzy_partition(X_training)

-    X_train, y_train = prepare_for_model(train_fuzzy_partition.copy(), config['number_of_lags'])
+    X_train, y_train = prepare_for_model(
+        train_fuzzy_partition.copy(), config["number_of_lags"]
+    )

-    model_train = FuzzyPipelineModel(n_clusters=config['n_clusters'], number_of_lags=config['number_of_lags'], verbosity=config['verbosity'])
+    model_train = FuzzyPipelineModel(
+        n_clusters=config["n_clusters"],
+        number_of_lags=config["number_of_lags"],
+        verbosity=config["verbosity"],
+    )

     model_train.fit(X_train, y_train, model_type=model_type)
-
-
+
     return model_train, scaler_train

-def tune_hyperparameters_bayes(train_set: pd.DataFrame, val_set: pd.DataFrame, n_trials: int = 315, metric: Literal['rmse', 'mse', 'mae'] = 'rmse',
-                               diff_type: Literal['perc', 'abs'] = 'perc', covariates: list[str] = None):
+
+def tune_hyperparameters_bayes(
+    train_set: pd.DataFrame,
+    val_set: pd.DataFrame,
+    n_trials: int = 315,
+    metric: Literal["rmse", "mse", "mae"] = "rmse",
+    diff_type: Literal["perc", "abs"] = "perc",
+    covariates: list[str] = None,
+):
     def objective(trial):
         # Define search space based on your specifications
         config = {
-            'n_clusters': trial.suggest_int('n_clusters', 4, 40),  # Number of fuzzy sets
-            'number_of_lags': trial.suggest_int('number_of_lags', 1, 10),  # Number of lags
-            'fuzzy_part_func': trial.suggest_categorical('fuzzy_part_func', ['Triangle', 'Cosine', 'Gaussian']),  # Partition function type
+            "n_clusters": trial.suggest_int(
+                "n_clusters", 4, 40
+            ),  # Number of fuzzy sets
+            "number_of_lags": trial.suggest_int(
+                "number_of_lags", 1, 10
+            ),  # Number of lags
+            "fuzzy_part_func": trial.suggest_categorical(
+                "fuzzy_part_func", ["Triangle", "Cosine", "Gaussian"]
+            ),  # Partition function type
         }

-        if config['fuzzy_part_func'] == 'Gaussian':
-            config['sigma'] = trial.suggest_float('sigma', 0.1, 4, log=True)
+        if config["fuzzy_part_func"] == "Gaussian":
+            config["sigma"] = trial.suggest_float("sigma", 0.1, 4, log=True)
         else:
-            config['sigma'] = None
+            config["sigma"] = None

         selected_config = get_config(config)

         # Use train_val_pipeline to evaluate this configuration
-        metric_value = train_val_pipeline(train_set, val_set, selected_config, metric, diff_type, covariates=covariates)
+        metric_value = train_val_pipeline(
+            train_set,
+            val_set,
+            selected_config,
+            metric,
+            diff_type,
+            covariates=covariates,
+        )
         return metric_value

-
     # Create and optimize the Optuna study
     study = optuna.create_study(direction="minimize")
     study.optimize(objective, n_trials=n_trials)
@@ -191,26 +253,37 @@ def tune_hyperparameters_bayes(train_set: pd.DataFrame, val_set: pd.DataFrame, n
     return best_config, best_metric_value


-def tune_hyperparameters_bayes_Henon(train_set: pd.DataFrame, val_set: pd.DataFrame, n_trials: int = 315, metric: Literal['rmse', 'mse', 'mae'] = 'rmse', diff_type: Literal['perc', 'abs'] = 'perc'):
+def tune_hyperparameters_bayes_Henon(
+    train_set: pd.DataFrame,
+    val_set: pd.DataFrame,
+    n_trials: int = 315,
+    metric: Literal["rmse", "mse", "mae"] = "rmse",
+    diff_type: Literal["perc", "abs"] = "perc",
+):
     def objective(trial):
         config = {
-            'n_clusters': trial.suggest_int('n_clusters', 2, 29),  # Number of fuzzy sets
-            'number_of_lags': trial.suggest_int('n_lags', 2, 5),  # Number of lags
-            'fuzzy_part_func': trial.suggest_categorical('fuzzy_part_func', ['Triangle', 'Cosine', 'Gaussian']),
+            "n_clusters": trial.suggest_int(
+                "n_clusters", 2, 29
+            ),  # Number of fuzzy sets
+            "number_of_lags": trial.suggest_int("n_lags", 2, 5),  # Number of lags
+            "fuzzy_part_func": trial.suggest_categorical(
+                "fuzzy_part_func", ["Triangle", "Cosine", "Gaussian"]
+            ),
         }

-        if config['fuzzy_part_func'] == 'Gaussian':
-            config['sigma'] = trial.suggest_float('sigma', 0.1, 4, log=True)
+        if config["fuzzy_part_func"] == "Gaussian":
+            config["sigma"] = trial.suggest_float("sigma", 0.1, 4, log=True)
         else:
-            config['sigma'] = None
-
+            config["sigma"] = None
+
         selected_config = get_config(config)

         # Use train_val_pipeline to evaluate this configuration
-        metric_value = train_val_pipeline(train_set, val_set, selected_config, metric,diff_type)
+        metric_value = train_val_pipeline(
+            train_set, val_set, selected_config, metric, diff_type
+        )
         return metric_value

-
     # Create and optimize the Optuna study
     study = optuna.create_study(direction="minimize")
     study.optimize(objective, n_trials=n_trials)
@@ -224,25 +297,27 @@ def tune_hyperparameters_bayes_Henon(train_set: pd.DataFrame, val_set: pd.DataFr
     return best_config, best_metric_value


-
-
-
-def tune_hyperparameters_grid(train_set: pd.DataFrame, val_set: pd.DataFrame,n_trials: int = 315, metric: Literal['rmse', 'mse', 'mae'] = 'rmse', diff_type: Literal['perc', 'abs'] = 'perc'):
-
+def tune_hyperparameters_grid(
+    train_set: pd.DataFrame,
+    val_set: pd.DataFrame,
+    n_trials: int = 315,
+    metric: Literal["rmse", "mse", "mae"] = "rmse",
+    diff_type: Literal["perc", "abs"] = "perc",
+):
     # Define grid for Gaussian fuzzy function (includes 'sigma')
     grid_gauss = {
-        'n_lags': [1, 3, 5, 7, 9],
-        'n_clusters': [4, 6, 8, 10, 12, 14, 16, 18, 20],
-        'sigma': [0.1, 0.5, 1, 5, 9],
-        'fuzzy_part_func': ['matrix_F_transform_gauss']
+        "n_lags": [1, 3, 5, 7, 9],
+        "n_clusters": [4, 6, 8, 10, 12, 14, 16, 18, 20],
+        "sigma": [0.1, 0.5, 1, 5, 9],
+        "fuzzy_part_func": ["matrix_F_transform_gauss"],
     }

     # Define grid for non-Gaussian fuzzy functions (excludes 'sigma')
     grid_non_gauss = {
-        'n_lags': [1, 3, 5, 7, 9],
-        'n_clusters': [4, 6, 8, 10, 12, 14, 16, 18, 20],
-        'sigma': [None],  # Set sigma to None for non-Gaussian functions
-        'fuzzy_part_func': ['matrix_F_transform_cosine', 'matrix_F_transform_triangle']
+        "n_lags": [1, 3, 5, 7, 9],
+        "n_clusters": [4, 6, 8, 10, 12, 14, 16, 18, 20],
+        "sigma": [None],  # Set sigma to None for non-Gaussian functions
+        "fuzzy_part_func": ["matrix_F_transform_cosine", "matrix_F_transform_triangle"],
     }

     # Combine the grids
@@ -268,84 +343,95 @@ def tune_hyperparameters_grid(train_set: pd.DataFrame, val_set: pd.DataFrame,n_t
         print(f"Number of evaluations done: {num_evaluations}")

         # Evaluate the config on the validation set using train_val_pipeline
-        metric_value = train_val_pipeline(train_set, val_set, selected_config, metric, diff_type)
+        metric_value = train_val_pipeline(
+            train_set, val_set, selected_config, metric, diff_type
+        )

         # Update best config if this one is better according to the selected metric
         if metric_value < best_metric_value:
             best_metric_value = metric_value
             best_config = config

-
     return best_config, best_metric_value, num_evaluations


-
-
-def train_calib_pred_test(train_set: pd.DataFrame, test_set: pd.DataFrame,
-                          config: Dict,
-                          model_type: Literal['xgb','mlp','tpot'] = 'xgb', number_cv_calib = 5, diff_type: Literal['perc','abs'] = 'perc',
-                          covariates: list[str] = None, exclude_bool:bool = False) -> float:
-    '''
+def fit_calibrate_predict(
+    train_set: pd.DataFrame,
+    test_set: pd.DataFrame,
+    config: Dict,
+    model_type: Literal["xgb", "mlp", "tpot"] = "xgb",
+    number_cv_calib=5,
+    diff_type: Literal["perc", "abs"] = "perc",
+    covariates: list[str] = None,
+    exclude_bool: bool = False,
+) -> float:
+    """
     Aim of this function is to train a model on the train set, calibrate it using the calibration method provided, and predict on the test set.
-    '''
+    """

     config = get_config(config)
-
+
     # Step 1: Validate and preprocess the input data
     train_set = validate_and_clean_input(train_set, covariates=covariates)
     test_set = validate_and_clean_input(test_set, covariates=covariates)

-    train_set_preprocessed, scaler_train = preprocess_data(train_set, diff_type=diff_type)
-    test_set_preprocessed = preprocess_data_val(df=test_set, df_train=train_set, diff_type=diff_type, scaler=scaler_train)
+    train_set_preprocessed, scaler_train = preprocess_data(
+        train_set, diff_type=diff_type
+    )
+    test_set_preprocessed = preprocess_data_val(
+        df=test_set, df_train=train_set, diff_type=diff_type, scaler=scaler_train
+    )

     # Step 2: Fuzzy Partition for train, validation, and test sets
-    fp = FuzzyPartition(fuzzy_function=config['fuzzy_part_func'],
-                        n_clusters=config['n_clusters'],
-                        sigma=config['sigma'],
-                        scaler=scaler_train,
-                        verbosity=config['verbosity'])
+    fp = FuzzyPartition(
+        fuzzy_function=config["fuzzy_part_func"],
+        n_clusters=config["n_clusters"],
+        sigma=config["sigma"],
+        scaler=scaler_train,
+        verbosity=config["verbosity"],
+    )

     # Prepare train, validation, and test fuzzy partitions
-    X_training = train_set_preprocessed['diff_scaled'].values
-    X_test = test_set_preprocessed['diff_scaled'].values
+    X_training = train_set_preprocessed["diff_scaled"].values
+    X_test = test_set_preprocessed["diff_scaled"].values

     train_fuzzy_partition, _, _ = fp.fuzzy_partition(X_training)
     test_fuzzy_partition, _, center_points_unscaled_test = fp.fuzzy_partition(X_test)

     if exclude_bool:
         ## Remove column left from train_fuzzy_partition
-        train_fuzzy_partition = train_fuzzy_partition.drop(columns=['left'])
-        test_fuzzy_partition = test_fuzzy_partition.drop(columns=['left'])
-
-    train_fuzzy_partition.to_csv('train_fuzzy_partition.csv')
-    test_fuzzy_partition.to_csv('test_fuzzy_partition.csv')
-
-    print('center_points_unscaled_test:', center_points_unscaled_test)
+        train_fuzzy_partition = train_fuzzy_partition.drop(columns=["left"])
+        test_fuzzy_partition = test_fuzzy_partition.drop(columns=["left"])

     # Prepare data for model training, validation, and testing
-    X_train, y_train = prepare_for_model(train_fuzzy_partition.copy(), config['number_of_lags'])
-    X_test_final, _ = prepare_for_model_val_set(df_val_fp=test_fuzzy_partition.copy(),
-                                                df_train_fp=train_fuzzy_partition.copy(),
-                                                n_lags=config['number_of_lags'])
-
+    X_train, y_train = prepare_for_model(
+        train_fuzzy_partition.copy(), config["number_of_lags"]
+    )
+    X_test_final, _ = prepare_for_model_val_set(
+        df_val_fp=test_fuzzy_partition.copy(),
+        df_train_fp=train_fuzzy_partition.copy(),
+        n_lags=config["number_of_lags"],
+    )
+
     if covariates:
         train_covariates = train_set[covariates].copy()
-        test_covariates = test_set[covariates].copy()
+        test_covariates = test_set[covariates].copy()

         X_train = pd.concat([X_train, train_covariates], axis=1)
         X_test_final = pd.concat([X_test_final, test_covariates], axis=1)
-

     # Step 3: Train the model on the combined train and validation set
-    model = FuzzyPipelineModel(n_clusters=config['n_clusters'],
-                               number_of_lags=config['number_of_lags'],
-                               verbosity=config['verbosity'])
-
+    model = FuzzyPipelineModel(
+        n_clusters=config["n_clusters"],
+        number_of_lags=config["number_of_lags"],
+        verbosity=config["verbosity"],
+    )
+
     model.fit(X_train, y_train, model_type=model_type)

     try:
         # Step 4: Calibrate the model using CalibratedClassifierCV
-        model.calibrate(X_train, y_train, method='sigmoid', cv=number_cv_calib)
+        model.calibrate(X_train, y_train, method="sigmoid", cv=number_cv_calib)
     except:
         pass

@@ -353,31 +439,31 @@ def train_calib_pred_test(train_set: pd.DataFrame, test_set: pd.DataFrame,
     y_test_pred_cluster = model.predict(X_test_final)

     ## Convert prediction to crisp number using center points of clusters
-    y_test_pred_center_point = [center_points_unscaled_test[i] for i in y_test_pred_cluster]
-
-
-
+    y_test_pred_center_point = [
+        center_points_unscaled_test[i] for i in y_test_pred_cluster
+    ]

     ## Recalculate percentage difference to actual values
-    y_test_pred= [None] * len(test_set)
+    y_test_pred = [None] * len(test_set)

     # Set the first prediction using the last known value from the train set
-    last_train_value = train_set['Y'].iloc[-1]  # Assuming `df_train` holds the training data
+    last_train_value = train_set["Y"].iloc[
+        -1
+    ]  # Assuming `df_train` holds the training data
     y_test_pred[0] = last_train_value * (1 + y_test_pred_center_point[0])

-    if diff_type == 'perc':
+    if diff_type == "perc":
         # Loop to calculate each subsequent prediction based on the actual previous value in `df_test['Y']`
         for i in range(1, len(test_set)):
-            prev_Y = test_set['Y'].iloc[i-1]  # Use the previous actual value from `df_test`
+            prev_Y = test_set["Y"].iloc[
+                i - 1
+            ]  # Use the previous actual value from `df_test`
             perc_change = y_test_pred_center_point[i]
             y_test_pred[i] = prev_Y * (1 + perc_change)

-    elif diff_type == 'abs':
+    elif diff_type == "abs":
         for i in range(1, len(test_set)):
-            prev_Y = test_set['Y'].iloc[i-1]
+            prev_Y = test_set["Y"].iloc[i - 1]
             y_test_pred[i] = prev_Y + y_test_pred_center_point[i]

-    return y_test_pred_cluster, y_test_pred_center_point,y_test_pred
-
-
-
+    return y_test_pred_cluster, y_test_pred_center_point, y_test_pred
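Both `train_val_pipeline` and `fit_calibrate_predict` share the reconstruction step shown above: each predicted cluster is mapped to its center point (a one-step difference), which is then applied to the previous actual value to obtain a crisp forecast. A minimal standalone sketch of that logic, using hypothetical toy numbers in place of the fuzzy-partition outputs (the `reconstruct` helper name is ours, not the package's):

```python
import pandas as pd

# Hypothetical stand-ins for the pipeline's intermediate outputs:
# in the package, pred_diffs comes from the predicted clusters' center points.
y_actual = pd.Series([100.0, 102.0, 101.0, 103.0])  # test_set["Y"]
pred_diffs = [0.010, -0.005, 0.020, 0.010]          # y_test_pred_center_point
last_train_value = 99.0                             # train_set["Y"].iloc[-1]


def reconstruct(y_actual, pred_diffs, last_train_value, diff_type="perc"):
    """Rebuild crisp one-step-ahead forecasts from predicted differences."""
    preds = [None] * len(y_actual)
    # As in the shipped code, the first step anchors on the last training
    # value with a multiplicative update, regardless of diff_type.
    preds[0] = last_train_value * (1 + pred_diffs[0])
    for i in range(1, len(y_actual)):
        if diff_type == "perc":
            preds[i] = y_actual.iloc[i - 1] * (1 + pred_diffs[i])
        else:  # "abs": additive differences
            preds[i] = y_actual.iloc[i - 1] + pred_diffs[i]
    return preds


print(reconstruct(y_actual, pred_diffs, last_train_value))
# ≈ [99.99, 99.5, 104.04, 102.01]
```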
autofuzzts-0.1.2.dist-info/METADATA ADDED
@@ -0,0 +1,146 @@
+Metadata-Version: 2.4
+Name: autofuzzts
+Version: 0.1.2
+Summary: 'Time series forecasting using fuzzy logic and AutoML'
+Author-email: Jan Timko <jantimko16@gmail.com>
+License: MIT
+Project-URL: Homepage, https://github.com/jtimko16/AutoFuzzTS
+Project-URL: Repository, https://github.com/jtimko16/AutoFuzzTS
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.26.0
+Requires-Dist: pandas>=2.2.0
+Requires-Dist: scikit-learn>=1.5.0
+Requires-Dist: scipy>=1.15.0
+Requires-Dist: xgboost>=3.0.0
+Requires-Dist: lightgbm>=4.6.0
+Requires-Dist: tpot>=1.0.0
+Requires-Dist: optuna>=4.3.0
+Requires-Dist: matplotlib>=3.10.0
+Requires-Dist: seaborn>=0.13.0
+Requires-Dist: requests>=2.32.0
+Requires-Dist: PyYAML>=6.0.0
+Requires-Dist: joblib>=1.4.0
+Requires-Dist: tqdm>=4.67.0
+Dynamic: license-file
+
+# AutoFuzzTS
+
+Time series forecasting library using fuzzy logic and automated machine learning.
+Build and evaluate time series models automatically using fuzzy logic and AutoML techniques.
+
+## Installation
+
+```bash
+pip install autofuzzts
+```
+
+## 🚀 Quick Start
+
+### Load and prepare your time series data
+```python
+import pandas as pd
+
+# Load dataset into a pandas DataFrame
+data = pd.read_csv("../clean_data/ADBE_yf_hourly_cleaned.csv").head(240)
+
+# Select the target column to forecast
+data_column_name = "close_price"
+df = data[[data_column_name]].copy()
+
+# Split into train, validation, and test sets
+test_len = len(df) // 5
+val_len = len(df) // 5
+train_len = len(df) - test_len - val_len
+
+df_train = df[:train_len]
+df_val = df[train_len:(train_len + val_len)]
+df_test = df[(train_len + val_len):]
+```
+
+---
+
+### Tune hyperparameters using Bayesian search
+```python
+from autofuzzts import pipeline
+
+# Run Bayesian optimization for fuzzy pipeline configuration
+best_config, best_rmse = pipeline.tune_hyperparameters_bayes(
+    train_set=df_train,
+    val_set=df_val,
+    n_trials=20,
+    metric="rmse"
+)
+
+print(f"Best configuration: {best_config}")
+```
+
+**Example output:**
+```
+Best configuration: {'n_clusters': 19, 'number_of_lags': 2, 'fuzzy_part_func': 'Triangle'}
+```
+
+---
+
+### Train, calibrate, and predict
+```python
+from autofuzzts import fit_calibrate_predict
+
+# Train model, calibrate, and make one-step-ahead predictions
+pred_set, pred_center_points, pred_test = fit_calibrate_predict(
+    train_set=df_train,
+    test_set=df_test,
+    config=best_config,
+    model_type="xgb"
+)
+```
+
+This returns:
+- `pred_set`: predicted fuzzy sets
+- `pred_center_points`: corresponding fuzzy center values
+- `pred_test`: crisp numeric predictions (one-step-ahead forecast)
+
+---
+
+## Function Overview
+
+### `fit_calibrate_predict()`
+
+```python
+fit_calibrate_predict(
+    train_set: pd.DataFrame,
+    test_set: pd.DataFrame,
+    config: dict,
+    model_type: Literal['xgb', 'mlp', 'tpot'] = 'xgb',
+    number_cv_calib: int = 5,
+    diff_type: Literal['perc', 'abs'] = 'perc',
+    covariates: list[str] | None = None,
+    exclude_bool: bool = False
+) -> float
+```
+
+Trains and calibrates a fuzzy time series model on the training set using
+cross-validation, then predicts on the test set, returning the predicted fuzzy
+sets, their center points, and the crisp one-step-ahead forecasts.
+
+---
+
+## Description
+
+AutoFuzzTS automates the process of fuzzy time series modeling by:
+- building and testing multiple fuzzy pipelines,
+- tuning hyperparameters using Bayesian optimization, and
+- integrating tuned classification models: **XGBoost**, **MLP**, or **TPOT**.
+
+This allows for rapid experimentation and selection of optimal configurations
+for forecasting tasks.
+
+---
+
+## 📄 License
+
+This project is licensed under the MIT License.
+
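The Quick Start above covers Bayesian tuning and `fit_calibrate_predict`; the changed `pipeline.py` also exposes a plain `train_model` helper that fits a single configuration without a validation split. A usage sketch based only on the signature visible in the diff, with `df_train` and `best_config` assumed to come from the Quick Start:

```python
from autofuzzts import pipeline

# Assumes df_train and best_config from the Quick Start above.
# Per the diff, train_model validates the input, fits a fuzzy partition,
# trains a FuzzyPipelineModel, and returns (model, scaler).
model, scaler = pipeline.train_model(
    dataset=df_train,
    config=best_config,
    model_type="xgb",  # or "mlp" / "tpot"
)
```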
autofuzzts-0.1.0.dist-info/RECORD → autofuzzts-0.1.2.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
 autofuzzts/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
 autofuzzts/config.py,sha256=rzwULHfKKtf5Rdpm8pk-zwuXrkKc0dckF-xIfz1UVcY,392
-autofuzzts/pipeline.py,sha256=3rre8nzxEtSQI2_G4STt-AqIACEDLczP0t4YMr0IMW8,15782
+autofuzzts/pipeline.py,sha256=wwaVXBvnoAvd3MDvEaj4xKqPlBWMSyOHSR5TOTP2jTo,16189
 autofuzzts/utils.py,sha256=lywC_KhHuYgjUmXjj-ay9vZYTKUSxFgWXY2q6EdWf9s,10
 autofuzzts/data/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
 autofuzzts/data/data_loader.py,sha256=VO8V9O3WgXffyktUMSmbGTiXWBJ2kgN5wLqgFgvkE6w,266
@@ -18,8 +18,8 @@ autofuzzts/partition/visualize_partition.py,sha256=F31yovGfosqa-EmtuQdIIuF61XejH
 autofuzzts/preprocess/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
 autofuzzts/preprocess/prep_for_model.py,sha256=mp19PGo_p8YWezSny__qKnuTREhAldSlxCzIutrisGk,2565
 autofuzzts/preprocess/preprocess.py,sha256=QZ0h4bZslwOrjTUyvPQaXDT_lBlnL8nKdp545Qy3xdk,2786
-autofuzzts-0.1.0.dist-info/licenses/LICENSE,sha256=bjnZy7iTBVYeRcAPI9NVlXeQGx62R13_t8xwoLq44Ms,1087
-autofuzzts-0.1.0.dist-info/METADATA,sha256=8Kg6RDBTKZBNKA8y3Lwp74Rw8lL9H1Hl1VU43Dm_9n0,1222
-autofuzzts-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-autofuzzts-0.1.0.dist-info/top_level.txt,sha256=YHgbVRUPg-x2WX7FKyJMUAeI9o46c8XFiR_eYKtXIxc,11
-autofuzzts-0.1.0.dist-info/RECORD,,
+autofuzzts-0.1.2.dist-info/licenses/LICENSE,sha256=bjnZy7iTBVYeRcAPI9NVlXeQGx62R13_t8xwoLq44Ms,1087
+autofuzzts-0.1.2.dist-info/METADATA,sha256=XuLUJuUcurF9DZE0YLGwFzkdJbIEZMXdJ3MI2KFztNk,3764
+autofuzzts-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+autofuzzts-0.1.2.dist-info/top_level.txt,sha256=YHgbVRUPg-x2WX7FKyJMUAeI9o46c8XFiR_eYKtXIxc,11
+autofuzzts-0.1.2.dist-info/RECORD,,
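Each RECORD row has the form `path,sha256=<digest>,size-in-bytes`, where the digest is the urlsafe-base64-encoded (unpadded) SHA-256 of the file; the RECORD entry itself leaves its digest and size blank. A small sketch for recomputing a digest to compare against the rows above (the `record_digest` helper name is ours):

```python
import base64
import hashlib


def record_digest(path: str) -> str:
    """Compute the urlsafe-base64, unpadded sha256 digest used in RECORD."""
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


# For the 0.1.2 wheel, record_digest("autofuzzts/pipeline.py") should return
# "wwaVXBvnoAvd3MDvEaj4xKqPlBWMSyOHSR5TOTP2jTo", matching the RECORD above.
```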
autofuzzts-0.1.0.dist-info/METADATA REMOVED
@@ -1,41 +0,0 @@
-Metadata-Version: 2.4
-Name: autofuzzts
-Version: 0.1.0
-Summary: Automated fuzzy time series forecasting library
-Home-page: https://github.com/jtimko16/AutoFuzzTS
-Author: Jan Timko
-Author-email: Jan Timko <jantimko16@gmail.com>
-License: MIT
-Project-URL: Homepage, https://github.com/jtimko16/AutoFuzzTS
-Project-URL: Bug Tracker, https://github.com/jtimko16/AutoFuzzTS/issues
-Requires-Python: >=3.11
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: numpy>=1.26.0
-Requires-Dist: pandas>=2.2.0
-Requires-Dist: scikit-learn>=1.5.0
-Requires-Dist: scipy>=1.15.0
-Requires-Dist: xgboost>=3.0.0
-Requires-Dist: lightgbm>=4.6.0
-Requires-Dist: tpot>=1.0.0
-Requires-Dist: optuna>=4.3.0
-Requires-Dist: matplotlib>=3.10.0
-Requires-Dist: seaborn>=0.13.0
-Requires-Dist: requests>=2.32.0
-Requires-Dist: PyYAML>=6.0.0
-Requires-Dist: joblib>=1.4.0
-Requires-Dist: tqdm>=4.67.0
-Dynamic: author
-Dynamic: home-page
-Dynamic: license-file
-Dynamic: requires-python
-
-# AutoFuzzTS
-
-Automated fuzzy time series forecasting library in Python.
-Build and evaluate time series models automatically using fuzzy logic and AutoML techniques.
-
-## Installation
-
-```bash
-pip install autofuzzts