dragon-ml-toolbox 13.3.2__tar.gz → 13.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (46)
  1. {dragon_ml_toolbox-13.3.2/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-13.4.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_datasetmaster.py +61 -20
  4. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/pyproject.toml +1 -1
  5. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/LICENSE +0 -0
  6. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/LICENSE-THIRD-PARTY.md +0 -0
  7. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/README.md +0 -0
  8. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  9. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  10. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  11. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  12. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ETL_cleaning.py +0 -0
  13. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ETL_engineering.py +0 -0
  14. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/GUI_tools.py +0 -0
  15. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/MICE_imputation.py +0 -0
  16. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_callbacks.py +0 -0
  17. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_evaluation.py +0 -0
  18. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_evaluation_multi.py +0 -0
  19. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_inference.py +0 -0
  20. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_models.py +0 -0
  21. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_optimization.py +0 -0
  22. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_scaler.py +0 -0
  23. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_trainer.py +0 -0
  24. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ML_utilities.py +0 -0
  25. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/PSO_optimization.py +0 -0
  26. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/RNN_forecast.py +0 -0
  27. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/SQL.py +0 -0
  28. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/VIF_factor.py +0 -0
  29. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/__init__.py +0 -0
  30. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/_logger.py +0 -0
  31. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/_schema.py +0 -0
  32. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/_script_info.py +0 -0
  33. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/constants.py +0 -0
  34. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/custom_logger.py +0 -0
  35. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/data_exploration.py +0 -0
  36. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ensemble_evaluation.py +0 -0
  37. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ensemble_inference.py +0 -0
  38. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/ensemble_learning.py +0 -0
  39. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/handle_excel.py +0 -0
  40. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/keys.py +0 -0
  41. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/math_utilities.py +0 -0
  42. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/optimization_tools.py +0 -0
  43. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/path_manager.py +0 -0
  44. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/serde.py +0 -0
  45. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/ml_tools/utilities.py +0 -0
  46. {dragon_ml_toolbox-13.3.2 → dragon_ml_toolbox-13.4.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 13.3.2
3
+ Version: 13.4.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 13.3.2
3
+ Version: 13.4.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -126,8 +126,8 @@ class _BaseDatasetMaker(ABC):
126
126
  else:
127
127
  _LOGGER.info("No continuous features listed in schema. Scaler will not be fitted.")
128
128
 
129
- X_train_values = X_train.values
130
- X_test_values = X_test.values
129
+ X_train_values = X_train.to_numpy()
130
+ X_test_values = X_test.to_numpy()
131
131
 
132
132
  # continuous_feature_indices is derived
133
133
  if self.scaler is None and continuous_feature_indices:
@@ -253,26 +253,42 @@ class DatasetMaker(_BaseDatasetMaker):
253
253
  pandas_df: pandas.DataFrame,
254
254
  schema: FeatureSchema,
255
255
  kind: Literal["regression", "classification"],
256
+ scaler: Union[Literal["fit"], Literal["none"], PytorchScaler],
256
257
  test_size: float = 0.2,
257
- random_state: int = 42,
258
- scaler: Optional[PytorchScaler] = None):
258
+ random_state: int = 42):
259
259
  """
260
260
  Args:
261
261
  pandas_df (pandas.DataFrame):
262
262
  The pre-processed input DataFrame containing all columns. (features and single target).
263
263
  schema (FeatureSchema):
264
264
  The definitive schema object from data_exploration.
265
- kind (Literal["regression", "classification"]):
265
+ kind ("regression" | "classification"):
266
266
  The type of ML task. This determines the data type of the labels.
267
+ scaler ("fit" | "none" | PytorchScaler):
268
+ Strategy for data scaling:
269
+ - "fit": Fit a new PytorchScaler on continuous features.
270
+ - "none": Do not scale data (e.g., for TabularTransformer).
271
+ - PytorchScaler instance: Use a pre-fitted scaler to transform data.
267
272
  test_size (float):
268
273
  The proportion of the dataset to allocate to the test split.
269
274
  random_state (int):
270
275
  The seed for the random number generator, for reproducibility.
271
- scaler (PytorchScaler | None):
272
- A pre-fitted PytorchScaler instance, if None a new scaler will be created.
276
+
273
277
  """
274
278
  super().__init__()
275
- self.scaler = scaler
279
+
280
+ _apply_scaling: bool = False
281
+ if scaler == "fit":
282
+ self.scaler = None # To be created
283
+ _apply_scaling = True
284
+ elif scaler == "none":
285
+ self.scaler = None
286
+ elif isinstance(scaler, PytorchScaler):
287
+ self.scaler = scaler # Use the provided one
288
+ _apply_scaling = True
289
+ else:
290
+ _LOGGER.error(f"Invalid 'scaler' argument. Must be 'fit', 'none', or a PytorchScaler instance.")
291
+ raise ValueError()
276
292
 
277
293
  # --- 1. Identify features (from schema) ---
278
294
  self._feature_names = list(schema.feature_names)
@@ -310,9 +326,14 @@ class DatasetMaker(_BaseDatasetMaker):
310
326
  label_dtype = torch.float32 if kind == "regression" else torch.int64
311
327
 
312
328
  # --- 4. Scale (using the schema) ---
313
- X_train_final, X_test_final = self._prepare_scaler(
314
- X_train, y_train, X_test, label_dtype, schema
315
- )
329
+ if _apply_scaling:
330
+ X_train_final, X_test_final = self._prepare_scaler(
331
+ X_train, y_train, X_test, label_dtype, schema
332
+ )
333
+ else:
334
+ _LOGGER.info("Features have not been scaled as specified.")
335
+ X_train_final = X_train.to_numpy()
336
+ X_test_final = X_test.to_numpy()
316
337
 
317
338
  # --- 5. Create Datasets ---
318
339
  self._train_ds = _PytorchDataset(X_train_final, y_train, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
@@ -336,9 +357,9 @@ class DatasetMakerMulti(_BaseDatasetMaker):
336
357
  pandas_df: pandas.DataFrame,
337
358
  target_columns: List[str],
338
359
  schema: FeatureSchema,
360
+ scaler: Union[Literal["fit"], Literal["none"], PytorchScaler],
339
361
  test_size: float = 0.2,
340
- random_state: int = 42,
341
- scaler: Optional[PytorchScaler] = None):
362
+ random_state: int = 42):
342
363
  """
343
364
  Args:
344
365
  pandas_df (pandas.DataFrame):
@@ -348,20 +369,35 @@ class DatasetMakerMulti(_BaseDatasetMaker):
348
369
  List of target column names.
349
370
  schema (FeatureSchema):
350
371
  The definitive schema object from data_exploration.
372
+ scaler ("fit" | "none" | PytorchScaler):
373
+ Strategy for data scaling:
374
+ - "fit": Fit a new PytorchScaler on continuous features.
375
+ - "none": Do not scale data (e.g., for TabularTransformer).
376
+ - PytorchScaler instance: Use a pre-fitted scaler to transform data.
351
377
  test_size (float):
352
378
  The proportion of the dataset to allocate to the test split.
353
379
  random_state (int):
354
380
  The seed for the random number generator for reproducibility.
355
- scaler (PytorchScaler | None):
356
- A pre-fitted PytorchScaler instance.
357
381
 
358
382
  ## Note:
359
383
  For multi-binary classification, the most common PyTorch loss function is nn.BCEWithLogitsLoss.
360
384
  This loss function requires the labels to be torch.float32 which is the same type required for regression (multi-regression) tasks.
361
385
  """
362
386
  super().__init__()
363
- self.scaler = scaler
364
-
387
+
388
+ _apply_scaling: bool = False
389
+ if scaler == "fit":
390
+ self.scaler = None
391
+ _apply_scaling = True
392
+ elif scaler == "none":
393
+ self.scaler = None
394
+ elif isinstance(scaler, PytorchScaler):
395
+ self.scaler = scaler # Use the provided one
396
+ _apply_scaling = True
397
+ else:
398
+ _LOGGER.error(f"Invalid 'scaler' argument. Must be 'fit', 'none', or a PytorchScaler instance.")
399
+ raise ValueError()
400
+
365
401
  # --- 1. Get features and targets from schema/args ---
366
402
  self._feature_names = list(schema.feature_names)
367
403
  self._target_names = target_columns
@@ -403,9 +439,14 @@ class DatasetMakerMulti(_BaseDatasetMaker):
403
439
  label_dtype = torch.float32
404
440
 
405
441
  # --- 4. Scale (using the schema) ---
406
- X_train_final, X_test_final = self._prepare_scaler(
407
- X_train, y_train, X_test, label_dtype, schema
408
- )
442
+ if _apply_scaling:
443
+ X_train_final, X_test_final = self._prepare_scaler(
444
+ X_train, y_train, X_test, label_dtype, schema
445
+ )
446
+ else:
447
+ _LOGGER.info("Features have not been scaled as specified.")
448
+ X_train_final = X_train.to_numpy()
449
+ X_test_final = X_test.to_numpy()
409
450
 
410
451
  # --- 5. Create Datasets ---
411
452
  # _PytorchDataset now correctly handles y_train (a DataFrame)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "13.3.2"
3
+ version = "13.4.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }