dragon-ml-toolbox 5.0.0__py3-none-any.whl → 5.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 5.0.0
3
+ Version: 5.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -268,5 +268,5 @@ After installation, import modules like this:
268
268
 
269
269
  ```python
270
270
  from ml_tools.utilities import serialize_object, deserialize_object
271
- from ml_tools.custom_logger import custom_logger
271
+ from ml_tools import custom_logger
272
272
  ```
@@ -1,10 +1,10 @@
1
- dragon_ml_toolbox-5.0.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
- dragon_ml_toolbox-5.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
1
+ dragon_ml_toolbox-5.1.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
+ dragon_ml_toolbox-5.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
3
3
  ml_tools/ETL_engineering.py,sha256=4wwZXi9_U7xfCY70jGBaKniOeZ0m75ppxWpQBd_DmLc,39369
4
4
  ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
5
5
  ml_tools/MICE_imputation.py,sha256=b6ZTs8RedXFifOpuMCzr68xM16mCBVh1Ua6kcGfiVtg,11462
6
6
  ml_tools/ML_callbacks.py,sha256=0a-Rbr0Xp_B1FNopOKBBmuJ4MqazS5JgDiT7wx1dHvE,13161
7
- ml_tools/ML_datasetmaster.py,sha256=_tNC2v98eCQGr3nMW_EFs83TRgRme8Uc7ttg1vosmQU,30106
7
+ ml_tools/ML_datasetmaster.py,sha256=jrRK4fuVhRse4fJm_p3as5YPNXssT4rd6qYR6gJvxls,33327
8
8
  ml_tools/ML_evaluation.py,sha256=4dVqe6JF1Ukmk1sAcY8E5EG1oB1_oy2HXE5OT-pZwCs,10273
9
9
  ml_tools/ML_inference.py,sha256=Fh-X2UQn3AznWBjf-7iPSxwE-EzkGQm1VEIRUAkURmE,5336
10
10
  ml_tools/ML_optimization.py,sha256=u3H-TYGycKDdog-njkMfiAxd8TBtmGeLLFplBPRmmxk,10057
@@ -26,7 +26,7 @@ ml_tools/keys.py,sha256=kK9UF-hek2VcPGFILCKl5geoN6flmMOu7IzhdEA6z5Y,1068
26
26
  ml_tools/optimization_tools.py,sha256=MuT4OG7_r1QqLUti-yYix7QeCpglezD0oe9BDCq0QXk,5086
27
27
  ml_tools/path_manager.py,sha256=Z8e7w3MPqQaN8xmTnKuXZS6CIW59BFwwqGhGc00sdp4,13692
28
28
  ml_tools/utilities.py,sha256=mz-M351DzxWxnYVcLX-7ZQ6c-RGoCV9g4VTS9Qif2Es,18348
29
- dragon_ml_toolbox-5.0.0.dist-info/METADATA,sha256=N9-274zMIAQmEfaNoET6Ydj96huYS9_twKLBnl37bic,6639
30
- dragon_ml_toolbox-5.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
- dragon_ml_toolbox-5.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
32
- dragon_ml_toolbox-5.0.0.dist-info/RECORD,,
29
+ dragon_ml_toolbox-5.1.0.dist-info/METADATA,sha256=I9LBhqNEGmKUyWRKUQM-XW1Hq8h1FC740hq_b4kdgQA,6625
30
+ dragon_ml_toolbox-5.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
+ dragon_ml_toolbox-5.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
32
+ dragon_ml_toolbox-5.1.0.dist-info/RECORD,,
@@ -21,6 +21,7 @@ from ._script_info import _script_info
21
21
  # --- public-facing API ---
22
22
  __all__ = [
23
23
  "DatasetMaker",
24
+ "SimpleDatasetMaker",
24
25
  "VisionDatasetMaker",
25
26
  "SequenceMaker",
26
27
  "ResizeAspectFill",
@@ -328,7 +329,7 @@ class DatasetMaker(_BaseMaker):
328
329
 
329
330
  return self.scaler.inverse_transform(data_np)
330
331
 
331
- def get_datasets(self) -> Tuple[_PytorchDataset, _PytorchDataset]:
332
+ def get_datasets(self) -> Tuple[Dataset, Dataset]:
332
333
  """Primary method to get the final PyTorch Datasets."""
333
334
  if not self._is_split:
334
335
  raise RuntimeError("Data has not been split yet. Call .split_data() or .process() first.")
@@ -370,6 +371,95 @@ class DatasetMaker(_BaseMaker):
370
371
  return pandas.DataFrame(full_tensor.numpy(), columns=new_columns, index=cat_df.index)
371
372
 
372
373
 
374
+ # Streamlined DatasetMaker version
375
class SimpleDatasetMaker:
    """
    A simplified dataset maker for pre-processed, numerical pandas DataFrames.

    This class takes a DataFrame, splits it into training and testing sets
    with `train_test_split`, and wraps each split in a `_PytorchDataset`.
    The target variable is taken to be the LAST column; every other column
    is treated as a feature.

    Args:
        pandas_df (pandas.DataFrame): The pre-processed input DataFrame with numerical data.
            Must contain at least two columns (one or more features plus the target).
        test_size (float): The proportion of the dataset to allocate to the
            test split. Forwarded unchanged to `train_test_split`.
        random_state (int): The seed for the random number generator for
            reproducibility. Forwarded unchanged to `train_test_split`.
        id (str | None): An optional object identifier.

    Raises:
        TypeError: If `pandas_df` is not a pandas.DataFrame.
        ValueError: If `pandas_df` has fewer than two columns.
    """
    def __init__(self, pandas_df: pandas.DataFrame, test_size: float = 0.2, random_state: int = 42, id: Optional[str]=None):
        """
        Attributes:
            `train_dataset` -> PyTorch Dataset
            `test_dataset` -> PyTorch Dataset
            `feature_names` -> list[str]
            `target_name` -> str
            `id` -> str | None
        """

        if not isinstance(pandas_df, pandas.DataFrame):
            raise TypeError("Input must be a pandas.DataFrame.")

        # With a single column the feature matrix would be empty, and with no
        # columns `iloc[:, -1]` fails with an opaque IndexError. Fail early
        # with a message that names the actual problem.
        if pandas_df.shape[1] < 2:
            raise ValueError(
                "Input DataFrame must have at least two columns: "
                "one or more feature columns followed by the target column."
            )

        # set id
        self._id = id

        # 1. Identify features and target (target is the last column)
        features = pandas_df.iloc[:, :-1]
        target = pandas_df.iloc[:, -1]

        self._feature_names = features.columns.tolist()
        self._target_name = target.name

        # 2. Split the data
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=test_size, random_state=random_state
        )

        # Only the shapes are kept for `dataframes_info`; the split frames
        # themselves are not stored on the instance.
        self._X_train_shape = X_train.shape
        self._X_test_shape = X_test.shape
        self._y_train_shape = y_train.shape
        self._y_test_shape = y_test.shape

        # 3. Convert to PyTorch Datasets
        self._train_ds = _PytorchDataset(X_train.values, y_train.values)
        self._test_ds = _PytorchDataset(X_test.values, y_test.values)

    @property
    def train_dataset(self) -> Dataset:
        """Returns the training PyTorch dataset."""
        return self._train_ds

    @property
    def test_dataset(self) -> Dataset:
        """Returns the testing PyTorch dataset."""
        return self._test_ds

    @property
    def feature_names(self) -> list[str]:
        """Returns the list of feature column names."""
        return self._feature_names

    @property
    def target_name(self) -> str:
        """Returns the name of the target column."""
        # Column labels are not necessarily strings (e.g. integer labels),
        # so the stored label is converted on the way out.
        return str(self._target_name)

    @property
    def id(self) -> Optional[str]:
        """Returns the object identifier if any."""
        return self._id

    def dataframes_info(self) -> None:
        """Prints the shape information of the split pandas DataFrames."""
        print("--- Original DataFrame Shapes After Split ---")
        print(f" X_train shape: {self._X_train_shape}")
        print(f" y_train shape: {self._y_train_shape}\n")
        print(f" X_test shape: {self._X_test_shape}")
        print(f" y_test shape: {self._y_test_shape}")
        print("-------------------------------------------")
461
+
462
+
373
463
  # --- VisionDatasetMaker ---
374
464
  class VisionDatasetMaker(_BaseMaker):
375
465
  """