dragon-ml-toolbox 5.3.0__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

ml_tools/utilities.py CHANGED
@@ -22,8 +22,9 @@ __all__ = [
22
22
  "threshold_binary_values_batch",
23
23
  "serialize_object",
24
24
  "deserialize_object",
25
- "distribute_datasets_by_target",
25
+ "distribute_dataset_by_target",
26
26
  "train_dataset_orchestrator",
27
+ "train_dataset_yielder"
27
28
  ]
28
29
 
29
30
 
@@ -418,7 +419,7 @@ def deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_e
418
419
  return obj
419
420
 
420
421
 
421
- def distribute_datasets_by_target(
422
+ def distribute_dataset_by_target(
422
423
  df_or_path: Union[pd.DataFrame, str, Path],
423
424
  target_columns: list[str],
424
425
  verbose: bool = False
@@ -493,7 +494,7 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
493
494
  for df_dir in all_dir_paths:
494
495
  for df_name, df_path in list_csv_paths(df_dir).items():
495
496
  try:
496
- for target_name, df in distribute_datasets_by_target(df_or_path=df_path, target_columns=target_columns, verbose=False):
497
+ for target_name, df in distribute_dataset_by_target(df_or_path=df_path, target_columns=target_columns, verbose=False):
497
498
  if safe_mode:
498
499
  filename = df_dir.name + '_' + target_name + '_' + df_name
499
500
  else:
@@ -507,5 +508,28 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
507
508
  _LOGGER.info(f"✅ {total_saved} single-target datasets were created.")
508
509
 
509
510
 
511
+ def train_dataset_yielder(
512
+ df: pd.DataFrame,
513
+ target_cols: list[str]
514
+ ) -> Iterator[Tuple[pd.DataFrame, pd.Series, list[str], str]]:
515
+ """
516
+ Yields one tuple at a time:
517
+ (features_dataframe, target_series, feature_names, target_name)
518
+
519
+ Skips any target columns not found in the DataFrame.
520
+ """
521
+ # Determine which target columns actually exist in the DataFrame
522
+ valid_targets = [col for col in target_cols if col in df.columns]
523
+
524
+ # Features = all columns excluding valid target columns
525
+ df_features = df.drop(columns=valid_targets)
526
+ feature_names = df_features.columns.to_list()
527
+
528
+ for target_col in valid_targets:
529
+ df_target = df[target_col]
530
+ yield (df_features, df_target, feature_names, target_col)
531
+
532
+
533
+
510
534
  def info():
511
535
  _script_info(__all__)