dragon-ml-toolbox 3.7.0__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 3.7.0
3
+ Version: 3.8.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,7 +1,7 @@
1
- dragon_ml_toolbox-3.7.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
- dragon_ml_toolbox-3.7.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=6cfpIeQ6D4Mcs10nkogQrkVyq1T7i2qXjjNHFoUMOyE,1892
1
+ dragon_ml_toolbox-3.8.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
+ dragon_ml_toolbox-3.8.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=6cfpIeQ6D4Mcs10nkogQrkVyq1T7i2qXjjNHFoUMOyE,1892
3
3
  ml_tools/ETL_engineering.py,sha256=yeZsW_7zRvEcuMZbM4E2GV1dxwBoWIeJAcFFk2AK0fY,39502
4
- ml_tools/GUI_tools.py,sha256=3kRxok-QCN5S0q1i7yK137Bsr6c2N4M4nIvgPVAuZU0,20371
4
+ ml_tools/GUI_tools.py,sha256=z0CbN8zOC9bSGxOwcf539gSmvXyn-xP5xXHPxWiywMI,17920
5
5
  ml_tools/MICE_imputation.py,sha256=rYqvwQDVtoAJJ0agXWoGzoZEHedWiA6QzcEKEIkiZ08,11388
6
6
  ml_tools/ML_callbacks.py,sha256=OT2zwORLcn49megBEgXsSUxDHoW0Ft0_v7hLEVF3jHM,13063
7
7
  ml_tools/ML_evaluation.py,sha256=oiDV6HItQloUUKCUpltV-2pogubWLBieGpc-VUwosAQ,10106
@@ -18,8 +18,8 @@ ml_tools/datasetmaster.py,sha256=S3PKHNQZ9cyAOck8xQltVLZhaD1gFLfgHFL-aRjz4JU,300
18
18
  ml_tools/ensemble_learning.py,sha256=CDSIygnHaNe92aJ46Fofevd7q6lowTnE98yWuIV3Y6w,37462
19
19
  ml_tools/handle_excel.py,sha256=lwds7rDLlGSCWiWGI7xNg-Z7kxAepogp0lstSFa0590,12949
20
20
  ml_tools/logger.py,sha256=UkbiU9ihBhw9VKyn3rZzisdClWV94EBV6B09_D0iUU0,6026
21
- ml_tools/utilities.py,sha256=0w0vka0Aj9IYOHJ6crWIb6gwpQIJnPyj3v2_dnVxHrs,23138
22
- dragon_ml_toolbox-3.7.0.dist-info/METADATA,sha256=kvgFjd_BRwob7xycC5rbROCkq4C6FVq3J5-VdCXEPrI,3273
23
- dragon_ml_toolbox-3.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- dragon_ml_toolbox-3.7.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
25
- dragon_ml_toolbox-3.7.0.dist-info/RECORD,,
21
+ ml_tools/utilities.py,sha256=ghEOhN5-eozgfDjJ0r8qOBlnDYahn3jaYzfnitL-GDU,31375
22
+ dragon_ml_toolbox-3.8.0.dist-info/METADATA,sha256=FBhxslY5Lx2HlauipzYsoPovFSdGqlYjgaN0oRVxfLk,3273
23
+ dragon_ml_toolbox-3.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
+ dragon_ml_toolbox-3.8.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
25
+ dragon_ml_toolbox-3.8.0.dist-info/RECORD,,
ml_tools/GUI_tools.py CHANGED
@@ -4,14 +4,13 @@ from typing import Optional, Callable, Any
4
4
  import traceback
5
5
  import FreeSimpleGUI as sg
6
6
  from functools import wraps
7
- from typing import Any, Dict, Tuple, List
7
+ from typing import Any, Dict, Tuple, List, Literal
8
8
  from .utilities import _script_info
9
9
  import numpy as np
10
10
  from .logger import _LOGGER
11
11
 
12
12
 
13
13
  __all__ = [
14
- "PathManager",
15
14
  "ConfigManager",
16
15
  "GUIFactory",
17
16
  "catch_exceptions",
@@ -19,68 +18,6 @@ __all__ = [
19
18
  "update_target_fields"
20
19
  ]
21
20
 
22
-
23
- # --- Path Management ---
24
- class PathManager:
25
- """
26
- Manages paths for a Python application, supporting both development mode and bundled mode via Briefcase.
27
- """
28
- def __init__(self, anchor_file: str):
29
- """
30
- Initializes the PathManager. The package name is automatically inferred
31
- from the parent directory of the anchor file.
32
-
33
- Args:
34
- anchor_file (str): The absolute path to a file within the project's
35
- package, typically `__file__` from a module inside
36
- that package (paths.py).
37
-
38
- Note:
39
- This inference assumes that the anchor file's parent directory
40
- has the same name as the package (e.g., `.../src/my_app/paths.py`).
41
- This is a standard and recommended project structure.
42
- """
43
- resolved_anchor_path = Path(anchor_file).resolve()
44
- self.package_name = resolved_anchor_path.parent.name
45
- self._is_bundled, self._resource_path_func = self._check_bundle_status()
46
-
47
- if self._is_bundled:
48
- # In a Briefcase bundle, resource_path gives an absolute path
49
- # to the resource directory.
50
- self.package_root = self._resource_path_func(self.package_name, "") # type: ignore
51
- else:
52
- # In development mode, the package root is the directory
53
- # containing the anchor file.
54
- self.package_root = resolved_anchor_path.parent
55
-
56
- def _check_bundle_status(self) -> tuple[bool, Optional[Callable]]:
57
- """Checks if the app is running in a bundled environment."""
58
- try:
59
- # This is the function Briefcase provides in a bundled app
60
- from briefcase.platforms.base import resource_path # type: ignore
61
- return True, resource_path
62
- except ImportError:
63
- return False, None
64
-
65
- def get_path(self, relative_path: str | Path) -> Path:
66
- """
67
- Gets the absolute path for a given resource file or directory
68
- relative to the package root.
69
-
70
- Args:
71
- relative_path (str | Path): The path relative to the package root (e.g., 'helpers/icon.png').
72
-
73
- Returns:
74
- Path: The absolute path to the resource.
75
- """
76
- if self._is_bundled:
77
- # Briefcase's resource_path handles resolving the path within the app bundle
78
- return self._resource_path_func(self.package_name, str(relative_path)) # type: ignore
79
- else:
80
- # In dev mode, join package root with the relative path.
81
- return self.package_root / relative_path
82
-
83
-
84
21
  # --- Configuration Management ---
85
22
  class _SectionProxy:
86
23
  """A helper class to represent a section of the .ini file as an object."""
@@ -273,8 +210,8 @@ class GUIFactory:
273
210
  self,
274
211
  data_dict: Dict[str, Tuple[float, float]],
275
212
  is_target: bool = False,
276
- layout_mode: str = 'grid',
277
- columns_per_row: int = 4
213
+ layout_mode: Literal["grid", "row"] = 'grid',
214
+ features_per_column: int = 4
278
215
  ) -> List[List[sg.Column]]:
279
216
  """
280
217
  Generates a layout for continuous features or targets.
@@ -283,7 +220,7 @@ class GUIFactory:
283
220
  data_dict (dict): Keys are feature names, values are (min, max) tuples.
284
221
  is_target (bool): If True, creates disabled inputs for displaying results.
285
222
  layout_mode (str): 'grid' for a multi-row grid layout, or 'row' for a single horizontal row.
286
- columns_per_row (int): Number of feature columns per row when layout_mode is 'grid'.
223
+ features_per_column (int): Number of features per column when `layout_mode` is 'grid'.
287
224
 
288
225
  Returns:
289
226
  A list of lists of sg.Column elements, ready to be used in a window layout.
@@ -294,7 +231,7 @@ class GUIFactory:
294
231
 
295
232
  columns = []
296
233
  for name, (val_min, val_max) in data_dict.items():
297
- key = f"TARGET_{name}" if is_target else name
234
+ key = name
298
235
  default_text = "" if is_target else str(val_max)
299
236
 
300
237
  label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")
@@ -313,6 +250,7 @@ class GUIFactory:
313
250
  range_text = sg.Text(f"Range: {int(val_min)}-{int(val_max)}", font=range_font, background_color=bg_color)
314
251
  layout = [[label], [element], [range_text]]
315
252
 
253
+ # each feature is wrapped as a column element
316
254
  layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
317
255
  columns.append(sg.Column(layout, background_color=bg_color))
318
256
 
@@ -320,13 +258,13 @@ class GUIFactory:
320
258
  return [columns] # A single row containing all columns
321
259
 
322
260
  # Default to 'grid' layout
323
- return [columns[i:i + columns_per_row] for i in range(0, len(columns), columns_per_row)]
261
+ return [columns[i:i + features_per_column] for i in range(0, len(columns), features_per_column)]
324
262
 
325
263
  def generate_combo_layout(
326
264
  self,
327
265
  data_dict: Dict[str, List[Any]],
328
- layout_mode: str = 'grid',
329
- columns_per_row: int = 4
266
+ layout_mode: Literal["grid", "row"] = 'grid',
267
+ features_per_column: int = 4
330
268
  ) -> List[List[sg.Column]]:
331
269
  """
332
270
  Generates a layout for categorical or binary features using Combo boxes.
@@ -334,7 +272,7 @@ class GUIFactory:
334
272
  Args:
335
273
  data_dict (dict): Keys are feature names, values are lists of options.
336
274
  layout_mode (str): 'grid' for a multi-row grid layout, or 'row' for a single horizontal row.
337
- columns_per_row (int): Number of feature columns per row when layout_mode is 'grid'.
275
+ features_per_column (int): Number of features per column when `layout_mode` is 'grid'.
338
276
 
339
277
  Returns:
340
278
  A list of lists of sg.Column elements, ready to be used in a window layout.
@@ -352,13 +290,14 @@ class GUIFactory:
352
290
  )
353
291
  layout = [[label], [element]]
354
292
  layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
293
+ # each feature is wrapped in a Column element
355
294
  columns.append(sg.Column(layout, background_color=bg_color))
356
295
 
357
296
  if layout_mode == 'row':
358
297
  return [columns] # A single row containing all columns
359
298
 
360
299
  # Default to 'grid' layout
361
- return [columns[i:i + columns_per_row] for i in range(0, len(columns), columns_per_row)]
300
+ return [columns[i:i + features_per_column] for i in range(0, len(columns), features_per_column)]
362
301
 
363
302
  # --- Window Creation ---
364
303
  def create_window(self, title: str, layout: List[List[sg.Element]], **kwargs) -> sg.Window:
@@ -421,8 +360,8 @@ def _default_categorical_processor(feature_name: str, chosen_value: Any) -> List
421
360
  return [1.0] if str(chosen_value) == 'True' else [0.0]
422
361
 
423
362
  def prepare_feature_vector(
424
- values: Dict[str, Any],
425
- feature_order: List[str],
363
+ window_values: Dict[str, Any],
364
+ gui_feature_order: List[str],
426
365
  continuous_features: List[str],
427
366
  categorical_features: List[str],
428
367
  categorical_processor: Optional[Callable[[str, Any], List[float]]] = None
@@ -432,8 +371,8 @@ def prepare_feature_vector(
432
371
  This function supports label encoding and one-hot encoding via the processor.
433
372
 
434
373
  Args:
435
- values (dict): The values dictionary from a `window.read()` call.
436
- feature_order (list): A list of all feature names that have a GUI element.
374
+ window_values (dict): The values dictionary from a `window.read()` call.
375
+ gui_feature_order (list): A list of all feature names that have a GUI element.
437
376
  For one-hot encoding, this should be the name of the
438
377
  single GUI element (e.g., 'material_type'), not the
439
378
  expanded feature names (e.g., 'material_is_steel').
@@ -456,8 +395,8 @@ def prepare_feature_vector(
456
395
  cont_set = set(continuous_features)
457
396
  cat_set = set(categorical_features)
458
397
 
459
- for name in feature_order:
460
- chosen_value = values.get(name)
398
+ for name in gui_feature_order:
399
+ chosen_value = window_values.get(name)
461
400
 
462
401
  if chosen_value is None or chosen_value == '':
463
402
  raise ValueError(f"Feature '{name}' is missing a value.")
@@ -482,12 +421,12 @@ def update_target_fields(window: sg.Window, results_dict: Dict[str, Any]):
482
421
 
483
422
  Args:
484
423
  window (sg.Window): The application's window object.
485
- results_dict (dict): A dictionary where keys are target key names (including 'TARGET_' prefix if necessary) and values are the predicted results.
424
+ results_dict (dict): A dictionary where keys are target element-keys and values are the predicted results to update.
486
425
  """
487
426
  for target_name, result in results_dict.items():
488
427
  # Format numbers to 2 decimal places, leave other types as-is
489
428
  display_value = f"{result:.2f}" if isinstance(result, (int, float)) else result
490
- window[target_name].update(display_value)
429
+ window[target_name].update(display_value) # type: ignore
491
430
 
492
431
 
493
432
  def info():
ml_tools/utilities.py CHANGED
@@ -4,9 +4,10 @@ import pandas as pd
4
4
  import polars as pl
5
5
  from pathlib import Path
6
6
  import re
7
- from typing import Literal, Union, Sequence, Optional, Any, Iterator, Tuple
7
+ from typing import Literal, Union, Sequence, Optional, Any, Iterator, Tuple, Callable, List, Dict
8
8
  import joblib
9
9
  from joblib.externals.loky.process_executor import TerminatedWorkerError
10
+ from pprint import pprint
10
11
 
11
12
 
12
13
  # Keep track of available tools
@@ -25,7 +26,8 @@ __all__ = [
25
26
  "serialize_object",
26
27
  "deserialize_object",
27
28
  "distribute_datasets_by_target",
28
- "train_dataset_orchestrator"
29
+ "train_dataset_orchestrator",
30
+ "PathManager"
29
31
  ]
30
32
 
31
33
 
@@ -643,9 +645,211 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
643
645
  print(f"\n✅ {total_saved} single-target datasets were created.")
644
646
 
645
647
 
648
+ ### Path Manager
649
class PathManager:
    """
    Manages and stores a project's file paths, acting as a centralized
    "path database". It supports both development mode and applications
    bundled with Briefcase.

    Supports python dictionary syntax: `pm[key]`, `pm[key] = path`,
    `key in pm`, `len(pm)`, iteration over keys, and `keys()` / `values()` /
    `items()`.
    """
    def __init__(
        self,
        anchor_file: str,
        base_directories: Optional[List[str]] = None
    ):
        """
        Determine the project's root directory and optionally pre-register
        a list of base directories relative to that root.

        The package root is always registered under the key "ROOT".

        Args:
            anchor_file (str): The absolute path to a file whose parent directory will be considered the package root and name. Typically, `__file__`.
            base_directories (Optional[List[str]]): Directory names to register
                immediately. Each one is resolved relative to the package root
                (the anchor file's parent directory) and stored under its own
                name as the key.
        """
        resolved_anchor_path = Path(anchor_file).resolve()
        self._package_name = resolved_anchor_path.parent.name
        self._is_bundled, self._resource_path_func = self._check_bundle_status()
        self._paths: Dict[str, Path] = {}

        if self._is_bundled:
            # In a bundle, the resource helper is asked for the package location.
            # Its return type is not visible from here, so coerce to Path to
            # honor the Dict[str, Path] contract relied on by make_dirs()/status().
            package_root = Path(self._resource_path_func(self._package_name))  # type: ignore
        else:
            # In dev mode, the package root is the directory containing the anchor file.
            package_root = resolved_anchor_path.parent

        # Register the root of the package itself
        self._paths["ROOT"] = package_root

        # Register all the base directories
        for dir_name in base_directories or []:
            if self._is_bundled:
                # In a bundle, each path is resolved through the resource helper.
                self._paths[dir_name] = Path(self._resource_path_func(self._package_name, dir_name))  # type: ignore
            else:
                self._paths[dir_name] = package_root / dir_name

    def _check_bundle_status(self) -> tuple[bool, Optional[Callable]]:
        """
        Checks if the app is running in a Briefcase bundle.

        Returns:
            (True, resource_path) when the Briefcase resource helper can be
            imported, otherwise (False, None).
        """
        try:
            # NOTE(review): confirm this import path exists in the Briefcase
            # version being targeted; when it does not, dev mode is assumed.
            from briefcase.platforms.base import resource_path  # type: ignore
            return True, resource_path
        except (ImportError, NameError):
            return False, None

    def get(self, key: str) -> Path:
        """
        Retrieves a stored path by its key.

        Args:
            key (str): The key of the path to retrieve.

        Returns:
            Path: The Path object stored under `key`.

        Raises:
            KeyError: If the key is not found in the manager. A message is
                printed before the error is re-raised.
        """
        try:
            return self._paths[key]
        except KeyError:
            print(f"❌ Path key '{key}' not found.")
            raise

    def update(self, new_paths: Dict[str, Union[str, Path]], overwrite: bool = False) -> None:
        """
        Adds new paths or overwrites existing ones in the manager.

        Args:
            new_paths (Dict[str, Union[str, Path]]): A dictionary where keys are
                the identifiers and values are the Path objects or strings to store.
            overwrite (bool): If False (default), raises a KeyError if any
                key in new_paths already exists. If True, allows overwriting
                existing keys.

        Raises:
            KeyError: If `overwrite` is False and a key is already registered.
                Nothing is stored in that case.
        """
        if not overwrite:
            # Validate every key up front so a conflict leaves the manager untouched.
            for key in new_paths:
                if key in self._paths:
                    raise KeyError(
                        f"Path key '{key}' already exists in the manager. To replace it, call update() with overwrite=True."
                    )

        # Convert any string paths to Path objects before storing
        self._paths.update({k: Path(v) for k, v in new_paths.items()})

    def make_dirs(self, keys: Optional[List[str]] = None, verbose: bool = False) -> None:
        """
        Creates directory structures for registered paths in writable locations.

        Paths without a file suffix are treated as directories and created on
        the filesystem (including parents); paths with a suffix are skipped.

        In a bundled application, paths located inside the package root are
        skipped, so no attempt is made to write inside the app package.

        Args:
            keys (Optional[List[str]]): If provided, only the directories
                corresponding to these keys will be created; unknown keys are
                skipped (reported when `verbose`). An empty list creates
                nothing. If None (default), all registered directory paths
                are used.
            verbose (bool): If True, prints a message for each action.
        """
        if keys is not None:
            # Explicit selection: an empty list means "nothing", only None means "all".
            path_items = []
            for key in keys:
                if key in self._paths:
                    path_items.append((key, self._paths[key]))
                elif verbose:
                    print(f"⚠️ Key '{key}' not found in PathManager, skipping.")
        else:
            path_items = list(self._paths.items())

        # Get the package root to check against.
        package_root = self._paths.get("ROOT")

        for key, path in path_items:
            if path.suffix:  # Has a suffix: treated as a file, not a directory
                continue

            # Determine if the path is inside the main application package.
            is_internal_path = package_root and path.is_relative_to(package_root)

            if self._is_bundled and is_internal_path:
                if verbose:
                    print(f"ℹ️ Skipping internal directory '{key}' in bundled app (read-only).")
                continue

            if verbose:
                print(f"📁 Ensuring directory exists for key '{key}': {path}")

            path.mkdir(parents=True, exist_ok=True)

    def status(self) -> None:
        """
        Checks the status of all registered paths on the filesystem and prints a formatted report.
        """
        report = {}
        for key, path in self.items():
            if path.is_dir():
                report[key] = "📁 Directory"
            elif path.is_file():
                report[key] = "📄 File"
            else:
                report[key] = "❌ Not Found"

        print("\n--- Path Status Report ---")
        pprint(report)

    def __repr__(self) -> str:
        """Provides a string representation of the stored paths."""
        path_list = "\n".join(f"  '{k}': '{v}'" for k, v in self._paths.items())
        return f"PathManager(\n{path_list}\n)"

    # --- Dictionary-Style Methods ---
    def __getitem__(self, key: str) -> Path:
        """Allows dictionary-style getting, e.g., PM['my_key']"""
        return self.get(key)

    def __setitem__(self, key: str, value: Union[str, Path]):
        """Allows dictionary-style setting, e.g., PM['my_key'] = path"""
        self.update({key: value}, overwrite=True)

    def __contains__(self, key: str) -> bool:
        """Allows checking for a key's existence, e.g., if 'my_key' in PM"""
        return key in self._paths

    def __len__(self) -> int:
        """Allows getting the number of paths, e.g., len(PM)"""
        return len(self._paths)

    def __iter__(self) -> Iterator[str]:
        """
        Allows iterating over the registered keys, e.g., for key in PM.

        Without this, iteration would fall back to calling __getitem__ with
        integer indexes and fail with a KeyError on the first step.
        """
        return iter(self._paths)

    def keys(self):
        """Returns all registered path keys."""
        return self._paths.keys()

    def values(self):
        """Returns all registered Path objects."""
        return self._paths.values()

    def items(self):
        """Returns all registered (key, Path) pairs."""
        return self._paths.items()
848
+
849
+
646
850
  class LogKeys:
647
851
  """
648
- Used for ML scripts only
852
+ Used internally for ML scripts.
649
853
 
650
854
  Centralized keys for logging and history.
651
855
  """