dragon-ml-toolbox 19.13.0__py3-none-any.whl → 20.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
  2. dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
  3. ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
  4. ml_tools/ETL_cleaning/_basic_clean.py +351 -0
  5. ml_tools/ETL_cleaning/_clean_tools.py +128 -0
  6. ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
  7. ml_tools/ETL_cleaning/_imprimir.py +13 -0
  8. ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
  9. ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
  10. ml_tools/ETL_engineering/_imprimir.py +24 -0
  11. ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
  12. ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
  13. ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
  14. ml_tools/GUI_tools/_imprimir.py +12 -0
  15. ml_tools/IO_tools/_IO_loggers.py +235 -0
  16. ml_tools/IO_tools/_IO_save_load.py +151 -0
  17. ml_tools/IO_tools/_IO_utils.py +140 -0
  18. ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
  19. ml_tools/IO_tools/_imprimir.py +14 -0
  20. ml_tools/MICE/_MICE_imputation.py +132 -0
  21. ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
  22. ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
  23. ml_tools/MICE/_imprimir.py +11 -0
  24. ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
  25. ml_tools/ML_callbacks/_base.py +101 -0
  26. ml_tools/ML_callbacks/_checkpoint.py +232 -0
  27. ml_tools/ML_callbacks/_early_stop.py +208 -0
  28. ml_tools/ML_callbacks/_imprimir.py +12 -0
  29. ml_tools/ML_callbacks/_scheduler.py +197 -0
  30. ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
  31. ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
  32. ml_tools/ML_chain/_dragon_chain.py +140 -0
  33. ml_tools/ML_chain/_imprimir.py +11 -0
  34. ml_tools/ML_configuration/__init__.py +90 -0
  35. ml_tools/ML_configuration/_base_model_config.py +69 -0
  36. ml_tools/ML_configuration/_finalize.py +366 -0
  37. ml_tools/ML_configuration/_imprimir.py +47 -0
  38. ml_tools/ML_configuration/_metrics.py +593 -0
  39. ml_tools/ML_configuration/_models.py +206 -0
  40. ml_tools/ML_configuration/_training.py +124 -0
  41. ml_tools/ML_datasetmaster/__init__.py +28 -0
  42. ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
  43. ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
  44. ml_tools/ML_datasetmaster/_imprimir.py +15 -0
  45. ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
  46. ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
  47. ml_tools/ML_evaluation/__init__.py +53 -0
  48. ml_tools/ML_evaluation/_classification.py +629 -0
  49. ml_tools/ML_evaluation/_feature_importance.py +409 -0
  50. ml_tools/ML_evaluation/_imprimir.py +25 -0
  51. ml_tools/ML_evaluation/_loss.py +92 -0
  52. ml_tools/ML_evaluation/_regression.py +273 -0
  53. ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
  54. ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
  55. ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
  56. ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
  57. ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
  58. ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
  59. ml_tools/ML_finalize_handler/__init__.py +10 -0
  60. ml_tools/ML_finalize_handler/_imprimir.py +8 -0
  61. ml_tools/ML_inference/__init__.py +22 -0
  62. ml_tools/ML_inference/_base_inference.py +166 -0
  63. ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
  64. ml_tools/ML_inference/_dragon_inference.py +332 -0
  65. ml_tools/ML_inference/_imprimir.py +11 -0
  66. ml_tools/ML_inference/_multi_inference.py +180 -0
  67. ml_tools/ML_inference_sequence/__init__.py +10 -0
  68. ml_tools/ML_inference_sequence/_imprimir.py +8 -0
  69. ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
  70. ml_tools/ML_inference_vision/__init__.py +10 -0
  71. ml_tools/ML_inference_vision/_imprimir.py +8 -0
  72. ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
  73. ml_tools/ML_models/__init__.py +32 -0
  74. ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
  75. ml_tools/ML_models/_base_mlp_attention.py +198 -0
  76. ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
  77. ml_tools/ML_models/_dragon_tabular.py +248 -0
  78. ml_tools/ML_models/_imprimir.py +18 -0
  79. ml_tools/ML_models/_mlp_attention.py +134 -0
  80. ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
  81. ml_tools/ML_models_sequence/__init__.py +10 -0
  82. ml_tools/ML_models_sequence/_imprimir.py +8 -0
  83. ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
  84. ml_tools/ML_models_vision/__init__.py +29 -0
  85. ml_tools/ML_models_vision/_base_wrapper.py +254 -0
  86. ml_tools/ML_models_vision/_image_classification.py +182 -0
  87. ml_tools/ML_models_vision/_image_segmentation.py +108 -0
  88. ml_tools/ML_models_vision/_imprimir.py +16 -0
  89. ml_tools/ML_models_vision/_object_detection.py +135 -0
  90. ml_tools/ML_optimization/__init__.py +21 -0
  91. ml_tools/ML_optimization/_imprimir.py +13 -0
  92. ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
  93. ml_tools/ML_optimization/_single_dragon.py +203 -0
  94. ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
  95. ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
  96. ml_tools/ML_scaler/__init__.py +10 -0
  97. ml_tools/ML_scaler/_imprimir.py +8 -0
  98. ml_tools/ML_trainer/__init__.py +20 -0
  99. ml_tools/ML_trainer/_base_trainer.py +297 -0
  100. ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
  101. ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
  102. ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
  103. ml_tools/ML_trainer/_imprimir.py +10 -0
  104. ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
  105. ml_tools/ML_utilities/_artifact_finder.py +382 -0
  106. ml_tools/ML_utilities/_imprimir.py +16 -0
  107. ml_tools/ML_utilities/_inspection.py +325 -0
  108. ml_tools/ML_utilities/_train_tools.py +205 -0
  109. ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
  110. ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
  111. ml_tools/ML_vision_transformers/_imprimir.py +14 -0
  112. ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
  113. ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
  114. ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
  115. ml_tools/PSO_optimization/_imprimir.py +10 -0
  116. ml_tools/SQL/__init__.py +7 -0
  117. ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
  118. ml_tools/SQL/_imprimir.py +8 -0
  119. ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
  120. ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
  121. ml_tools/VIF/_imprimir.py +10 -0
  122. ml_tools/_core/__init__.py +7 -1
  123. ml_tools/_core/_logger.py +8 -18
  124. ml_tools/_core/_schema_load_ops.py +43 -0
  125. ml_tools/_core/_script_info.py +2 -2
  126. ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
  127. ml_tools/data_exploration/_analysis.py +214 -0
  128. ml_tools/data_exploration/_cleaning.py +566 -0
  129. ml_tools/data_exploration/_features.py +583 -0
  130. ml_tools/data_exploration/_imprimir.py +32 -0
  131. ml_tools/data_exploration/_plotting.py +487 -0
  132. ml_tools/data_exploration/_schema_ops.py +176 -0
  133. ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
  134. ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
  135. ml_tools/ensemble_evaluation/_imprimir.py +14 -0
  136. ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
  137. ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
  138. ml_tools/ensemble_inference/_imprimir.py +9 -0
  139. ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
  140. ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
  141. ml_tools/ensemble_learning/_imprimir.py +10 -0
  142. ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
  143. ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
  144. ml_tools/excel_handler/_imprimir.py +13 -0
  145. ml_tools/{keys.py → keys/__init__.py} +4 -1
  146. ml_tools/keys/_imprimir.py +11 -0
  147. ml_tools/{_core → keys}/_keys.py +2 -0
  148. ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
  149. ml_tools/math_utilities/_imprimir.py +11 -0
  150. ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
  151. ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
  152. ml_tools/optimization_tools/_imprimir.py +13 -0
  153. ml_tools/optimization_tools/_optimization_bounds.py +236 -0
  154. ml_tools/optimization_tools/_optimization_plots.py +218 -0
  155. ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
  156. ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
  157. ml_tools/path_manager/_imprimir.py +15 -0
  158. ml_tools/path_manager/_path_tools.py +346 -0
  159. ml_tools/plot_fonts/__init__.py +8 -0
  160. ml_tools/plot_fonts/_imprimir.py +8 -0
  161. ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
  162. ml_tools/schema/__init__.py +15 -0
  163. ml_tools/schema/_feature_schema.py +223 -0
  164. ml_tools/schema/_gui_schema.py +191 -0
  165. ml_tools/schema/_imprimir.py +10 -0
  166. ml_tools/{serde.py → serde/__init__.py} +4 -2
  167. ml_tools/serde/_imprimir.py +10 -0
  168. ml_tools/{_core → serde}/_serde.py +3 -8
  169. ml_tools/{utilities.py → utilities/__init__.py} +11 -6
  170. ml_tools/utilities/_imprimir.py +18 -0
  171. ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
  172. ml_tools/utilities/_utility_tools.py +192 -0
  173. dragon_ml_toolbox-19.13.0.dist-info/RECORD +0 -111
  174. ml_tools/ML_chaining_inference.py +0 -8
  175. ml_tools/ML_configuration.py +0 -86
  176. ml_tools/ML_configuration_pytab.py +0 -14
  177. ml_tools/ML_datasetmaster.py +0 -10
  178. ml_tools/ML_evaluation.py +0 -16
  179. ml_tools/ML_evaluation_multi.py +0 -12
  180. ml_tools/ML_finalize_handler.py +0 -8
  181. ml_tools/ML_inference.py +0 -12
  182. ml_tools/ML_models.py +0 -14
  183. ml_tools/ML_models_advanced.py +0 -14
  184. ml_tools/ML_models_pytab.py +0 -14
  185. ml_tools/ML_optimization.py +0 -14
  186. ml_tools/ML_optimization_pareto.py +0 -8
  187. ml_tools/ML_scaler.py +0 -8
  188. ml_tools/ML_sequence_datasetmaster.py +0 -8
  189. ml_tools/ML_sequence_evaluation.py +0 -10
  190. ml_tools/ML_sequence_inference.py +0 -8
  191. ml_tools/ML_sequence_models.py +0 -8
  192. ml_tools/ML_trainer.py +0 -12
  193. ml_tools/ML_vision_datasetmaster.py +0 -12
  194. ml_tools/ML_vision_evaluation.py +0 -10
  195. ml_tools/ML_vision_inference.py +0 -8
  196. ml_tools/ML_vision_models.py +0 -18
  197. ml_tools/SQL.py +0 -8
  198. ml_tools/_core/_ETL_cleaning.py +0 -694
  199. ml_tools/_core/_IO_tools.py +0 -498
  200. ml_tools/_core/_ML_callbacks.py +0 -702
  201. ml_tools/_core/_ML_configuration.py +0 -1332
  202. ml_tools/_core/_ML_configuration_pytab.py +0 -102
  203. ml_tools/_core/_ML_evaluation.py +0 -867
  204. ml_tools/_core/_ML_evaluation_multi.py +0 -544
  205. ml_tools/_core/_ML_inference.py +0 -646
  206. ml_tools/_core/_ML_models.py +0 -668
  207. ml_tools/_core/_ML_models_pytab.py +0 -693
  208. ml_tools/_core/_ML_trainer.py +0 -2323
  209. ml_tools/_core/_ML_utilities.py +0 -886
  210. ml_tools/_core/_ML_vision_models.py +0 -644
  211. ml_tools/_core/_data_exploration.py +0 -1901
  212. ml_tools/_core/_optimization_tools.py +0 -493
  213. ml_tools/_core/_schema.py +0 -359
  214. ml_tools/plot_fonts.py +0 -8
  215. ml_tools/schema.py +0 -12
  216. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
  217. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
  218. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  219. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
ml_tools/_core/_schema.py DELETED
@@ -1,359 +0,0 @@
1
- from typing import NamedTuple, Tuple, Optional, Dict, Union, Any
2
- from pathlib import Path
3
- import json
4
-
5
- from ._IO_tools import save_list_strings
6
- from ._keys import DatasetKeys, SchemaKeys
7
- from ._logger import get_logger
8
- from ._path_manager import make_fullpath
9
- from ._script_info import _script_info
10
-
11
-
12
- _LOGGER = get_logger("FeatureSchema")
13
-
14
-
15
- __all__ = [
16
- "FeatureSchema",
17
- "create_guischema_template",
18
- "make_multibinary_groups",
19
- ]
20
-
21
-
22
class FeatureSchema(NamedTuple):
    """
    Holds the final, definitive schema for the model pipeline.

    The schema is an immutable record (a ``NamedTuple``) that can be
    round-tripped through JSON with `to_json` / `from_json`, and whose
    feature-name lists can be exported as text files.
    """

    # The final, ordered list of all feature names
    feature_names: Tuple[str, ...]

    # List of all continuous feature names
    continuous_feature_names: Tuple[str, ...]

    # List of all categorical feature names
    categorical_feature_names: Tuple[str, ...]

    # Map of {column_index: cardinality} for categorical features
    categorical_index_map: Optional[Dict[int, int]]

    # Map string-to-int category values (e.g., {'color': {'red': 0, 'blue': 1}})
    categorical_mappings: Optional[Dict[str, Dict[str, int]]]

    def to_json(self, directory: Union[str, Path], verbose: bool = True) -> None:
        """
        Saves the schema as 'FeatureSchema.json' to the provided directory.

        Tuples are written as JSON lists and the integer keys of
        `categorical_index_map` as JSON strings; both conversions are done
        by the `json` encoder itself.

        Args:
            directory: Directory that will receive the schema file.
            verbose: If True, logs a confirmation message after saving.

        Raises:
            IOError: If the file cannot be written.
            TypeError: If the schema holds a value that is not JSON serializable.
        """
        # validate path
        dir_path = make_fullpath(directory, enforce="directory")
        file_path = dir_path / SchemaKeys.SCHEMA_FILENAME

        try:
            # Serialize completely BEFORE opening the file: a TypeError raised
            # half-way through encoding can then never leave a truncated file.
            payload = json.dumps(self._asdict(), indent=4)

            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(payload)

        except (IOError, TypeError) as e:
            _LOGGER.error(f"Failed to save FeatureSchema to JSON: {e}")
            raise

        if verbose:
            _LOGGER.info(f"FeatureSchema saved to '{dir_path.name}/{SchemaKeys.SCHEMA_FILENAME}'")

    @classmethod
    def from_json(cls, directory: Union[str, Path], verbose: bool = True) -> 'FeatureSchema':
        """
        Loads a 'FeatureSchema.json' from the provided directory.

        Restores Tuples from Lists and Integer Keys from Strings.

        Args:
            directory: Directory that contains the schema file.
            verbose: If True, logs a confirmation message after loading.

        Returns:
            FeatureSchema: The reconstructed schema. Name lists missing from
            the file become empty tuples; missing maps become None.

        Raises:
            FileNotFoundError: If the schema file does not exist in `directory`.
            IOError, ValueError, KeyError: Logged and re-raised when reading
                or decoding the file fails.
        """
        # validate directory
        dir_path = make_fullpath(directory, enforce="directory")
        file_path = dir_path / SchemaKeys.SCHEMA_FILENAME

        if not file_path.exists():
            # Put the message on the exception too, so it is not lost when
            # the logger output is not visible to the caller.
            message = f"FeatureSchema file not found at '{directory}'"
            _LOGGER.error(message)
            raise FileNotFoundError(message)

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data: Dict[str, Any] = json.load(f)

            # 1. Restore Tuples (JSON loads them as lists)
            feature_names = tuple(data.get("feature_names", []))
            cont_names = tuple(data.get("continuous_feature_names", []))
            cat_names = tuple(data.get("categorical_feature_names", []))

            # 2. Restore Integer Keys for categorical_index_map
            #    (JSON object keys are always strings)
            raw_map = data.get("categorical_index_map")
            cat_index_map: Optional[Dict[int, int]] = None
            if raw_map is not None:
                cat_index_map = {int(k): v for k, v in raw_map.items()}

            # 3. Mappings (keys are strings, no conversion needed)
            cat_mappings = data.get("categorical_mappings")

            schema = cls(
                feature_names=feature_names,
                continuous_feature_names=cont_names,
                categorical_feature_names=cat_names,
                categorical_index_map=cat_index_map,
                categorical_mappings=cat_mappings,
            )

        except (IOError, ValueError, KeyError) as e:
            _LOGGER.error(f"Failed to load FeatureSchema from '{dir_path}': {e}")
            raise

        if verbose:
            _LOGGER.info(f"FeatureSchema loaded from '{dir_path.name}'")

        return schema

    def _save_helper(self, artifact: Tuple[str, ...], directory: Union[str, Path], filename: str, verbose: bool) -> None:
        """Writes one tuple of names through `save_list_strings`, skipping empty tuples."""
        to_save = list(artifact)

        # empty check
        if not to_save:
            _LOGGER.warning(f"Skipping save for '{filename}': The feature list is empty.")
            return

        save_list_strings(list_strings=to_save,
                          directory=directory,
                          filename=filename,
                          verbose=verbose)

    def save_all_features(self, directory: Union[str, Path], verbose: bool = True) -> None:
        """
        Saves all feature names to a text file.

        Args:
            directory: The directory where the file will be saved.
            verbose: If True, prints a confirmation message upon saving.
        """
        self._save_helper(artifact=self.feature_names,
                          directory=directory,
                          filename=DatasetKeys.FEATURE_NAMES,
                          verbose=verbose)

    def save_continuous_features(self, directory: Union[str, Path], verbose: bool = True) -> None:
        """
        Saves continuous feature names to a text file.

        Args:
            directory: The directory where the file will be saved.
            verbose: If True, prints a confirmation message upon saving.
        """
        self._save_helper(artifact=self.continuous_feature_names,
                          directory=directory,
                          filename=DatasetKeys.CONTINUOUS_NAMES,
                          verbose=verbose)

    def save_categorical_features(self, directory: Union[str, Path], verbose: bool = True) -> None:
        """
        Saves categorical feature names to a text file.

        Args:
            directory: The directory where the file will be saved.
            verbose: If True, prints a confirmation message upon saving.
        """
        self._save_helper(artifact=self.categorical_feature_names,
                          directory=directory,
                          filename=DatasetKeys.CATEGORICAL_NAMES,
                          verbose=verbose)

    def save_artifacts(self, directory: Union[str, Path], verbose: bool = True) -> None:
        """
        Saves feature names, categorical feature names, continuous feature names to separate text files.

        Args:
            directory: The directory where the files will be saved.
            verbose: Forwarded to each individual save call. Defaults to True,
                which matches the previous hard-coded behavior.
        """
        self.save_all_features(directory=directory, verbose=verbose)
        self.save_continuous_features(directory=directory, verbose=verbose)
        self.save_categorical_features(directory=directory, verbose=verbose)

    def __repr__(self) -> str:
        """Returns a concise representation of the schema's contents."""
        total = len(self.feature_names)
        cont = len(self.continuous_feature_names)
        cat = len(self.categorical_feature_names)
        index_map = self.categorical_index_map is not None
        cat_map = self.categorical_mappings is not None
        return (
            f"FeatureSchema(total={total}, continuous={cont}, categorical={cat}, index_map={index_map}, categorical_map={cat_map})"
        )
185
-
186
-
187
def create_guischema_template(
    directory: Union[str, Path],
    feature_schema: FeatureSchema,
    targets: list[str],
    continuous_ranges: Dict[str, Tuple[float, float]],
    multibinary_groups: Union[Dict[str, list[str]], None] = None,
) -> None:
    """
    Generates a 'GUISchema.json' boilerplate file based on the Model FeatureSchema.

    The generated JSON contains entries with empty "gui_name" fields for manual mapping.
    Leave 'gui_name' empty to use auto-formatted Title Case.

    Args:
        directory (str | Path): Where to save the json file.
        feature_schema (FeatureSchema): The source FeatureSchema object.
        targets (list[str]): List of target names as used in the ML pipeline.
        continuous_ranges (Dict[str, Tuple[float, float]]): Dict {model_name: (min, max)}.
            Names that are not continuous features of the schema are skipped with a warning.
        multibinary_groups (Dict[str, list[str]] | None): Optional Dict {GUI_Group_Name: [model_col_1, model_col_2]}.
            Used to group binary columns into a single multi-select list.
            Groups that are empty, or that contain no column known to the
            schema, are skipped with a warning.
    """
    dir_path = make_fullpath(directory, make=True, enforce="directory")

    schema = feature_schema
    output_data: Dict[str, Any] = {
        SchemaKeys.TARGETS: [],
        SchemaKeys.CONTINUOUS: [],
        SchemaKeys.BINARY: [],
        SchemaKeys.MULTIBINARY: {},  # Structure: GroupName: [{model: x, gui: ""}]
        SchemaKeys.CATEGORICAL: []
    }

    # Track handled columns to prevent duplicates in binary/categorical
    handled_cols = set()

    # 1. Targets
    for t in targets:
        output_data[SchemaKeys.TARGETS].append({
            SchemaKeys.MODEL_NAME: t,
            SchemaKeys.GUI_NAME: ""  # User to fill
        })

    # 2. Continuous
    # Validate ranges against schema
    schema_cont_set = set(schema.continuous_feature_names)
    for name, min_max in continuous_ranges.items():
        if name not in schema_cont_set:
            _LOGGER.warning(f"GUISchema: Provided range for '{name}', but it is not in FeatureSchema continuous list.")
            continue

        output_data[SchemaKeys.CONTINUOUS].append({
            SchemaKeys.MODEL_NAME: name,
            SchemaKeys.GUI_NAME: "",
            SchemaKeys.MIN_VALUE: min_max[0],
            SchemaKeys.MAX_VALUE: min_max[1]
        })
        handled_cols.add(name)

    # 3. Multi-Binary Groups
    if multibinary_groups:
        # Check for validity within the generic feature list
        all_feats = set(schema.feature_names)

        for group_name, cols in multibinary_groups.items():
            # Validation: Groups cannot be empty
            if not cols:
                # warn and skip
                _LOGGER.warning(f"GUISchema: Multi-binary group '{group_name}' is empty and will be skipped.")
                continue

            group_options = []
            for col in cols:
                # Validation: Columns must exist in schema
                if col not in all_feats:
                    # warn and skip
                    _LOGGER.warning(f"GUISchema: Multi-binary column '{col}' in group '{group_name}' not found in FeatureSchema. Skipping.")
                    continue
                # else, add to group
                group_options.append({
                    SchemaKeys.MODEL_NAME: col,
                    SchemaKeys.GUI_NAME: ""
                })
                handled_cols.add(col)

            # A group whose columns were all rejected is as useless as a group
            # that was passed in empty, so it gets the same treatment.
            if not group_options:
                _LOGGER.warning(f"GUISchema: Multi-binary group '{group_name}' has no valid columns and will be skipped.")
                continue

            output_data[SchemaKeys.MULTIBINARY][group_name] = group_options

    # 4. Binary & Categorical (Derived from Schema Mappings)
    if schema.categorical_mappings:
        for name, mapping in schema.categorical_mappings.items():
            if name in handled_cols:
                continue

            # Heuristic: Cardinality 2 = Binary, anything else = Categorical
            if len(mapping) == 2:
                output_data[SchemaKeys.BINARY].append({
                    SchemaKeys.MODEL_NAME: name,
                    SchemaKeys.GUI_NAME: ""  # User to fill
                })
            else:
                # For categorical, we also allow renaming the specific options.
                # Every label starts out empty, to be filled in by the user.
                options_with_names = {k: "" for k in mapping}

                output_data[SchemaKeys.CATEGORICAL].append({
                    SchemaKeys.MODEL_NAME: name,
                    SchemaKeys.GUI_NAME: "",  # User to fill feature name
                    SchemaKeys.MAPPING: mapping,  # Original mapping
                    SchemaKeys.OPTIONAL_LABELS: options_with_names  # User can edit values here
                })

    # Serialize completely BEFORE opening the file: an encoding error can then
    # never leave a truncated template on disk.
    payload = json.dumps(output_data, indent=4)

    save_path = dir_path / SchemaKeys.GUI_SCHEMA_FILENAME
    try:
        with open(save_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        _LOGGER.info(f"GUISchema template generated at: '{dir_path.name}/{SchemaKeys.GUI_SCHEMA_FILENAME}'")
    except IOError as e:
        # NOTE(review): write failures are logged but not re-raised (existing
        # best-effort behavior, kept for callers) — unlike FeatureSchema.to_json,
        # which re-raises. Confirm whether that asymmetry is intended.
        _LOGGER.error(f"Failed to save GUISchema template: {e}")
301
-
302
-
303
def make_multibinary_groups(
    feature_schema: FeatureSchema,
    group_prefixes: list[str],
    separator: str = "_"
) -> Dict[str, list[str]]:
    """
    Helper to automate creating the multibinary_groups dictionary for create_guischema_template.

    Iterates through provided prefixes and groups categorical features whose
    name starts with the pattern '{prefix}{separator}'.

    Args:
        feature_schema: The loaded FeatureSchema containing categorical feature names.
        group_prefixes: A list of group prefixes to search for.
        separator: The separator used in Multibinary Encoding (default '_').

    Returns:
        Dict[str, list[str]]: A dictionary mapping group names to their found column names.
        Prefixes that match no column are left out (a warning is logged).

    Raises:
        ValueError: If the schema has no categorical features, or if the
            separator or any prefix is empty or not a string.
    """
    groups: Dict[str, list[str]] = {}

    # check that categorical features exist
    if not feature_schema.categorical_feature_names:
        message = "FeatureSchema has no categorical features defined."
        _LOGGER.error(message)
        raise ValueError(message)

    # validate separator
    if not separator or not isinstance(separator, str):
        message = f"Invalid separator '{separator}' of type {type(separator)}."
        _LOGGER.error(message)
        raise ValueError(message)

    for prefix in group_prefixes:
        if not prefix or not isinstance(prefix, str):
            message = f"Invalid prefix '{prefix}' of type {type(prefix)}."
            _LOGGER.error(message)
            raise ValueError(message)

        search_term = f"{prefix}{separator}"

        # the column name must begin with prefix+separator
        cols = [
            name for name in feature_schema.categorical_feature_names
            if name.startswith(search_term)
        ]

        if cols:
            groups[prefix] = cols
        else:
            _LOGGER.warning(f"No columns found for group '{prefix}' using search term '{search_term}'")

    # log resulting groups
    _LOGGER.info(f"Multibinary groups created: {list(groups.keys())}")

    return groups
356
-
357
-
358
def info() -> None:
    """Hand this module's public names (`__all__`) to the `_script_info` helper."""
    public_names = __all__
    _script_info(public_names)
ml_tools/plot_fonts.py DELETED
@@ -1,8 +0,0 @@
1
- from ._core._plot_fonts import (
2
- configure_cjk_fonts,
3
- info
4
- )
5
-
6
- __all__ = [
7
- "configure_cjk_fonts"
8
- ]
ml_tools/schema.py DELETED
@@ -1,12 +0,0 @@
1
- from ._core._schema import (
2
- FeatureSchema,
3
- create_guischema_template,
4
- make_multibinary_groups,
5
- info
6
- )
7
-
8
- __all__ = [
9
- "FeatureSchema",
10
- "create_guischema_template",
11
- "make_multibinary_groups",
12
- ]