dragon-ml-toolbox 4.0.0__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {dragon_ml_toolbox-4.0.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-4.2.0}/PKG-INFO +47 -8
  2. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/README.md +35 -5
  3. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0/dragon_ml_toolbox.egg-info}/PKG-INFO +47 -8
  4. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +1 -0
  5. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/dragon_ml_toolbox.egg-info/requires.txt +14 -2
  6. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/PSO_optimization.py +110 -51
  7. dragon_ml_toolbox-4.2.0/ml_tools/SQL.py +272 -0
  8. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/path_manager.py +13 -5
  9. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/pyproject.toml +21 -4
  10. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/LICENSE +0 -0
  11. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/LICENSE-THIRD-PARTY.md +0 -0
  12. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  13. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  14. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ETL_engineering.py +0 -0
  15. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/GUI_tools.py +0 -0
  16. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/MICE_imputation.py +0 -0
  17. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ML_callbacks.py +0 -0
  18. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ML_evaluation.py +0 -0
  19. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ML_inference.py +0 -0
  20. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ML_trainer.py +0 -0
  21. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/RNN_forecast.py +0 -0
  22. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/VIF_factor.py +0 -0
  23. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/__init__.py +0 -0
  24. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/_logger.py +0 -0
  25. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/_pytorch_models.py +0 -0
  26. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/_script_info.py +0 -0
  27. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/custom_logger.py +0 -0
  28. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/data_exploration.py +0 -0
  29. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/datasetmaster.py +0 -0
  30. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ensemble_inference.py +0 -0
  31. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ensemble_learning.py +0 -0
  32. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/handle_excel.py +0 -0
  33. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/keys.py +0 -0
  34. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/utilities.py +0 -0
  35. {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/setup.cfg +0 -0
--- dragon_ml_toolbox-4.0.0/dragon_ml_toolbox.egg-info/PKG-INFO
+++ dragon_ml_toolbox-4.2.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 4.0.0
+Version: 4.2.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -12,6 +12,11 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: LICENSE-THIRD-PARTY.md
+Provides-Extra: base
+Requires-Dist: pandas; extra == "base"
+Requires-Dist: numpy; extra == "base"
+Requires-Dist: polars; extra == "base"
+Requires-Dist: joblib; extra == "base"
 Provides-Extra: ml
 Requires-Dist: numpy; extra == "ml"
 Requires-Dist: pandas; extra == "ml"
@@ -57,16 +62,20 @@ Provides-Extra: gui-boost
 Requires-Dist: numpy; extra == "gui-boost"
 Requires-Dist: joblib; extra == "gui-boost"
 Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-boost"
-Requires-Dist: pyinstaller; extra == "gui-boost"
 Requires-Dist: xgboost; extra == "gui-boost"
 Requires-Dist: lightgbm; extra == "gui-boost"
 Provides-Extra: gui-torch
 Requires-Dist: numpy; extra == "gui-torch"
 Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-torch"
-Requires-Dist: pyinstaller; extra == "gui-torch"
 Provides-Extra: plot
 Requires-Dist: matplotlib; extra == "plot"
 Requires-Dist: seaborn; extra == "plot"
+Provides-Extra: pyinstaller
+Requires-Dist: pyinstaller; extra == "pyinstaller"
+Provides-Extra: nuitka
+Requires-Dist: nuitka; extra == "nuitka"
+Requires-Dist: zstandard; extra == "nuitka"
+Requires-Dist: ordered-set; extra == "nuitka"
 Dynamic: license-file
 
 # dragon-ml-toolbox
@@ -107,7 +116,6 @@ Install from the conda-forge channel:
 ```bash
 conda install -c conda-forge dragon-ml-toolbox
 ```
-**Note:** This version is outdated or broken due to dependency incompatibilities. Use PyPi instead.
 
 ## Modular Installation
 
@@ -142,6 +150,7 @@ ML_trainer
 ML_inference
 path_manager
 PSO_optimization
+SQL
 RNN_forecast
 utilities
 ```
@@ -156,7 +165,7 @@ pip install "dragon-ml-toolbox[mice]"
 
 #### Modules:
 
-```bash
+```Bash
 custom_logger
 MICE_imputation
 VIF_factor
@@ -174,7 +183,7 @@ pip install "dragon-ml-toolbox[excel]"
 
 #### Modules:
 
-```bash
+```Bash
 custom_logger
 handle_excel
 path_manager
@@ -194,7 +203,7 @@ pip install "dragon-ml-toolbox[gui-boost,plot]"
 
 #### Modules:
 
-```bash
+```Bash
 GUI_tools
 ensemble_inference
 path_manager
@@ -214,12 +223,42 @@ pip install "dragon-ml-toolbox[gui-torch,plot]"
 
 #### Modules:
 
-```bash
+```Bash
 GUI_tools
 ML_inference
 path_manager
 ```
 
+### 🎫 Base Tools [base]
+
+General purpose functions and classes.
+
+```Bash
+pip install "dragon-ml-toolbox[base]"
+```
+
+#### Modules:
+
+```Bash
+ETL_Engineering
+custom_logger
+SQL
+utilities
+path_manager
+```
+
+### ⚒️ APP bundlers
+
+Choose one if needed.
+
+```Bash
+pip install "dragon-ml-toolbox[pyinstaller]"
+```
+
+```Bash
+pip install "dragon-ml-toolbox[nuitka]"
+```
+
 ## Usage
 
 After installation, import modules like this:
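The usage snippet itself sits below this hunk and is not part of the diff. Going by the package layout (everything lives under `ml_tools/`, per SOURCES.txt), imports of the new module would follow this pattern — an illustrative sketch, not a quote from the README:

```python
# Hypothetical import pattern inferred from the ml_tools package layout;
# the README's own example is not included in this diff.
from ml_tools.SQL import DatabaseManager

with DatabaseManager("scratch") as db:  # a ".db" suffix is appended automatically
    print(db.list_tables())             # a fresh database has no tables yet
```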
--- dragon_ml_toolbox-4.0.0/README.md
+++ dragon_ml_toolbox-4.2.0/README.md
@@ -36,7 +36,6 @@ Install from the conda-forge channel:
 ```bash
 conda install -c conda-forge dragon-ml-toolbox
 ```
-**Note:** This version is outdated or broken due to dependency incompatibilities. Use PyPi instead.
 
 ## Modular Installation
 
@@ -71,6 +70,7 @@ ML_trainer
 ML_inference
 path_manager
 PSO_optimization
+SQL
 RNN_forecast
 utilities
 ```
@@ -85,7 +85,7 @@ pip install "dragon-ml-toolbox[mice]"
 
 #### Modules:
 
-```bash
+```Bash
 custom_logger
 MICE_imputation
 VIF_factor
@@ -103,7 +103,7 @@ pip install "dragon-ml-toolbox[excel]"
 
 #### Modules:
 
-```bash
+```Bash
 custom_logger
 handle_excel
 path_manager
@@ -123,7 +123,7 @@ pip install "dragon-ml-toolbox[gui-boost,plot]"
 
 #### Modules:
 
-```bash
+```Bash
 GUI_tools
 ensemble_inference
 path_manager
@@ -143,12 +143,42 @@ pip install "dragon-ml-toolbox[gui-torch,plot]"
 
 #### Modules:
 
-```bash
+```Bash
 GUI_tools
 ML_inference
 path_manager
 ```
 
+### 🎫 Base Tools [base]
+
+General purpose functions and classes.
+
+```Bash
+pip install "dragon-ml-toolbox[base]"
+```
+
+#### Modules:
+
+```Bash
+ETL_Engineering
+custom_logger
+SQL
+utilities
+path_manager
+```
+
+### ⚒️ APP bundlers
+
+Choose one if needed.
+
+```Bash
+pip install "dragon-ml-toolbox[pyinstaller]"
+```
+
+```Bash
+pip install "dragon-ml-toolbox[nuitka]"
+```
+
 ## Usage
 
 After installation, import modules like this:
--- dragon_ml_toolbox-4.0.0/PKG-INFO
+++ dragon_ml_toolbox-4.2.0/dragon_ml_toolbox.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 4.0.0
+Version: 4.2.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -12,6 +12,11 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: LICENSE-THIRD-PARTY.md
+Provides-Extra: base
+Requires-Dist: pandas; extra == "base"
+Requires-Dist: numpy; extra == "base"
+Requires-Dist: polars; extra == "base"
+Requires-Dist: joblib; extra == "base"
 Provides-Extra: ml
 Requires-Dist: numpy; extra == "ml"
 Requires-Dist: pandas; extra == "ml"
@@ -57,16 +62,20 @@ Provides-Extra: gui-boost
 Requires-Dist: numpy; extra == "gui-boost"
 Requires-Dist: joblib; extra == "gui-boost"
 Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-boost"
-Requires-Dist: pyinstaller; extra == "gui-boost"
 Requires-Dist: xgboost; extra == "gui-boost"
 Requires-Dist: lightgbm; extra == "gui-boost"
 Provides-Extra: gui-torch
 Requires-Dist: numpy; extra == "gui-torch"
 Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-torch"
-Requires-Dist: pyinstaller; extra == "gui-torch"
 Provides-Extra: plot
 Requires-Dist: matplotlib; extra == "plot"
 Requires-Dist: seaborn; extra == "plot"
+Provides-Extra: pyinstaller
+Requires-Dist: pyinstaller; extra == "pyinstaller"
+Provides-Extra: nuitka
+Requires-Dist: nuitka; extra == "nuitka"
+Requires-Dist: zstandard; extra == "nuitka"
+Requires-Dist: ordered-set; extra == "nuitka"
 Dynamic: license-file
 
 # dragon-ml-toolbox
@@ -107,7 +116,6 @@ Install from the conda-forge channel:
 ```bash
 conda install -c conda-forge dragon-ml-toolbox
 ```
-**Note:** This version is outdated or broken due to dependency incompatibilities. Use PyPi instead.
 
 ## Modular Installation
 
@@ -142,6 +150,7 @@ ML_trainer
 ML_inference
 path_manager
 PSO_optimization
+SQL
 RNN_forecast
 utilities
 ```
@@ -156,7 +165,7 @@ pip install "dragon-ml-toolbox[mice]"
 
 #### Modules:
 
-```bash
+```Bash
 custom_logger
 MICE_imputation
 VIF_factor
@@ -174,7 +183,7 @@ pip install "dragon-ml-toolbox[excel]"
 
 #### Modules:
 
-```bash
+```Bash
 custom_logger
 handle_excel
 path_manager
@@ -194,7 +203,7 @@ pip install "dragon-ml-toolbox[gui-boost,plot]"
 
 #### Modules:
 
-```bash
+```Bash
 GUI_tools
 ensemble_inference
 path_manager
@@ -214,12 +223,42 @@ pip install "dragon-ml-toolbox[gui-torch,plot]"
 
 #### Modules:
 
-```bash
+```Bash
 GUI_tools
 ML_inference
 path_manager
 ```
 
+### 🎫 Base Tools [base]
+
+General purpose functions and classes.
+
+```Bash
+pip install "dragon-ml-toolbox[base]"
+```
+
+#### Modules:
+
+```Bash
+ETL_Engineering
+custom_logger
+SQL
+utilities
+path_manager
+```
+
+### ⚒️ APP bundlers
+
+Choose one if needed.
+
+```Bash
+pip install "dragon-ml-toolbox[pyinstaller]"
+```
+
+```Bash
+pip install "dragon-ml-toolbox[nuitka]"
+```
+
 ## Usage
 
 After installation, import modules like this:
--- dragon_ml_toolbox-4.0.0/dragon_ml_toolbox.egg-info/SOURCES.txt
+++ dragon_ml_toolbox-4.2.0/dragon_ml_toolbox.egg-info/SOURCES.txt
@@ -16,6 +16,7 @@ ml_tools/ML_inference.py
 ml_tools/ML_trainer.py
 ml_tools/PSO_optimization.py
 ml_tools/RNN_forecast.py
+ml_tools/SQL.py
 ml_tools/VIF_factor.py
 ml_tools/__init__.py
 ml_tools/_logger.py
--- dragon_ml_toolbox-4.0.0/dragon_ml_toolbox.egg-info/requires.txt
+++ dragon_ml_toolbox-4.2.0/dragon_ml_toolbox.egg-info/requires.txt
@@ -19,6 +19,12 @@ shap
 tqdm
 Pillow
 
+[base]
+pandas
+numpy
+polars
+joblib
+
 [excel]
 pandas
 openpyxl
@@ -32,14 +38,12 @@ ipywidgets
 numpy
 joblib
 FreeSimpleGUI>=5.2
-pyinstaller
 xgboost
 lightgbm
 
 [gui-torch]
 numpy
 FreeSimpleGUI>=5.2
-pyinstaller
 
 [mice]
 numpy<2.0
@@ -53,10 +57,18 @@ statsmodels
 lightgbm<=4.5.0
 shap
 
+[nuitka]
+nuitka
+zstandard
+ordered-set
+
 [plot]
 matplotlib
 seaborn
 
+[pyinstaller]
+pyinstaller
+
 [pytorch]
 torch
 torchvision
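These extras end up in the wheel metadata as `Requires-Dist` markers (see the PKG-INFO hunks above). One way to confirm an installed copy exposes the new groups, using only the standard library — the filter strings are illustrative:

```python
# Inspect the published extras of an installed dragon-ml-toolbox (stdlib only).
from importlib.metadata import metadata, requires

print(metadata("dragon-ml-toolbox")["Version"])  # expect 4.2.0
for req in requires("dragon-ml-toolbox") or []:
    if 'extra == "base"' in req or 'extra == "nuitka"' in req:
        print(req)  # e.g. 'polars; extra == "base"'
```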
--- dragon_ml_toolbox-4.0.0/ml_tools/PSO_optimization.py
+++ dragon_ml_toolbox-4.2.0/ml_tools/PSO_optimization.py
@@ -9,7 +9,6 @@ from .utilities import (
     threshold_binary_values,
     threshold_binary_values_batch,
     deserialize_object,
-    save_dataframe,
     yield_dataframes_from_dir)
 from .path_manager import sanitize_filename, make_fullpath, list_files_by_extension, list_csv_paths
 import torch
@@ -19,6 +18,8 @@ import seaborn as sns
 from ._logger import _LOGGER
 from .keys import ModelSaveKeys
 from ._script_info import _script_info
+from .SQL import DatabaseManager
+from contextlib import nullcontext
 
 
 __all__ = [
@@ -182,45 +183,73 @@ def _set_feature_names(size: int, names: Union[list[str], None]):
     else:
         assert len(names) == size, "List with feature names do not match the number of features"
     return names
-
 
-def _save_results(*dicts, save_dir: Union[str,Path], target_name: str):
-    combined_dict = dict()
-    for single_dict in dicts:
-        combined_dict.update(single_dict)
+
+def _save_result(result_dict: dict,
+                 save_format: Literal['csv', 'sqlite', 'both'],
+                 csv_path: Path,
+                 db_manager: Optional[DatabaseManager] = None,
+                 db_table_name: Optional[str] = None):
+    """
+    Handles saving a single result to CSV, SQLite, or both.
+    """
+    # Save to CSV
+    if save_format in ['csv', 'both']:
+        _save_or_append_to_csv(result_dict, csv_path)
+
+    # Save to SQLite
+    if save_format in ['sqlite', 'both']:
+        if db_manager and db_table_name:
+            db_manager.insert_row(db_table_name, result_dict)
+        else:
+            _LOGGER.warning("SQLite saving requested but db_manager or table_name not provided.")
+
+
+def _save_or_append_to_csv(data_dict: dict, save_path: Path):
+    """
+    Saves or appends a dictionary of data as a single row to a CSV file.
+
+    If the file doesn't exist, it creates it and writes the header.
+    If the file exists, it appends the new data without the header.
+    """
+    df_row = pd.DataFrame([data_dict])
 
-    df = pd.DataFrame(combined_dict)
+    file_exists = save_path.exists()
 
-    save_dataframe(df=df, save_dir=save_dir, filename=f"Optimization_{target_name}")
+    df_row.to_csv(
+        save_path,
+        mode='a',                # 'a' for append mode
+        index=False,             # Don't write the DataFrame index
+        header=not file_exists   # Write header only if file does NOT exist
+    )
 
 
-def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int):
-    """Helper for a single PSO run."""
+def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+    """Helper for a single PSO run that also handles saving."""
     pso_args.update({"seed": random_state})
 
     best_features, best_target, *_ = _pso(**pso_args)
 
-    # Flip best_target if maximization was used
     if objective_function.task == "maximization":
         best_target = -best_target
 
-    # Threshold binary features
     binary_number = objective_function.binary_features
     best_features_threshold = threshold_binary_values(best_features, binary_number)
 
-    # Name features and target
     best_features_named = {name: value for name, value in zip(feature_names, best_features_threshold)}
     best_target_named = {target_name: best_target}
 
+    # Save the result using the new helper
+    combined_dict = {**best_features_named, **best_target_named}
+    _save_result(combined_dict, save_format, csv_path, db_manager, db_table_name)
+
    return best_features_named, best_target_named
 
 
-def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int):
-    """Helper for post-hoc PSO analysis."""
-    all_best_targets = []
-    all_best_features = [[] for _ in range(len(feature_names))]
-
-    for _ in range(repetitions):
+def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+    """Helper for post-hoc analysis that saves results incrementally."""
+    progress = trange(repetitions, desc="Post-Hoc PSO", unit="run")
+    for _ in progress:
         best_features, best_target, *_ = _pso(**pso_args)
 
         if objective_function.task == "maximization":
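Note on the new CSV path: `_save_or_append_to_csv` relies on a small pandas idiom, append mode with the header emitted only on the write that creates the file. The same pattern in isolation (file and column names illustrative):

```python
from pathlib import Path
import pandas as pd

def append_row(row: dict, path: Path) -> None:
    # Header goes out only on the write that creates the file;
    # every later call appends a bare data row.
    pd.DataFrame([row]).to_csv(path, mode="a", index=False, header=not path.exists())

csv = Path("Optimization_Target.csv")  # illustrative file name
append_row({"feature_a": 0.12, "Target": 95.5}, csv)
append_row({"feature_a": 0.34, "Target": 97.1}, csv)
```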
@@ -229,28 +258,25 @@ def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, fea
         binary_number = objective_function.binary_features
         best_features_threshold = threshold_binary_values(best_features, binary_number)
 
-        for i, best_feature in enumerate(best_features_threshold):
-            all_best_features[i].append(best_feature)
-        all_best_targets.append(best_target)
-
-    # Name features and target
-    all_best_features_named = {name: lst for name, lst in zip(feature_names, all_best_features)}
-    all_best_targets_named = {target_name: all_best_targets}
-
-    return all_best_features_named, all_best_targets_named
+        result_dict = {name: value for name, value in zip(feature_names, best_features_threshold)}
+        result_dict[target_name] = best_target
+
+        # Save each result incrementally
+        _save_result(result_dict, save_format, csv_path, db_manager, db_table_name)
 
 
 def run_pso(lower_boundaries: list[float],
             upper_boundaries: list[float],
             objective_function: ObjectiveFunction,
             save_results_dir: Union[str,Path],
+            save_format: Literal['csv', 'sqlite', 'both'] = 'csv',
             auto_binary_boundaries: bool=True,
             target_name: Union[str, None]=None,
             feature_names: Union[list[str], None]=None,
             swarm_size: int=200,
             max_iterations: int=3000,
             random_state: int=101,
-            post_hoc_analysis: Optional[int]=10) -> Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]:
+            post_hoc_analysis: Optional[int]=10) -> Optional[Tuple[Dict[str, float], Dict[str, float]]]:
     """
     Executes Particle Swarm Optimization (PSO) to optimize a given objective function and saves the results as a CSV file.
 
@@ -264,6 +290,11 @@
         A callable object encapsulating a tree-based regression model.
     save_results_dir : str | Path
         Directory path to save the results CSV file.
+    save_format : {'csv', 'sqlite', 'both'}, default 'csv'
+        The format for saving optimization results.
+        - 'csv': Saves results to a CSV file.
+        - 'sqlite': Saves results to an SQLite database file. ⚠️ If a database exists, new tables will be created using the target name.
+        - 'both': Saves results to both formats.
     auto_binary_boundaries : bool
         Use `ObjectiveFunction.binary_features` to append as many binary boundaries as needed to `lower_boundaries` and `upper_boundaries` automatically.
     target_name : str or None, optional
@@ -279,14 +310,11 @@
 
     Returns
     -------
-    Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]
-        If `post_hoc_analysis` is None, returns two dictionaries:
-        - feature_names: Feature values (after inverse scaling) that yield the best result.
-        - target_name: Best result obtained for the target variable.
-
-        If `post_hoc_analysis` is an integer, returns two dictionaries:
-        - feature_names: Lists of best feature values (after inverse scaling) for each repetition.
-        - target_name: List of best target values across repetitions.
+    Tuple[Dict[str, float], Dict[str, float]] or None
+        - If `post_hoc_analysis` is None, returns two dictionaries containing the
+          single best features and the corresponding target value.
+        - If `post_hoc_analysis` is active, results are streamed directly to a CSV file
+          and this function returns `None`.
 
     Notes
     -----
@@ -311,8 +339,9 @@
     # Append binary boundaries
     binary_number = objective_function.binary_features
     if auto_binary_boundaries and binary_number > 0:
-        local_lower_boundaries.extend([0] * binary_number)
-        local_upper_boundaries.extend([1] * binary_number)
+        # simplify binary search by constraining range
+        local_lower_boundaries.extend([0.45] * binary_number)
+        local_upper_boundaries.extend([0.55] * binary_number)
 
     # Set the total length of features
     size_of_features = len(local_lower_boundaries)
@@ -328,7 +357,25 @@
     if target_name is None and objective_function.target_name is not None:
         target_name = objective_function.target_name
     if target_name is None:
-        target_name = "Target"
+        raise ValueError(f"'target' name was not provided and was not found in the .joblib object.")
+
+    # --- Setup: Saving Infrastructure ---
+    sanitized_target_name = sanitize_filename(target_name)
+    save_dir_path = make_fullpath(save_results_dir, make=True, enforce="directory")
+    base_filename = f"Optimization_{sanitized_target_name}"
+    csv_path = save_dir_path / f"{base_filename}.csv"
+    db_path = save_dir_path / "Optimization.db"
+    db_table_name = f"{sanitized_target_name}"
+
+    if save_format in ['sqlite', 'both']:
+        # Dynamically create the schema for the database table
+        schema = {name: "REAL" for name in names}
+        schema[target_name] = "REAL"
+        schema = {"result_id": "INTEGER PRIMARY KEY AUTOINCREMENT", **schema}
+
+        # Create table
+        with DatabaseManager(db_path) as db:
+            db.create_table(db_table_name, schema)
 
     pso_arguments = {
         "func":objective_function,
@@ -340,17 +387,29 @@
         "particle_output": False,
     }
 
-    # Dispatcher
-    if post_hoc_analysis is None or post_hoc_analysis <= 1:
-        features, target = _run_single_pso(objective_function, pso_arguments, names, target_name, random_state)
-    else:
-        features, target = _run_post_hoc_pso(objective_function, pso_arguments, names, target_name, post_hoc_analysis)
-
-    # --- Save Results ---
-    save_results_path = make_fullpath(save_results_dir, make=True)
-    _save_results(features, target, save_dir=save_results_path, target_name=target_name)
-
-    return features, target # type: ignore
+    # --- Dispatcher ---
+    # Use a real or dummy context manager to handle the DB connection cleanly
+    db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
+
+    with db_context as db_manager:
+        if post_hoc_analysis is None or post_hoc_analysis <= 1:
+            # --- Single Run Logic ---
+            features_dict, target_dict = _run_single_pso(
+                objective_function, pso_arguments, names, target_name, random_state,
+                save_format, csv_path, db_manager, db_table_name
+            )
+            _LOGGER.info(f"✅ Single optimization complete.")
+            return features_dict, target_dict
+
+        else:
+            # --- Post-Hoc Analysis Logic ---
+            _LOGGER.info(f"🏁 Starting post-hoc analysis with {post_hoc_analysis} repetitions...")
+            _run_post_hoc_pso(
+                objective_function, pso_arguments, names, target_name, post_hoc_analysis,
+                save_format, csv_path, db_manager, db_table_name
+            )
+            _LOGGER.info("✅ Post-hoc analysis complete. Results saved.")
+            return None
 
 
 def _pso(func: ObjectiveFunction,
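Taken together, the new signature can be exercised as below. This is a sketch only: `objective` stands in for an already-built `ObjectiveFunction`, whose construction is outside this diff, and the boundary values are placeholders.

```python
from ml_tools.PSO_optimization import run_pso

# `objective` is assumed to be an ObjectiveFunction built elsewhere;
# its constructor is not shown in this diff.
result = run_pso(
    lower_boundaries=[0.0, 0.0],
    upper_boundaries=[10.0, 5.0],
    objective_function=objective,
    save_results_dir="optimization_results",
    save_format="both",     # new in 4.2.0: CSV rows plus a table in Optimization.db
    post_hoc_analysis=50,   # 50 runs streamed to disk; returns None
)
```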
--- /dev/null
+++ dragon_ml_toolbox-4.2.0/ml_tools/SQL.py
@@ -0,0 +1,272 @@
+import sqlite3
+import pandas as pd
+from pathlib import Path
+from typing import Union, Dict, Any, Optional, List, Literal
+from ._logger import _LOGGER
+from ._script_info import _script_info
+from .path_manager import make_fullpath
+
+
+__all__ = [
+    "DatabaseManager",
+]
+
+
+class DatabaseManager:
+    """
+    A user-friendly context manager for handling SQLite database operations.
+
+    This class abstracts the underlying sqlite3 connection and cursor management,
+    providing simple methods to execute queries, create tables, and handle data
+    insertion and retrieval using pandas DataFrames.
+
+    Parameters
+    ----------
+    db_path : Union[str, Path]
+        The file path to the SQLite database. If the file does not exist,
+        it will be created upon connection.
+
+    Example
+    -------
+    >>> schema = {
+    ...     "id": "INTEGER PRIMARY KEY AUTOINCREMENT",
+    ...     "run_name": "TEXT NOT NULL",
+    ...     "feature_a": "REAL",
+    ...     "score": "REAL"
+    ... }
+    >>> with DatabaseManager("my_results.db") as db:
+    ...     db.create_table("experiments", schema)
+    ...     data = {"run_name": "first_run", "feature_a": 0.123, "score": 95.5}
+    ...     db.insert_row("experiments", data)
+    ...     df = db.query_to_dataframe("SELECT * FROM experiments")
+    ...     print(df)
+    """
+    def __init__(self, db_path: Union[str, Path]):
+        """Initializes the DatabaseManager with the path to the database file."""
+        if isinstance(db_path, str):
+            if not db_path.endswith(".db"):
+                db_path = db_path + ".db"
+        elif isinstance(db_path, Path):
+            if db_path.suffix != ".db":
+                db_path = db_path.with_suffix(".db")
+
+        self.db_path = make_fullpath(db_path, make=True, enforce="file")
+        self.conn: Optional[sqlite3.Connection] = None
+        self.cursor: Optional[sqlite3.Cursor] = None
+
+    def __enter__(self):
+        """Establishes the database connection and returns the manager instance."""
+        try:
+            self.conn = sqlite3.connect(self.db_path)
+            self.cursor = self.conn.cursor()
+            _LOGGER.info(f"✅ Successfully connected to database: {self.db_path}")
+            return self
+        except sqlite3.Error as e:
+            _LOGGER.error(f"❌ Database connection failed: {e}")
+            raise  # Re-raise the exception after logging
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Commits changes and closes the database connection."""
+        if self.conn:
+            if exc_type:  # If an exception occurred, rollback
+                self.conn.rollback()
+                _LOGGER.warning("⚠️ Rolling back transaction due to an error.")
+            else:  # Otherwise, commit the transaction
+                self.conn.commit()
+            self.conn.close()
+            _LOGGER.info(f"❇️ Database connection closed: {self.db_path.name}")
+
+    def create_table(self, table_name: str, schema: Dict[str, str], if_not_exists: bool = True):
+        """
+        Creates a new table in the database based on a provided schema.
+
+        Parameters
+        ----------
+        table_name : str
+            The name of the table to create.
+        schema : Dict[str, str]
+            A dictionary where keys are column names and values are their SQL data types
+            (e.g., {"id": "INTEGER PRIMARY KEY", "name": "TEXT NOT NULL"}).
+        if_not_exists : bool, default=True
+            If True, adds "IF NOT EXISTS" to the SQL statement to prevent errors
+            if the table already exists.
+        """
+        if not self.cursor:
+            raise sqlite3.Error("Database connection is not open.")
+
+        columns_def = ", ".join([f'"{col_name}" {col_type}' for col_name, col_type in schema.items()])
+        exists_clause = "IF NOT EXISTS" if if_not_exists else ""
+
+        query = f"CREATE TABLE {exists_clause} {table_name} ({columns_def})"
+
+        _LOGGER.info(f"🗂️ Executing: {query}")
+        self.cursor.execute(query)
+
+    def insert_row(self, table_name: str, data: Dict[str, Any]):
+        """
+        Inserts a single row of data into the specified table.
+
+        Parameters
+        ----------
+        table_name : str
+            The name of the target table.
+        data : Dict[str, Any]
+            A dictionary where keys correspond to column names and values are the
+            data to be inserted.
+        """
+        if not self.cursor:
+            raise sqlite3.Error("Database connection is not open.")
+
+        columns = ', '.join(f'"{k}"' for k in data.keys())
+        placeholders = ', '.join(['?'] * len(data))
+        values = list(data.values())
+
+        query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
+
+        self.cursor.execute(query, values)
+
+    def query_to_dataframe(self, query: str, params: Optional[tuple] = None) -> pd.DataFrame:
+        """
+        Executes a SELECT query and returns the results as a pandas DataFrame.
+
+        Parameters
+        ----------
+        query : str
+            The SQL SELECT statement to execute.
+        params : Optional[tuple], default=None
+            An optional tuple of parameters to pass to the query for safety
+            against SQL injection.
+
+        Returns
+        -------
+        pd.DataFrame
+            A DataFrame containing the query results.
+        """
+        if not self.conn:
+            raise sqlite3.Error("Database connection is not open.")
+
+        return pd.read_sql_query(query, self.conn, params=params)
+
+    def execute_sql(self, query: str, params: Optional[tuple] = None):
+        """
+        Executes an arbitrary SQL command that does not return data (e.g., UPDATE, DELETE).
+
+        Parameters
+        ----------
+        query : str
+            The SQL statement to execute.
+        params : Optional[tuple], default=None
+            An optional tuple of parameters for the query.
+        """
+        if not self.cursor:
+            raise sqlite3.Error("Database connection is not open.")
+
+        self.cursor.execute(query, params if params else ())
+
+    def insert_many(self, table_name: str, data: List[Dict[str, Any]]):
+        """
+        Inserts multiple rows into the specified table in a single, efficient transaction.
+
+        Parameters
+        ----------
+        table_name : str
+            The name of the target table.
+        data : List[Dict[str, Any]]
+            A list of dictionaries, where each dictionary represents a row to be inserted.
+            All dictionaries should have the same keys.
+        """
+        if not self.cursor:
+            raise sqlite3.Error("Database connection is not open.")
+        if not data:
+            _LOGGER.warning("⚠️ insert_many called with empty data list. No action taken.")
+            return
+
+        # Assume all dicts have the same keys as the first one
+        first_row = data[0]
+        columns = ', '.join(f'"{k}"' for k in first_row.keys())
+        placeholders = ', '.join(['?'] * len(first_row))
+
+        # Create a list of tuples, where each tuple is a row of values
+        values_to_insert = [list(row.values()) for row in data]
+
+        query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
+
+        self.cursor.executemany(query, values_to_insert)
+        _LOGGER.info(f"✅ Bulk inserted {len(values_to_insert)} rows into '{table_name}'.")
+
+    def insert_from_dataframe(self, table_name: str, df: pd.DataFrame, if_exists: Literal['fail', 'replace', 'append'] = 'append'):
+        """
+        Writes records from a pandas DataFrame to the specified SQL table.
+
+        Parameters
+        ----------
+        table_name : str
+            The name of the target SQL table.
+        df : pd.DataFrame
+            The DataFrame to be written.
+        if_exists : str, default 'append'
+            How to behave if the table already exists.
+            - 'fail': Raise a ValueError.
+            - 'replace': Drop the table before inserting new values.
+            - 'append': Insert new values to the existing table.
+        """
+        if not self.conn:
+            raise sqlite3.Error("Database connection is not open.")
+
+        df.to_sql(
+            table_name,
+            self.conn,
+            if_exists=if_exists,
+            index=False  # Typically, we don't want to save the DataFrame index
+        )
+        _LOGGER.info(f"✅ Wrote {len(df)} rows from DataFrame to table '{table_name}' using mode '{if_exists}'.")
+
+    def list_tables(self) -> List[str]:
+        """Returns a list of all table names in the database."""
+        if not self.cursor:
+            raise sqlite3.Error("Database connection is not open.")
+
+        self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+        # The result of the fetch is a list of tuples, e.g., [('table1',), ('table2',)]
+        return [table[0] for table in self.cursor.fetchall()]
+
+    def get_table_schema(self, table_name: str) -> pd.DataFrame:
+        """
+        Retrieves the schema of a specific table and returns it as a DataFrame.
+
+        Returns a DataFrame with columns: cid, name, type, notnull, dflt_value, pk
+        """
+        if not self.conn:
+            raise sqlite3.Error("Database connection is not open.")
+
+        # PRAGMA is a special SQL command in SQLite for database metadata
+        return pd.read_sql_query(f'PRAGMA table_info("{table_name}");', self.conn)
+
+    def create_index(self, table_name: str, column_name: str, unique: bool = False):
+        """
+        Creates an index on a column of a specified table to speed up queries.
+
+        Parameters
+        ----------
+        table_name : str
+            The name of the table containing the column.
+        column_name : str
+            The name of the column to be indexed.
+        unique : bool, default=False
+            If True, creates a unique index, which ensures all values in the
+            column are unique.
+        """
+        if not self.cursor:
+            raise sqlite3.Error("Database connection is not open.")
+
+        index_name = f"idx_{table_name}_{column_name}"
+        unique_clause = "UNIQUE" if unique else ""
+
+        query = f"CREATE {unique_clause} INDEX IF NOT EXISTS {index_name} ON {table_name} ({column_name})"
+
+        _LOGGER.info(f"🗂️ Executing: {query}")
+        self.cursor.execute(query)
+
+
+def info():
+    _script_info(__all__)
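Beyond the docstring example, the bulk-insert and parameterized-query paths compose as follows; table and column names here are illustrative, the methods are the ones defined above:

```python
from ml_tools.SQL import DatabaseManager

rows = [
    {"run_name": "a", "feature_a": 0.10, "score": 91.0},
    {"run_name": "b", "feature_a": 0.42, "score": 88.5},
]

with DatabaseManager("my_results") as db:  # ".db" suffix added automatically
    db.create_table("experiments", {
        "id": "INTEGER PRIMARY KEY AUTOINCREMENT",
        "run_name": "TEXT NOT NULL",
        "feature_a": "REAL",
        "score": "REAL",
    })
    db.insert_many("experiments", rows)    # single executemany transaction
    db.create_index("experiments", "run_name")
    high = db.query_to_dataframe(
        "SELECT run_name, score FROM experiments WHERE score > ?",
        params=(90.0,),
    )
    print(high)
```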
--- dragon_ml_toolbox-4.0.0/ml_tools/path_manager.py
+++ dragon_ml_toolbox-4.2.0/ml_tools/path_manager.py
@@ -49,18 +49,26 @@ class PathManager:
         for dir_name in base_directories:
             # This logic works for both dev mode and bundled mode
             self._paths[dir_name] = package_root / dir_name
-
+
     def _get_bundle_root(self) -> tuple[bool, Optional[str]]:
         """
-        Checks if the app is running in a PyInstaller bundle and returns the root path.
+        Checks if the app is running in a PyInstaller or Nuitka bundle and returns the root path.
 
         Returns:
-            A tuple (is_bundled, bundle_root_path). `bundle_root_path` is the
-            path to the temporary directory `_MEIPASS` if bundled, otherwise None.
+            A tuple (is_bundled, bundle_root_path).
         """
+        # --- PyInstaller Check ---
         if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
-            # This is the standard way to check for a PyInstaller bundle
+            # The bundle root for PyInstaller is the temporary _MEIPASS directory
             return True, sys._MEIPASS # type: ignore
+
+        # --- Nuitka Check ---
+        elif '__nuitka_binary_dir' in sys.__dict__:
+            # For Nuitka, the root is the directory of the binary.
+            # Unlike PyInstaller's _MEIPASS, this is the final install location.
+            return True, sys.__dict__['__nuitka_binary_dir']
+
+        # --- Not Bundled ---
         else:
             return False, None
 
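The detection order above, PyInstaller first and Nuitka second, reduces to a self-contained helper for anyone who wants to probe it outside the class:

```python
import sys
from typing import Optional

def bundle_root() -> Optional[str]:
    """Mirror of _get_bundle_root: PyInstaller first, then Nuitka, else None."""
    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
        return sys._MEIPASS                         # PyInstaller temp dir
    if "__nuitka_binary_dir" in sys.__dict__:
        return sys.__dict__["__nuitka_binary_dir"]  # Nuitka install dir
    return None

print(bundle_root())  # None when run from a normal interpreter
```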
--- dragon_ml_toolbox-4.0.0/pyproject.toml
+++ dragon_ml_toolbox-4.2.0/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "4.0.0"
+version = "4.2.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }
@@ -19,6 +19,14 @@ Homepage = "https://github.com/DrAg0n-BoRn/ML_tools"
 Changelog = "https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md"
 
 [project.optional-dependencies]
+# Base all purpose tools
+base = [
+    "pandas",
+    "numpy",
+    "polars",
+    "joblib"
+]
+
 # Machine Learning main toolbox. Additionally Requires PyTorch with CUDA / MPS support if pytorch models are used
 ML = [
     "numpy",
@@ -77,16 +85,14 @@ gui-boost = [
     "numpy",
     "joblib",
     "FreeSimpleGUI>=5.2",
-    "pyinstaller",
     "xgboost",
-    "lightgbm"
+    "lightgbm",
 ]
 
 # GUI for Pytorch - Additionally Requires PyTorch with CUDA / MPS support
 gui-torch = [
     "numpy",
     "FreeSimpleGUI>=5.2",
-    "pyinstaller",
 ]
 
 # For GUIs using plotting features
@@ -95,6 +101,17 @@ plot = [
     "seaborn"
 ]
 
+# APP Bundlers - Choose one
+pyinstaller = [
+    "pyinstaller"
+]
+
+nuitka = [
+    "nuitka",
+    "zstandard",
+    "ordered-set"
+]
+
 [build-system]
 requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
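As a quick sanity check, the new optional-dependency tables can be read back with the standard library's TOML parser (available from Python 3.11, one minor release above the package's own floor of 3.10):

```python
# Read the optional-dependency tables back out of pyproject.toml.
import tomllib  # stdlib in Python 3.11+

with open("pyproject.toml", "rb") as f:
    config = tomllib.load(f)

extras = config["project"]["optional-dependencies"]
print(sorted(extras))    # includes 'base', 'nuitka', 'pyinstaller'
print(extras["nuitka"])  # ['nuitka', 'zstandard', 'ordered-set']
```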