dragon-ml-toolbox 4.0.0.tar.gz → 4.2.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-4.0.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-4.2.0}/PKG-INFO +47 -8
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/README.md +35 -5
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0/dragon_ml_toolbox.egg-info}/PKG-INFO +47 -8
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +1 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/dragon_ml_toolbox.egg-info/requires.txt +14 -2
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/PSO_optimization.py +110 -51
- dragon_ml_toolbox-4.2.0/ml_tools/SQL.py +272 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/path_manager.py +13 -5
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/pyproject.toml +21 -4
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/LICENSE +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/_pytorch_models.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/datasetmaster.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/setup.cfg +0 -0
{dragon_ml_toolbox-4.0.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-4.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 4.0.0
+Version: 4.2.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -12,6 +12,11 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: LICENSE-THIRD-PARTY.md
+Provides-Extra: base
+Requires-Dist: pandas; extra == "base"
+Requires-Dist: numpy; extra == "base"
+Requires-Dist: polars; extra == "base"
+Requires-Dist: joblib; extra == "base"
 Provides-Extra: ml
 Requires-Dist: numpy; extra == "ml"
 Requires-Dist: pandas; extra == "ml"
@@ -57,16 +62,20 @@ Provides-Extra: gui-boost
 Requires-Dist: numpy; extra == "gui-boost"
 Requires-Dist: joblib; extra == "gui-boost"
 Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-boost"
-Requires-Dist: pyinstaller; extra == "gui-boost"
 Requires-Dist: xgboost; extra == "gui-boost"
 Requires-Dist: lightgbm; extra == "gui-boost"
 Provides-Extra: gui-torch
 Requires-Dist: numpy; extra == "gui-torch"
 Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-torch"
-Requires-Dist: pyinstaller; extra == "gui-torch"
 Provides-Extra: plot
 Requires-Dist: matplotlib; extra == "plot"
 Requires-Dist: seaborn; extra == "plot"
+Provides-Extra: pyinstaller
+Requires-Dist: pyinstaller; extra == "pyinstaller"
+Provides-Extra: nuitka
+Requires-Dist: nuitka; extra == "nuitka"
+Requires-Dist: zstandard; extra == "nuitka"
+Requires-Dist: ordered-set; extra == "nuitka"
 Dynamic: license-file
 
 # dragon-ml-toolbox
@@ -107,7 +116,6 @@ Install from the conda-forge channel:
 ```bash
 conda install -c conda-forge dragon-ml-toolbox
 ```
-**Note:** This version is outdated or broken due to dependency incompatibilities. Use PyPi instead.
 
 ## Modular Installation
 
@@ -142,6 +150,7 @@ ML_trainer
 ML_inference
 path_manager
 PSO_optimization
+SQL
 RNN_forecast
 utilities
 ```
@@ -156,7 +165,7 @@ pip install "dragon-ml-toolbox[mice]"
 
 #### Modules:
 
-```
+```Bash
 custom_logger
 MICE_imputation
 VIF_factor
@@ -174,7 +183,7 @@ pip install "dragon-ml-toolbox[excel]"
 
 #### Modules:
 
-```
+```Bash
 custom_logger
 handle_excel
 path_manager
@@ -194,7 +203,7 @@ pip install "dragon-ml-toolbox[gui-boost,plot]"
 
 #### Modules:
 
-```
+```Bash
 GUI_tools
 ensemble_inference
 path_manager
@@ -214,12 +223,42 @@ pip install "dragon-ml-toolbox[gui-torch,plot]"
 
 #### Modules:
 
-```
+```Bash
 GUI_tools
 ML_inference
 path_manager
 ```
 
+### 🎫 Base Tools [base]
+
+General purpose functions and classes.
+
+```Bash
+pip install "dragon-ml-toolbox[base]"
+```
+
+#### Modules:
+
+```Bash
+ETL_Engineering
+custom_logger
+SQL
+utilities
+path_manager
+```
+
+### ⚒️ APP bundlers
+
+Choose one if needed.
+
+```Bash
+pip install "dragon-ml-toolbox[pyinstaller]"
+```
+
+```Bash
+pip install "dragon-ml-toolbox[nuitka]"
+```
+
 ## Usage
 
 After installation, import modules like this:
{dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/README.md

@@ -36,7 +36,6 @@ Install from the conda-forge channel:
 ```bash
 conda install -c conda-forge dragon-ml-toolbox
 ```
-**Note:** This version is outdated or broken due to dependency incompatibilities. Use PyPi instead.
 
 ## Modular Installation
 
@@ -71,6 +70,7 @@ ML_trainer
 ML_inference
 path_manager
 PSO_optimization
+SQL
 RNN_forecast
 utilities
 ```
@@ -85,7 +85,7 @@ pip install "dragon-ml-toolbox[mice]"
 
 #### Modules:
 
-```
+```Bash
 custom_logger
 MICE_imputation
 VIF_factor
@@ -103,7 +103,7 @@ pip install "dragon-ml-toolbox[excel]"
 
 #### Modules:
 
-```
+```Bash
 custom_logger
 handle_excel
 path_manager
@@ -123,7 +123,7 @@ pip install "dragon-ml-toolbox[gui-boost,plot]"
 
 #### Modules:
 
-```
+```Bash
 GUI_tools
 ensemble_inference
 path_manager
@@ -143,12 +143,42 @@ pip install "dragon-ml-toolbox[gui-torch,plot]"
 
 #### Modules:
 
-```
+```Bash
 GUI_tools
 ML_inference
 path_manager
 ```
 
+### 🎫 Base Tools [base]
+
+General purpose functions and classes.
+
+```Bash
+pip install "dragon-ml-toolbox[base]"
+```
+
+#### Modules:
+
+```Bash
+ETL_Engineering
+custom_logger
+SQL
+utilities
+path_manager
+```
+
+### ⚒️ APP bundlers
+
+Choose one if needed.
+
+```Bash
+pip install "dragon-ml-toolbox[pyinstaller]"
+```
+
+```Bash
+pip install "dragon-ml-toolbox[nuitka]"
+```
+
 ## Usage
 
 After installation, import modules like this:
{dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0/dragon_ml_toolbox.egg-info}/PKG-INFO

(Identical to the PKG-INFO diff shown above — the egg-info copy mirrors the top-level metadata, including the new [base], [pyinstaller], and [nuitka] extras and the removal of pyinstaller from the gui-boost and gui-torch extras.)
{dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/dragon_ml_toolbox.egg-info/requires.txt

@@ -19,6 +19,12 @@ shap
 tqdm
 Pillow
 
+[base]
+pandas
+numpy
+polars
+joblib
+
 [excel]
 pandas
 openpyxl
@@ -32,14 +38,12 @@ ipywidgets
 numpy
 joblib
 FreeSimpleGUI>=5.2
-pyinstaller
 xgboost
 lightgbm
 
 [gui-torch]
 numpy
 FreeSimpleGUI>=5.2
-pyinstaller
 
 [mice]
 numpy<2.0
@@ -53,10 +57,18 @@ statsmodels
 lightgbm<=4.5.0
 shap
 
+[nuitka]
+nuitka
+zstandard
+ordered-set
+
 [plot]
 matplotlib
 seaborn
 
+[pyinstaller]
+pyinstaller
+
 [pytorch]
 torch
 torchvision
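For readers who want to double-check the new extras against an installed copy, the standard library can read this same metadata. A minimal sketch, assuming dragon-ml-toolbox 4.2.0 or later is installed in the active environment:

```python
# Inspect the published extras and the requirements gated behind [base].
# Assumes dragon-ml-toolbox >= 4.2.0 is installed in this environment.
from importlib.metadata import metadata

meta = metadata("dragon-ml-toolbox")
print(meta.get_all("Provides-Extra"))  # should now include 'base', 'pyinstaller', 'nuitka'
print([req for req in meta.get_all("Requires-Dist") if 'extra == "base"' in req])
```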
{dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/PSO_optimization.py

@@ -9,7 +9,6 @@ from .utilities import (
     threshold_binary_values,
     threshold_binary_values_batch,
     deserialize_object,
-    save_dataframe,
     yield_dataframes_from_dir)
 from .path_manager import sanitize_filename, make_fullpath, list_files_by_extension, list_csv_paths
 import torch
@@ -19,6 +18,8 @@ import seaborn as sns
 from ._logger import _LOGGER
 from .keys import ModelSaveKeys
 from ._script_info import _script_info
+from .SQL import DatabaseManager
+from contextlib import nullcontext
 
 
 __all__ = [
@@ -182,45 +183,73 @@ def _set_feature_names(size: int, names: Union[list[str], None]):
     else:
         assert len(names) == size, "List with feature names do not match the number of features"
         return names
-
 
-
-
-
-
+
+def _save_result(result_dict: dict,
+                 save_format: Literal['csv', 'sqlite', 'both'],
+                 csv_path: Path,
+                 db_manager: Optional[DatabaseManager] = None,
+                 db_table_name: Optional[str] = None):
+    """
+    Handles saving a single result to CSV, SQLite, or both.
+    """
+    # Save to CSV
+    if save_format in ['csv', 'both']:
+        _save_or_append_to_csv(result_dict, csv_path)
+
+    # Save to SQLite
+    if save_format in ['sqlite', 'both']:
+        if db_manager and db_table_name:
+            db_manager.insert_row(db_table_name, result_dict)
+        else:
+            _LOGGER.warning("SQLite saving requested but db_manager or table_name not provided.")
+
+
+def _save_or_append_to_csv(data_dict: dict, save_path: Path):
+    """
+    Saves or appends a dictionary of data as a single row to a CSV file.
+
+    If the file doesn't exist, it creates it and writes the header.
+    If the file exists, it appends the new data without the header.
+    """
+    df_row = pd.DataFrame([data_dict])
 
-
+    file_exists = save_path.exists()
 
-
+    df_row.to_csv(
+        save_path,
+        mode='a',                # 'a' for append mode
+        index=False,             # Don't write the DataFrame index
+        header=not file_exists   # Write header only if file does NOT exist
+    )
 
 
-def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int):
-    """Helper for a single PSO run."""
+def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+    """Helper for a single PSO run that also handles saving."""
     pso_args.update({"seed": random_state})
 
     best_features, best_target, *_ = _pso(**pso_args)
 
-    # Flip best_target if maximization was used
     if objective_function.task == "maximization":
         best_target = -best_target
 
-    # Threshold binary features
     binary_number = objective_function.binary_features
     best_features_threshold = threshold_binary_values(best_features, binary_number)
 
-    # Name features and target
     best_features_named = {name: value for name, value in zip(feature_names, best_features_threshold)}
     best_target_named = {target_name: best_target}
 
+    # Save the result using the new helper
+    combined_dict = {**best_features_named, **best_target_named}
+    _save_result(combined_dict, save_format, csv_path, db_manager, db_table_name)
+
     return best_features_named, best_target_named
 
 
-def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int):
-    """Helper for post-hoc
-
-
-
-    for _ in range(repetitions):
+def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+    """Helper for post-hoc analysis that saves results incrementally."""
+    progress = trange(repetitions, desc="Post-Hoc PSO", unit="run")
+    for _ in progress:
         best_features, best_target, *_ = _pso(**pso_args)
 
         if objective_function.task == "maximization":
@@ -229,28 +258,25 @@ def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, fea
         binary_number = objective_function.binary_features
         best_features_threshold = threshold_binary_values(best_features, binary_number)
 
-        for
-
-
-
-
-        all_best_features_named = {name: lst for name, lst in zip(feature_names, all_best_features)}
-        all_best_targets_named = {target_name: all_best_targets}
-
-        return all_best_features_named, all_best_targets_named
+        result_dict = {name: value for name, value in zip(feature_names, best_features_threshold)}
+        result_dict[target_name] = best_target
+
+        # Save each result incrementally
+        _save_result(result_dict, save_format, csv_path, db_manager, db_table_name)
 
 
 def run_pso(lower_boundaries: list[float],
             upper_boundaries: list[float],
             objective_function: ObjectiveFunction,
             save_results_dir: Union[str,Path],
+            save_format: Literal['csv', 'sqlite', 'both'] = 'csv',
             auto_binary_boundaries: bool=True,
             target_name: Union[str, None]=None,
             feature_names: Union[list[str], None]=None,
            swarm_size: int=200,
             max_iterations: int=3000,
             random_state: int=101,
-            post_hoc_analysis: Optional[int]=10) -> Tuple[Dict[str, float
+            post_hoc_analysis: Optional[int]=10) -> Optional[Tuple[Dict[str, float], Dict[str, float]]]:
     """
     Executes Particle Swarm Optimization (PSO) to optimize a given objective function and saves the results as a CSV file.
 
@@ -264,6 +290,11 @@ def run_pso(lower_boundaries: list[float],
         A callable object encapsulating a tree-based regression model.
     save_results_dir : str | Path
         Directory path to save the results CSV file.
+    save_format : {'csv', 'sqlite', 'both'}, default 'csv'
+        The format for saving optimization results.
+        - 'csv': Saves results to a CSV file.
+        - 'sqlite': Saves results to an SQLite database file. ⚠️ If a database exists, new tables will be created using the target name.
+        - 'both': Saves results to both formats.
     auto_binary_boundaries : bool
         Use `ObjectiveFunction.binary_features` to append as many binary boundaries as needed to `lower_boundaries` and `upper_boundaries` automatically.
     target_name : str or None, optional
@@ -279,14 +310,11 @@
 
     Returns
     -------
-    Tuple[Dict[str, float
-    If `post_hoc_analysis` is None, returns two dictionaries
-
-
-
-    If `post_hoc_analysis` is an integer, returns two dictionaries:
-    - feature_names: Lists of best feature values (after inverse scaling) for each repetition.
-    - target_name: List of best target values across repetitions.
+    Tuple[Dict[str, float], Dict[str, float]] or None
+        - If `post_hoc_analysis` is None, returns two dictionaries containing the
+          single best features and the corresponding target value.
+        - If `post_hoc_analysis` is active, results are streamed directly to a CSV file
+          and this function returns `None`.
 
     Notes
     -----
@@ -311,8 +339,9 @@
     # Append binary boundaries
     binary_number = objective_function.binary_features
     if auto_binary_boundaries and binary_number > 0:
-
-
+        # simplify binary search by constraining range
+        local_lower_boundaries.extend([0.45] * binary_number)
+        local_upper_boundaries.extend([0.55] * binary_number)
 
     # Set the total length of features
     size_of_features = len(local_lower_boundaries)
@@ -328,7 +357,25 @@
     if target_name is None and objective_function.target_name is not None:
         target_name = objective_function.target_name
     if target_name is None:
-
+        raise ValueError(f"'target' name was not provided and was not found in the .joblib object.")
+
+    # --- Setup: Saving Infrastructure ---
+    sanitized_target_name = sanitize_filename(target_name)
+    save_dir_path = make_fullpath(save_results_dir, make=True, enforce="directory")
+    base_filename = f"Optimization_{sanitized_target_name}"
+    csv_path = save_dir_path / f"{base_filename}.csv"
+    db_path = save_dir_path / "Optimization.db"
+    db_table_name = f"{sanitized_target_name}"
+
+    if save_format in ['sqlite', 'both']:
+        # Dynamically create the schema for the database table
+        schema = {name: "REAL" for name in names}
+        schema[target_name] = "REAL"
+        schema = {"result_id": "INTEGER PRIMARY KEY AUTOINCREMENT", **schema}
+
+        # Create table
+        with DatabaseManager(db_path) as db:
+            db.create_table(db_table_name, schema)
 
     pso_arguments = {
         "func":objective_function,
@@ -340,17 +387,29 @@
         "particle_output": False,
     }
 
-    # Dispatcher
-
-
-
-
-
-
-
-
-
-
+    # --- Dispatcher ---
+    # Use a real or dummy context manager to handle the DB connection cleanly
+    db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
+
+    with db_context as db_manager:
+        if post_hoc_analysis is None or post_hoc_analysis <= 1:
+            # --- Single Run Logic ---
+            features_dict, target_dict = _run_single_pso(
+                objective_function, pso_arguments, names, target_name, random_state,
+                save_format, csv_path, db_manager, db_table_name
+            )
+            _LOGGER.info(f"✅ Single optimization complete.")
+            return features_dict, target_dict
+
+        else:
+            # --- Post-Hoc Analysis Logic ---
+            _LOGGER.info(f"🏁 Starting post-hoc analysis with {post_hoc_analysis} repetitions...")
+            _run_post_hoc_pso(
+                objective_function, pso_arguments, names, target_name, post_hoc_analysis,
+                save_format, csv_path, db_manager, db_table_name
+            )
+            _LOGGER.info("✅ Post-hoc analysis complete. Results saved.")
+            return None
 
 
 def _pso(func: ObjectiveFunction,
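The dispatcher above relies on `contextlib.nullcontext` so the same `with` block works whether or not a SQLite connection is needed: `nullcontext()` yields `None` on entry, and `_save_result` skips the database when no manager is present. A self-contained sketch of that pattern follows; `FakeDB` and `save_result` are illustrative stand-ins, not part of the toolbox.

```python
# Stand-in demo of the nullcontext dispatcher pattern used in run_pso above.
# FakeDB mimics a context-managed database handle; names are illustrative.
from contextlib import nullcontext

class FakeDB:
    def __enter__(self):
        print("opening connection")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        print("closing connection")

    def insert_row(self, table: str, row: dict):
        print(f"INSERT INTO {table}: {row}")

def save_result(row: dict, save_format: str, db_manager=None):
    # Mirrors _save_result's guard: only touch the DB when a manager exists.
    if save_format in ("sqlite", "both") and db_manager is not None:
        db_manager.insert_row("results", row)

save_format = "sqlite"  # switch to "csv": nullcontext() then yields db_manager=None
db_context = FakeDB() if save_format in ("sqlite", "both") else nullcontext()
with db_context as db_manager:
    save_result({"feature_a": 1.0, "target": 0.9}, save_format, db_manager)
```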
dragon_ml_toolbox-4.2.0/ml_tools/SQL.py (new file)

@@ -0,0 +1,272 @@
import sqlite3
import pandas as pd
from pathlib import Path
from typing import Union, Dict, Any, Optional, List, Literal
from ._logger import _LOGGER
from ._script_info import _script_info
from .path_manager import make_fullpath


__all__ = [
    "DatabaseManager",
]


class DatabaseManager:
    """
    A user-friendly context manager for handling SQLite database operations.

    This class abstracts the underlying sqlite3 connection and cursor management,
    providing simple methods to execute queries, create tables, and handle data
    insertion and retrieval using pandas DataFrames.

    Parameters
    ----------
    db_path : Union[str, Path]
        The file path to the SQLite database. If the file does not exist,
        it will be created upon connection.

    Example
    -------
    >>> schema = {
    ...     "id": "INTEGER PRIMARY KEY AUTOINCREMENT",
    ...     "run_name": "TEXT NOT NULL",
    ...     "feature_a": "REAL",
    ...     "score": "REAL"
    ... }
    >>> with DatabaseManager("my_results.db") as db:
    ...     db.create_table("experiments", schema)
    ...     data = {"run_name": "first_run", "feature_a": 0.123, "score": 95.5}
    ...     db.insert_row("experiments", data)
    ...     df = db.query_to_dataframe("SELECT * FROM experiments")
    ...     print(df)
    """
    def __init__(self, db_path: Union[str, Path]):
        """Initializes the DatabaseManager with the path to the database file."""
        if isinstance(db_path, str):
            if not db_path.endswith(".db"):
                db_path = db_path + ".db"
        elif isinstance(db_path, Path):
            if db_path.suffix != ".db":
                db_path = db_path.with_suffix(".db")

        self.db_path = make_fullpath(db_path, make=True, enforce="file")
        self.conn: Optional[sqlite3.Connection] = None
        self.cursor: Optional[sqlite3.Cursor] = None

    def __enter__(self):
        """Establishes the database connection and returns the manager instance."""
        try:
            self.conn = sqlite3.connect(self.db_path)
            self.cursor = self.conn.cursor()
            _LOGGER.info(f"✅ Successfully connected to database: {self.db_path}")
            return self
        except sqlite3.Error as e:
            _LOGGER.error(f"❌ Database connection failed: {e}")
            raise  # Re-raise the exception after logging

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Commits changes and closes the database connection."""
        if self.conn:
            if exc_type:  # If an exception occurred, rollback
                self.conn.rollback()
                _LOGGER.warning("⚠️ Rolling back transaction due to an error.")
            else:  # Otherwise, commit the transaction
                self.conn.commit()
            self.conn.close()
            _LOGGER.info(f"❇️ Database connection closed: {self.db_path.name}")

    def create_table(self, table_name: str, schema: Dict[str, str], if_not_exists: bool = True):
        """
        Creates a new table in the database based on a provided schema.

        Parameters
        ----------
        table_name : str
            The name of the table to create.
        schema : Dict[str, str]
            A dictionary where keys are column names and values are their SQL data types
            (e.g., {"id": "INTEGER PRIMARY KEY", "name": "TEXT NOT NULL"}).
        if_not_exists : bool, default=True
            If True, adds "IF NOT EXISTS" to the SQL statement to prevent errors
            if the table already exists.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        columns_def = ", ".join([f'"{col_name}" {col_type}' for col_name, col_type in schema.items()])
        exists_clause = "IF NOT EXISTS" if if_not_exists else ""

        query = f"CREATE TABLE {exists_clause} {table_name} ({columns_def})"

        _LOGGER.info(f"🗂️ Executing: {query}")
        self.cursor.execute(query)

    def insert_row(self, table_name: str, data: Dict[str, Any]):
        """
        Inserts a single row of data into the specified table.

        Parameters
        ----------
        table_name : str
            The name of the target table.
        data : Dict[str, Any]
            A dictionary where keys correspond to column names and values are the
            data to be inserted.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        columns = ', '.join(f'"{k}"' for k in data.keys())
        placeholders = ', '.join(['?'] * len(data))
        values = list(data.values())

        query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"

        self.cursor.execute(query, values)

    def query_to_dataframe(self, query: str, params: Optional[tuple] = None) -> pd.DataFrame:
        """
        Executes a SELECT query and returns the results as a pandas DataFrame.

        Parameters
        ----------
        query : str
            The SQL SELECT statement to execute.
        params : Optional[tuple], default=None
            An optional tuple of parameters to pass to the query for safety
            against SQL injection.

        Returns
        -------
        pd.DataFrame
            A DataFrame containing the query results.
        """
        if not self.conn:
            raise sqlite3.Error("Database connection is not open.")

        return pd.read_sql_query(query, self.conn, params=params)

    def execute_sql(self, query: str, params: Optional[tuple] = None):
        """
        Executes an arbitrary SQL command that does not return data (e.g., UPDATE, DELETE).

        Parameters
        ----------
        query : str
            The SQL statement to execute.
        params : Optional[tuple], default=None
            An optional tuple of parameters for the query.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        self.cursor.execute(query, params if params else ())

    def insert_many(self, table_name: str, data: List[Dict[str, Any]]):
        """
        Inserts multiple rows into the specified table in a single, efficient transaction.

        Parameters
        ----------
        table_name : str
            The name of the target table.
        data : List[Dict[str, Any]]
            A list of dictionaries, where each dictionary represents a row to be inserted.
            All dictionaries should have the same keys.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")
        if not data:
            _LOGGER.warning("⚠️ insert_many called with empty data list. No action taken.")
            return

        # Assume all dicts have the same keys as the first one
        first_row = data[0]
        columns = ', '.join(f'"{k}"' for k in first_row.keys())
        placeholders = ', '.join(['?'] * len(first_row))

        # Create a list of tuples, where each tuple is a row of values
        values_to_insert = [list(row.values()) for row in data]

        query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"

        self.cursor.executemany(query, values_to_insert)
        _LOGGER.info(f"✅ Bulk inserted {len(values_to_insert)} rows into '{table_name}'.")

    def insert_from_dataframe(self, table_name: str, df: pd.DataFrame, if_exists: Literal['fail', 'replace', 'append'] = 'append'):
        """
        Writes records from a pandas DataFrame to the specified SQL table.

        Parameters
        ----------
        table_name : str
            The name of the target SQL table.
        df : pd.DataFrame
            The DataFrame to be written.
        if_exists : str, default 'append'
            How to behave if the table already exists.
            - 'fail': Raise a ValueError.
            - 'replace': Drop the table before inserting new values.
            - 'append': Insert new values to the existing table.
        """
        if not self.conn:
            raise sqlite3.Error("Database connection is not open.")

        df.to_sql(
            table_name,
            self.conn,
            if_exists=if_exists,
            index=False  # Typically, we don't want to save the DataFrame index
        )
        _LOGGER.info(f"✅ Wrote {len(df)} rows from DataFrame to table '{table_name}' using mode '{if_exists}'.")

    def list_tables(self) -> List[str]:
        """Returns a list of all table names in the database."""
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        # The result of the fetch is a list of tuples, e.g., [('table1',), ('table2',)]
        return [table[0] for table in self.cursor.fetchall()]

    def get_table_schema(self, table_name: str) -> pd.DataFrame:
        """
        Retrieves the schema of a specific table and returns it as a DataFrame.

        Returns a DataFrame with columns: cid, name, type, notnull, dflt_value, pk
        """
        if not self.conn:
            raise sqlite3.Error("Database connection is not open.")

        # PRAGMA is a special SQL command in SQLite for database metadata
        return pd.read_sql_query(f'PRAGMA table_info("{table_name}");', self.conn)

    def create_index(self, table_name: str, column_name: str, unique: bool = False):
        """
        Creates an index on a column of a specified table to speed up queries.

        Parameters
        ----------
        table_name : str
            The name of the table containing the column.
        column_name : str
            The name of the column to be indexed.
        unique : bool, default=False
            If True, creates a unique index, which ensures all values in the
            column are unique.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        index_name = f"idx_{table_name}_{column_name}"
        unique_clause = "UNIQUE" if unique else ""

        query = f"CREATE {unique_clause} INDEX IF NOT EXISTS {index_name} ON {table_name} ({column_name})"

        _LOGGER.info(f"🗂️ Executing: {query}")
        self.cursor.execute(query)


def info():
    _script_info(__all__)
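To make the schema-dictionary pattern concrete, here is a runnable plain-sqlite3 sketch of what `create_table` and `insert_row` build under the hood; the table and column names are illustrative.

```python
# Plain-sqlite3 sketch of DatabaseManager's schema-dict pattern (illustrative names).
import sqlite3

schema = {
    "result_id": "INTEGER PRIMARY KEY AUTOINCREMENT",  # same surrogate key run_pso adds
    "feature_a": "REAL",
    "feature_b": "REAL",
    "target": "REAL",
}
columns_def = ", ".join(f'"{name}" {sql_type}' for name, sql_type in schema.items())

conn = sqlite3.connect(":memory:")  # a real path like "Optimization.db" in practice
cur = conn.cursor()
cur.execute(f"CREATE TABLE IF NOT EXISTS results ({columns_def})")

row = {"feature_a": 0.12, "feature_b": 3.4, "target": 0.97}
cols = ", ".join(f'"{k}"' for k in row)
placeholders = ", ".join("?" * len(row))
cur.execute(f"INSERT INTO results ({cols}) VALUES ({placeholders})", list(row.values()))
conn.commit()

print(cur.execute("SELECT * FROM results").fetchall())  # [(1, 0.12, 3.4, 0.97)]
conn.close()
```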
{dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/ml_tools/path_manager.py

@@ -49,18 +49,26 @@ class PathManager:
         for dir_name in base_directories:
             # This logic works for both dev mode and bundled mode
             self._paths[dir_name] = package_root / dir_name
-
+
     def _get_bundle_root(self) -> tuple[bool, Optional[str]]:
         """
-        Checks if the app is running in a PyInstaller bundle and returns the root path.
+        Checks if the app is running in a PyInstaller or Nuitka bundle and returns the root path.
 
         Returns:
-            A tuple (is_bundled, bundle_root_path).
-            path to the temporary directory `_MEIPASS` if bundled, otherwise None.
+            A tuple (is_bundled, bundle_root_path).
         """
+        # --- PyInstaller Check ---
         if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
-            #
+            # The bundle root for PyInstaller is the temporary _MEIPASS directory
             return True, sys._MEIPASS  # type: ignore
+
+        # --- Nuitka Check ---
+        elif '__nuitka_binary_dir' in sys.__dict__:
+            # For Nuitka, the root is the directory of the binary.
+            # Unlike PyInstaller's _MEIPASS, this is the final install location.
+            return True, sys.__dict__['__nuitka_binary_dir']
+
+        # --- Not Bundled ---
         else:
             return False, None
 
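The same detection logic, extracted into a runnable standalone function. The `sys.frozen`/`sys._MEIPASS` test is the standard PyInstaller idiom; the `'__nuitka_binary_dir'` key is the one this diff relies on and should be read as the toolbox's own assumption about Nuitka builds.

```python
# Standalone sketch of the bundle detection added above.
import sys
from typing import Optional

def get_bundle_root() -> tuple[bool, Optional[str]]:
    # PyInstaller: frozen flag plus the temporary extraction dir _MEIPASS
    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
        return True, sys._MEIPASS  # type: ignore[attr-defined]
    # Nuitka: the diff checks for this key on the sys module
    if "__nuitka_binary_dir" in sys.__dict__:
        return True, sys.__dict__["__nuitka_binary_dir"]
    return False, None

print(get_bundle_root())  # (False, None) when run as a plain script
```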
{dragon_ml_toolbox-4.0.0 → dragon_ml_toolbox-4.2.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "4.0.0"
+version = "4.2.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }
@@ -19,6 +19,14 @@ Homepage = "https://github.com/DrAg0n-BoRn/ML_tools"
 Changelog = "https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md"
 
 [project.optional-dependencies]
+# Base all purpose tools
+base = [
+    "pandas",
+    "numpy",
+    "polars",
+    "joblib"
+]
+
 # Machine Learning main toolbox. Additionally Requires PyTorch with CUDA / MPS support if pytorch models are used
 ML = [
     "numpy",
@@ -77,16 +85,14 @@ gui-boost = [
     "numpy",
     "joblib",
     "FreeSimpleGUI>=5.2",
-    "pyinstaller",
     "xgboost",
-    "lightgbm"
+    "lightgbm",
 ]
 
 # GUI for Pytorch - Additionally Requires PyTorch with CUDA / MPS support
 gui-torch = [
     "numpy",
     "FreeSimpleGUI>=5.2",
-    "pyinstaller",
 ]
 
 # For GUIs using plotting features
@@ -95,6 +101,17 @@ plot = [
     "seaborn"
 ]
 
+# APP Bundlers - Choose one
+pyinstaller = [
+    "pyinstaller"
+]
+
+nuitka = [
+    "nuitka",
+    "zstandard",
+    "ordered-set"
+]
+
 [build-system]
 requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
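From a source checkout, the new dependency groups can be confirmed directly with the stdlib TOML parser (Python 3.11+); the relative path is assumed.

```python
# Read the optional-dependency groups straight from pyproject.toml.
# tomllib is stdlib on Python 3.11+; the file path is assumed relative.
import tomllib

with open("pyproject.toml", "rb") as f:
    project = tomllib.load(f)

extras = project["project"]["optional-dependencies"]
print(sorted(extras))    # includes 'base', 'pyinstaller', 'nuitka'
print(extras["nuitka"])  # ['nuitka', 'zstandard', 'ordered-set']
```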
All remaining files listed above with +0 -0 (LICENSE, LICENSE-THIRD-PARTY.md, dependency_links.txt, top_level.txt, setup.cfg, and the unchanged ml_tools modules) were renamed from dragon_ml_toolbox-4.0.0 to dragon_ml_toolbox-4.2.0 without content changes.