junshan-kit 2.4.7__py2.py3-none-any.whl → 2.4.9__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of junshan-kit might be problematic. Click here for more details.
- junshan_kit/DataHub.py +114 -0
- junshan_kit/DataProcessor.py +114 -24
- junshan_kit/DataSets.py +186 -37
- junshan_kit/{Models.py → ModelsHub.py} +5 -0
- junshan_kit/ParametersHub.py +404 -0
- junshan_kit/Print_Info.py +6 -2
- junshan_kit/TrainingHub.py +75 -0
- junshan_kit/kit.py +94 -30
- {junshan_kit-2.4.7.dist-info → junshan_kit-2.4.9.dist-info}/METADATA +2 -2
- junshan_kit-2.4.9.dist-info/RECORD +12 -0
- junshan_kit/ComOptimizers.py +0 -126
- junshan_kit/ExperimentHub.py +0 -338
- junshan_kit/SPBM.py +0 -350
- junshan_kit/SPBM_func.py +0 -601
- junshan_kit/TrainingParas.py +0 -470
- junshan_kit/check_args.py +0 -116
- junshan_kit/datahub.py +0 -281
- junshan_kit-2.4.7.dist-info/RECORD +0 -16
- {junshan_kit-2.4.7.dist-info → junshan_kit-2.4.9.dist-info}/WHEEL +0 -0
junshan_kit/kit.py
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import zipfile
|
|
9
|
-
import os, time
|
|
9
|
+
import os, time, openml
|
|
10
10
|
|
|
11
11
|
from selenium import webdriver
|
|
12
12
|
from selenium.webdriver.common.by import By
|
|
@@ -35,7 +35,7 @@ def unzip_file(zip_path: str, unzip_folder: str):
|
|
|
35
35
|
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
|
|
36
36
|
zip_ref.extractall(unzip_folder)
|
|
37
37
|
|
|
38
|
-
print(f"
|
|
38
|
+
print(f"- Extracted '{zip_path}' to '{os.path.abspath(unzip_folder)}'")
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
# =============================================================
|
|
@@ -73,13 +73,13 @@ class JianguoyunDownloaderChrome:
|
|
|
73
73
|
self.driver = webdriver.Chrome(options=self.chrome_options)
|
|
74
74
|
|
|
75
75
|
def open_page(self):
|
|
76
|
-
print(f"
|
|
76
|
+
print(f">>> Opening page: {self.url}")
|
|
77
77
|
self.driver.get(self.url)
|
|
78
|
-
print(f"
|
|
78
|
+
print(f">>> Page loaded: {self.driver.title}")
|
|
79
79
|
|
|
80
80
|
def click_download_button(self):
|
|
81
81
|
"""Find and click the 'Download' button (supports English and Chinese)."""
|
|
82
|
-
print("
|
|
82
|
+
print(">>> Searching for the download button...")
|
|
83
83
|
wait = WebDriverWait(self.driver, 30)
|
|
84
84
|
|
|
85
85
|
try:
|
|
@@ -97,33 +97,33 @@ class JianguoyunDownloaderChrome:
|
|
|
97
97
|
|
|
98
98
|
# Click using JavaScript to avoid overlay or interaction issues
|
|
99
99
|
self.driver.execute_script("arguments[0].click();", button)
|
|
100
|
-
print(f"
|
|
100
|
+
print(f">>> Download button clicked. Files will be saved to: {self.download_path}")
|
|
101
101
|
|
|
102
102
|
# If the cloud service opens a new tab, switch to it
|
|
103
103
|
time.sleep(3)
|
|
104
104
|
if len(self.driver.window_handles) > 1:
|
|
105
105
|
self.driver.switch_to.window(self.driver.window_handles[-1])
|
|
106
|
-
print("
|
|
106
|
+
print(">>> Switched to the new download tab.")
|
|
107
107
|
|
|
108
108
|
except Exception as e:
|
|
109
|
-
print("
|
|
109
|
+
print(">>> Failed to find or click the download button:", e)
|
|
110
110
|
raise
|
|
111
111
|
|
|
112
112
|
|
|
113
113
|
def wait_for_downloads(self, timeout=3600):
|
|
114
|
-
print("
|
|
114
|
+
print(">>> Waiting for downloads to finish...")
|
|
115
115
|
start_time = time.time()
|
|
116
116
|
while time.time() - start_time < timeout:
|
|
117
117
|
downloading = [f for f in os.listdir(self.download_path) if f.endswith(".crdownload")]
|
|
118
118
|
if not downloading:
|
|
119
|
-
print("
|
|
119
|
+
print(">>> Download completed!")
|
|
120
120
|
return
|
|
121
121
|
time.sleep(2)
|
|
122
|
-
print("
|
|
122
|
+
print(">>> Timeout: download not completed within 1 hour")
|
|
123
123
|
|
|
124
124
|
def close(self):
|
|
125
125
|
self.driver.quit()
|
|
126
|
-
print("
|
|
126
|
+
print(">>> Browser closed.")
|
|
127
127
|
|
|
128
128
|
def run(self):
|
|
129
129
|
print('*' * 60)
|
|
@@ -132,7 +132,7 @@ class JianguoyunDownloaderChrome:
|
|
|
132
132
|
self.click_download_button()
|
|
133
133
|
self.wait_for_downloads()
|
|
134
134
|
except Exception as e:
|
|
135
|
-
print("
|
|
135
|
+
print(">>> Error:", e)
|
|
136
136
|
finally:
|
|
137
137
|
self.close()
|
|
138
138
|
print('*' * 60)
|
|
@@ -169,13 +169,13 @@ class JianguoyunDownloaderFirefox:
|
|
|
169
169
|
self.driver = webdriver.Firefox(service=service, options=options)
|
|
170
170
|
|
|
171
171
|
def open_page(self):
|
|
172
|
-
print(f"
|
|
172
|
+
print(f">>> Opening page: {self.url}")
|
|
173
173
|
self.driver.get(self.url)
|
|
174
|
-
print(f"
|
|
174
|
+
print(f">>> Page loaded: {self.driver.title}")
|
|
175
175
|
|
|
176
176
|
def click_download_button(self):
|
|
177
177
|
"""Find and click the 'Download' button (supports English and Chinese)."""
|
|
178
|
-
print("
|
|
178
|
+
print(">>> Searching for the download button...")
|
|
179
179
|
wait = WebDriverWait(self.driver, 30)
|
|
180
180
|
|
|
181
181
|
try:
|
|
@@ -193,21 +193,21 @@ class JianguoyunDownloaderFirefox:
|
|
|
193
193
|
|
|
194
194
|
# Click using JavaScript to avoid overlay or interaction issues
|
|
195
195
|
self.driver.execute_script("arguments[0].click();", button)
|
|
196
|
-
print(f"
|
|
196
|
+
print(f">>> Download button clicked. Files will be saved to: {self.download_path}")
|
|
197
197
|
|
|
198
198
|
# If the cloud service opens a new tab, switch to it
|
|
199
199
|
time.sleep(3)
|
|
200
200
|
if len(self.driver.window_handles) > 1:
|
|
201
201
|
self.driver.switch_to.window(self.driver.window_handles[-1])
|
|
202
|
-
print("
|
|
202
|
+
print(">>> Switched to the new download tab.")
|
|
203
203
|
|
|
204
204
|
except Exception as e:
|
|
205
|
-
print("
|
|
205
|
+
print(">>> Failed to find or click the download button:", e)
|
|
206
206
|
raise
|
|
207
207
|
|
|
208
208
|
def wait_for_download(self, timeout=3600):
|
|
209
209
|
"""Wait until all downloads are finished (auto-detects browser type)."""
|
|
210
|
-
print("
|
|
210
|
+
print(">>> Waiting for downloads to finish...")
|
|
211
211
|
start_time = time.time()
|
|
212
212
|
|
|
213
213
|
# Determine the temporary file extension based on the browser type
|
|
@@ -216,13 +216,13 @@ class JianguoyunDownloaderFirefox:
|
|
|
216
216
|
while time.time() - start_time < timeout:
|
|
217
217
|
downloading = [f for f in os.listdir(self.download_path) if f.endswith(temp_ext)]
|
|
218
218
|
if not downloading:
|
|
219
|
-
print("
|
|
219
|
+
print(">>> Download completed!")
|
|
220
220
|
return True
|
|
221
221
|
time.sleep(2)
|
|
222
222
|
|
|
223
223
|
|
|
224
224
|
def close(self):
|
|
225
|
-
print("
|
|
225
|
+
print(">>> Closing browser...")
|
|
226
226
|
self.driver.quit()
|
|
227
227
|
|
|
228
228
|
def run(self):
|
|
@@ -232,17 +232,81 @@ class JianguoyunDownloaderFirefox:
|
|
|
232
232
|
self.click_download_button()
|
|
233
233
|
self.wait_for_download(timeout=3600)
|
|
234
234
|
except Exception as e:
|
|
235
|
-
print("
|
|
235
|
+
print(">>> Error:", e)
|
|
236
236
|
finally:
|
|
237
237
|
self.close()
|
|
238
238
|
print('*' * 60)
|
|
239
239
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
240
|
+
|
|
241
|
+
def download_openml_data(data_name):
|
|
242
|
+
"""
|
|
243
|
+
Returns
|
|
244
|
+
-------
|
|
245
|
+
X : ndarray, dataframe, or sparse matrix, shape (n_samples, n_columns)
|
|
246
|
+
Dataset
|
|
247
|
+
y : ndarray or pd.Series, shape (n_samples, ) or None
|
|
248
|
+
Target column
|
|
249
|
+
categorical_indicator : boolean ndarray
|
|
250
|
+
Mask that indicate categorical features.
|
|
251
|
+
attribute_names : List[str]
|
|
252
|
+
List of attribute names.
|
|
253
|
+
"""
|
|
254
|
+
openml.config.set_root_cache_directory(f"./exp_data/{data_name}")
|
|
255
|
+
dataset = openml.datasets.get_dataset(f'{data_name}', download_data=True)
|
|
256
|
+
X, y, categorical_indicator, attribute_names = dataset.get_data(dataset_format="dataframe")
|
|
257
|
+
|
|
258
|
+
return X, y, categorical_indicator, attribute_names
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def import_data_path_to_ignore():
|
|
262
|
+
def get_folder_size(folder_path):
|
|
263
|
+
"""Recursively calculate the total size of a folder (in bytes)."""
|
|
264
|
+
total_size = 0
|
|
265
|
+
for root, dirs, files in os.walk(folder_path):
|
|
266
|
+
for f in files:
|
|
267
|
+
try:
|
|
268
|
+
total_size += os.path.getsize(os.path.join(root, f))
|
|
269
|
+
except Exception:
|
|
270
|
+
pass
|
|
271
|
+
return total_size
|
|
272
|
+
|
|
273
|
+
def list_and_ignore_large_folders(folder_path, limit_mb=99):
|
|
274
|
+
"""List folder sizes and append large ones (> limit_mb) to .gitignore."""
|
|
275
|
+
gitignore_path = os.path.join(os.getcwd(), ".gitignore")
|
|
276
|
+
ignore_list = []
|
|
277
|
+
|
|
278
|
+
# Read existing .gitignore entries to avoid duplicates
|
|
279
|
+
if os.path.exists(gitignore_path):
|
|
280
|
+
with open(gitignore_path, "r", encoding="utf-8") as f:
|
|
281
|
+
existing_ignores = set(line.strip() for line in f if line.strip())
|
|
282
|
+
else:
|
|
283
|
+
existing_ignores = set()
|
|
284
|
+
|
|
285
|
+
for entry in os.scandir(folder_path):
|
|
286
|
+
if entry.is_dir():
|
|
287
|
+
folder_size_mb = get_folder_size(entry.path) / (1024 * 1024)
|
|
288
|
+
print(f"{entry.path}/ - {folder_size_mb:.2f} MB")
|
|
289
|
+
|
|
290
|
+
if folder_size_mb > limit_mb:
|
|
291
|
+
rel_path = os.path.relpath(entry.path, start=os.getcwd())
|
|
292
|
+
if rel_path not in existing_ignores:
|
|
293
|
+
ignore_list.append(rel_path)
|
|
294
|
+
elif entry.is_file():
|
|
295
|
+
file_size_mb = os.path.getsize(entry.path) / (1024 * 1024)
|
|
296
|
+
print(f"{entry.path} - {file_size_mb:.2f} MB")
|
|
297
|
+
|
|
298
|
+
# Append new paths to .gitignore
|
|
299
|
+
if ignore_list:
|
|
300
|
+
with open(gitignore_path, "a", encoding="utf-8") as f:
|
|
301
|
+
for p in ignore_list:
|
|
302
|
+
f.write(p + "\n")
|
|
303
|
+
print(f"\n✅ The following paths have been added to .gitignore:\n" + "\n".join(ignore_list))
|
|
304
|
+
else:
|
|
305
|
+
print("\nNo folders exceed the size limit (99 MB).")
|
|
306
|
+
|
|
307
|
+
folder_path = "./exp_data"
|
|
308
|
+
list_and_ignore_large_folders(folder_path, limit_mb=99)
|
|
309
|
+
|
|
310
|
+
|
|
247
311
|
|
|
248
312
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: junshan_kit
|
|
3
|
-
Version: 2.4.7
|
|
3
|
+
Version: 2.4.9
|
|
4
4
|
Summary: This is an optimization tool.
|
|
5
5
|
Author-email: Junshan Yin <junshanyin@163.com>
|
|
6
6
|
Requires-Dist: kaggle==1.7.4.5
|
|
7
7
|
Requires-Dist: kagglehub==0.3.13
|
|
8
|
-
Requires-Dist:
|
|
8
|
+
Requires-Dist: openml==0.15.1
|
|
9
9
|
Requires-Dist: selenium==4.36.0
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
junshan_kit/DataHub.py,sha256=D9G2vjCNvDLer4qoKdowgWJChLMQQn7cVhAPZLvRrbE,3332
|
|
2
|
+
junshan_kit/DataProcessor.py,sha256=-6qjG52NDYq746vBPpc0uW2cfbc4syqSWZIzTxJt6fE,11806
|
|
3
|
+
junshan_kit/DataSets.py,sha256=hwGnJsb-Lj90lk6VBwmsDBb3-IA_WgUWzAKayHyq2AI,13391
|
|
4
|
+
junshan_kit/ModelsHub.py,sha256=z9NyC4PTxo3wCxa2XxOfcjrw9NcDs0LCjBGCp6Z-90s,7084
|
|
5
|
+
junshan_kit/ParametersHub.py,sha256=zG3Dus5PZ57j0ZC-Xq4PGQSMt0MYzelhe-g2ZQMyL90,11244
|
|
6
|
+
junshan_kit/Print_Info.py,sha256=yiGc6Qlprj0ds6w2DP7ScAgTBZwswxXqxuIrQ3_liL8,3111
|
|
7
|
+
junshan_kit/TrainingHub.py,sha256=YDs6u__5RVxlY6f5jOILhAi-DRAe2FXkRtc7dvdrENY,2457
|
|
8
|
+
junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
junshan_kit/kit.py,sha256=F9f5qqn9ve-UVoYtXlFmNGl4YJ3eEy6T1yRrC0s-Wpw,12367
|
|
10
|
+
junshan_kit-2.4.9.dist-info/METADATA,sha256=msRObtJFPgLAjRSCFhpEkL4RS_Vh0n5fNC1xRp-0gYs,267
|
|
11
|
+
junshan_kit-2.4.9.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
|
|
12
|
+
junshan_kit-2.4.9.dist-info/RECORD,,
|
junshan_kit/ComOptimizers.py
DELETED
|
@@ -1,126 +0,0 @@
|
|
|
1
|
-
import torch, os, time
|
|
2
|
-
from torch.optim.optimizer import Optimizer
|
|
3
|
-
from torch.nn.utils import parameters_to_vector, vector_to_parameters
|
|
4
|
-
import junshan_kit.SPBM_func as SPBM_func
|
|
5
|
-
|
|
6
|
-
class SPSmax(Optimizer):
|
|
7
|
-
def __init__(self, params, model, hyperparams, Paras):
|
|
8
|
-
defaults = dict()
|
|
9
|
-
super().__init__(params, defaults)
|
|
10
|
-
self.model = model
|
|
11
|
-
self.c = hyperparams['c']
|
|
12
|
-
self.gamma = hyperparams['gamma']
|
|
13
|
-
if 'f_star' not in Paras or Paras['f_star'] is None:
|
|
14
|
-
self.f_star = 0
|
|
15
|
-
else:
|
|
16
|
-
self.f_star = Paras['f_star']
|
|
17
|
-
self.step_size = []
|
|
18
|
-
|
|
19
|
-
def step(self, closure=None):
|
|
20
|
-
if closure is None:
|
|
21
|
-
raise RuntimeError("Closure required for SPSmax")
|
|
22
|
-
|
|
23
|
-
# Reset the gradient and perform forward computation
|
|
24
|
-
loss = closure()
|
|
25
|
-
|
|
26
|
-
with torch.no_grad():
|
|
27
|
-
xk = parameters_to_vector(self.model.parameters())
|
|
28
|
-
# print(torch.norm(xk))
|
|
29
|
-
g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
|
|
30
|
-
|
|
31
|
-
# Step-size
|
|
32
|
-
step_size = (loss - self.f_star) / ((self.c * torch.norm(g_k, p=2) ** 2) + 1e-8)
|
|
33
|
-
step_size = min(step_size, self.gamma)
|
|
34
|
-
self.step_size.append(step_size)
|
|
35
|
-
|
|
36
|
-
# Update
|
|
37
|
-
xk = xk - step_size * g_k
|
|
38
|
-
|
|
39
|
-
# print(len(self.f_his))
|
|
40
|
-
vector_to_parameters(xk, self.model.parameters())
|
|
41
|
-
|
|
42
|
-
# emporarily return loss (tensor type)
|
|
43
|
-
return loss
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class ALR_SMAG(Optimizer):
|
|
47
|
-
def __init__(self, params, model, hyperparams, Paras):
|
|
48
|
-
defaults = dict()
|
|
49
|
-
super().__init__(params, defaults)
|
|
50
|
-
self.model = model
|
|
51
|
-
self.c = hyperparams['c']
|
|
52
|
-
self.eta_max = hyperparams['eta_max']
|
|
53
|
-
self.beta = hyperparams['beta']
|
|
54
|
-
if 'f_star' not in Paras or Paras['f_star'] is None:
|
|
55
|
-
self.f_star = 0
|
|
56
|
-
else:
|
|
57
|
-
self.f_star = Paras['f_star']
|
|
58
|
-
self.step_size = []
|
|
59
|
-
self.d_k = torch.zeros_like(parameters_to_vector(self.model.parameters()))
|
|
60
|
-
|
|
61
|
-
def step(self, closure=None):
|
|
62
|
-
if closure is None:
|
|
63
|
-
raise RuntimeError("Closure required for SPSmax")
|
|
64
|
-
|
|
65
|
-
# Reset the gradient and perform forward computation
|
|
66
|
-
loss = closure()
|
|
67
|
-
|
|
68
|
-
with torch.no_grad():
|
|
69
|
-
xk = parameters_to_vector(self.model.parameters())
|
|
70
|
-
# print(torch.norm(xk))
|
|
71
|
-
g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
|
|
72
|
-
|
|
73
|
-
self.d_k = self.beta * self.d_k + g_k
|
|
74
|
-
# Step-size
|
|
75
|
-
step_size = (loss - self.f_star) / ((self.c * torch.norm(self.d_k, p=2) ** 2) + 1e-8)
|
|
76
|
-
step_size = min(step_size, self.eta_max)
|
|
77
|
-
self.step_size.append(step_size)
|
|
78
|
-
|
|
79
|
-
# Update
|
|
80
|
-
xk = xk - step_size * g_k
|
|
81
|
-
|
|
82
|
-
# print(len(self.f_his))
|
|
83
|
-
vector_to_parameters(xk, self.model.parameters())
|
|
84
|
-
|
|
85
|
-
# emporarily return loss (tensor type)
|
|
86
|
-
return loss
|
|
87
|
-
|
|
88
|
-
# ------------ Bundle Method --------------------
|
|
89
|
-
class Bundle(Optimizer):
|
|
90
|
-
def __init__(self, params, model, hyperparams, Paras):
|
|
91
|
-
defaults = dict()
|
|
92
|
-
super().__init__(params, defaults)
|
|
93
|
-
self.model = model
|
|
94
|
-
self.cutting_num = hyperparams['cutting_number']
|
|
95
|
-
self.delta = hyperparams['delta']
|
|
96
|
-
self.Paras = Paras
|
|
97
|
-
|
|
98
|
-
self.x_his, self.g_his, self.f_his = [], [], []
|
|
99
|
-
|
|
100
|
-
def step(self, closure=None):
|
|
101
|
-
if closure is None:
|
|
102
|
-
raise RuntimeError("Closure required for CuttingPlaneOptimizer")
|
|
103
|
-
|
|
104
|
-
# Reset the gradient and perform forward computation
|
|
105
|
-
loss = closure()
|
|
106
|
-
|
|
107
|
-
with torch.no_grad():
|
|
108
|
-
xk = parameters_to_vector(self.model.parameters())
|
|
109
|
-
# print(torch.norm(xk))
|
|
110
|
-
g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
|
|
111
|
-
|
|
112
|
-
# Add cutting plane
|
|
113
|
-
x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his,xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
|
|
114
|
-
|
|
115
|
-
# the coefficient of dual problem
|
|
116
|
-
Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
|
|
117
|
-
|
|
118
|
-
# SOVER (dual)
|
|
119
|
-
xk = SPBM_func.bundle(Gk, ek, xk, self.delta, self.Paras)
|
|
120
|
-
|
|
121
|
-
# print(len(self.f_his))
|
|
122
|
-
vector_to_parameters(xk, self.model.parameters())
|
|
123
|
-
|
|
124
|
-
# loss(tensor)
|
|
125
|
-
return loss
|
|
126
|
-
|