junshan-kit 2.4.7__py2.py3-none-any.whl → 2.4.9__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of junshan-kit might be problematic. Click here for more details.

junshan_kit/kit.py CHANGED
@@ -6,7 +6,7 @@
6
6
  """
7
7
 
8
8
  import zipfile
9
- import os, time
9
+ import os, time, openml
10
10
 
11
11
  from selenium import webdriver
12
12
  from selenium.webdriver.common.by import By
@@ -35,7 +35,7 @@ def unzip_file(zip_path: str, unzip_folder: str):
35
35
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
36
36
  zip_ref.extractall(unzip_folder)
37
37
 
38
- print(f" Extracted '{zip_path}' to '{os.path.abspath(unzip_folder)}'")
38
+ print(f"- Extracted '{zip_path}' to '{os.path.abspath(unzip_folder)}'")
39
39
 
40
40
 
41
41
  # =============================================================
@@ -73,13 +73,13 @@ class JianguoyunDownloaderChrome:
73
73
  self.driver = webdriver.Chrome(options=self.chrome_options)
74
74
 
75
75
  def open_page(self):
76
- print(f"🌐 Opening page: {self.url}")
76
+ print(f">>> Opening page: {self.url}")
77
77
  self.driver.get(self.url)
78
- print(f" Page loaded: {self.driver.title}")
78
+ print(f">>> Page loaded: {self.driver.title}")
79
79
 
80
80
  def click_download_button(self):
81
81
  """Find and click the 'Download' button (supports English and Chinese)."""
82
- print("🔍 Searching for the download button...")
82
+ print(">>> Searching for the download button...")
83
83
  wait = WebDriverWait(self.driver, 30)
84
84
 
85
85
  try:
@@ -97,33 +97,33 @@ class JianguoyunDownloaderChrome:
97
97
 
98
98
  # Click using JavaScript to avoid overlay or interaction issues
99
99
  self.driver.execute_script("arguments[0].click();", button)
100
- print(f" Download button clicked. Files will be saved to: {self.download_path}")
100
+ print(f">>> Download button clicked. Files will be saved to: {self.download_path}")
101
101
 
102
102
  # If the cloud service opens a new tab, switch to it
103
103
  time.sleep(3)
104
104
  if len(self.driver.window_handles) > 1:
105
105
  self.driver.switch_to.window(self.driver.window_handles[-1])
106
- print("📂 Switched to the new download tab.")
106
+ print(">>> Switched to the new download tab.")
107
107
 
108
108
  except Exception as e:
109
- print(" Failed to find or click the download button:", e)
109
+ print(">>> Failed to find or click the download button:", e)
110
110
  raise
111
111
 
112
112
 
113
113
  def wait_for_downloads(self, timeout=3600):
114
- print(" Waiting for downloads to finish...")
114
+ print(">>> Waiting for downloads to finish...")
115
115
  start_time = time.time()
116
116
  while time.time() - start_time < timeout:
117
117
  downloading = [f for f in os.listdir(self.download_path) if f.endswith(".crdownload")]
118
118
  if not downloading:
119
- print(" Download completed!")
119
+ print(">>> Download completed!")
120
120
  return
121
121
  time.sleep(2)
122
- print("⚠️ Timeout: download not completed within 1 hour")
122
+ print(">>> Timeout: download not completed within 1 hour")
123
123
 
124
124
  def close(self):
125
125
  self.driver.quit()
126
- print("🚪 Browser closed.")
126
+ print(">>> Browser closed.")
127
127
 
128
128
  def run(self):
129
129
  print('*' * 60)
@@ -132,7 +132,7 @@ class JianguoyunDownloaderChrome:
132
132
  self.click_download_button()
133
133
  self.wait_for_downloads()
134
134
  except Exception as e:
135
- print(" Error:", e)
135
+ print(">>> Error:", e)
136
136
  finally:
137
137
  self.close()
138
138
  print('*' * 60)
@@ -169,13 +169,13 @@ class JianguoyunDownloaderFirefox:
169
169
  self.driver = webdriver.Firefox(service=service, options=options)
170
170
 
171
171
  def open_page(self):
172
- print(f"🌐 Opening page: {self.url}")
172
+ print(f">>> Opening page: {self.url}")
173
173
  self.driver.get(self.url)
174
- print(f" Page loaded: {self.driver.title}")
174
+ print(f">>> Page loaded: {self.driver.title}")
175
175
 
176
176
  def click_download_button(self):
177
177
  """Find and click the 'Download' button (supports English and Chinese)."""
178
- print("🔍 Searching for the download button...")
178
+ print(">>> Searching for the download button...")
179
179
  wait = WebDriverWait(self.driver, 30)
180
180
 
181
181
  try:
@@ -193,21 +193,21 @@ class JianguoyunDownloaderFirefox:
193
193
 
194
194
  # Click using JavaScript to avoid overlay or interaction issues
195
195
  self.driver.execute_script("arguments[0].click();", button)
196
- print(f" Download button clicked. Files will be saved to: {self.download_path}")
196
+ print(f">>> Download button clicked. Files will be saved to: {self.download_path}")
197
197
 
198
198
  # If the cloud service opens a new tab, switch to it
199
199
  time.sleep(3)
200
200
  if len(self.driver.window_handles) > 1:
201
201
  self.driver.switch_to.window(self.driver.window_handles[-1])
202
- print("📂 Switched to the new download tab.")
202
+ print(">>> Switched to the new download tab.")
203
203
 
204
204
  except Exception as e:
205
- print(" Failed to find or click the download button:", e)
205
+ print(">>> Failed to find or click the download button:", e)
206
206
  raise
207
207
 
208
208
  def wait_for_download(self, timeout=3600):
209
209
  """Wait until all downloads are finished (auto-detects browser type)."""
210
- print(" Waiting for downloads to finish...")
210
+ print(">>> Waiting for downloads to finish...")
211
211
  start_time = time.time()
212
212
 
213
213
  # Determine the temporary file extension based on the browser type
@@ -216,13 +216,13 @@ class JianguoyunDownloaderFirefox:
216
216
  while time.time() - start_time < timeout:
217
217
  downloading = [f for f in os.listdir(self.download_path) if f.endswith(temp_ext)]
218
218
  if not downloading:
219
- print(" Download completed!")
219
+ print(">>> Download completed!")
220
220
  return True
221
221
  time.sleep(2)
222
222
 
223
223
 
224
224
  def close(self):
225
- print("🛑 Closing browser...")
225
+ print(">>> Closing browser...")
226
226
  self.driver.quit()
227
227
 
228
228
  def run(self):
@@ -232,17 +232,81 @@ class JianguoyunDownloaderFirefox:
232
232
  self.click_download_button()
233
233
  self.wait_for_download(timeout=3600)
234
234
  except Exception as e:
235
- print(" Error:", e)
235
+ print(">>> Error:", e)
236
236
  finally:
237
237
  self.close()
238
238
  print('*' * 60)
239
239
 
240
- def set_seed(seed=42):
241
- torch.manual_seed(seed)
242
- torch.cuda.manual_seed_all(seed)
243
- np.random.seed(seed)
244
- random.seed(seed)
245
- torch.backends.cudnn.deterministic = True
246
- torch.backends.cudnn.benchmark = False
240
+
241
+ def download_openml_data(data_name):
242
+ """
243
+ Returns
244
+ -------
245
+ X : ndarray, dataframe, or sparse matrix, shape (n_samples, n_columns)
246
+ Dataset
247
+ y : ndarray or pd.Series, shape (n_samples, ) or None
248
+ Target column
249
+ categorical_indicator : boolean ndarray
250
+ Mask that indicate categorical features.
251
+ attribute_names : List[str]
252
+ List of attribute names.
253
+ """
254
+ openml.config.set_root_cache_directory(f"./exp_data/{data_name}")
255
+ dataset = openml.datasets.get_dataset(f'{data_name}', download_data=True)
256
+ X, y, categorical_indicator, attribute_names = dataset.get_data(dataset_format="dataframe")
257
+
258
+ return X, y, categorical_indicator, attribute_names
259
+
260
+
261
+ def import_data_path_to_ignore():
262
+ def get_folder_size(folder_path):
263
+ """Recursively calculate the total size of a folder (in bytes)."""
264
+ total_size = 0
265
+ for root, dirs, files in os.walk(folder_path):
266
+ for f in files:
267
+ try:
268
+ total_size += os.path.getsize(os.path.join(root, f))
269
+ except Exception:
270
+ pass
271
+ return total_size
272
+
273
+ def list_and_ignore_large_folders(folder_path, limit_mb=99):
274
+ """List folder sizes and append large ones (> limit_mb) to .gitignore."""
275
+ gitignore_path = os.path.join(os.getcwd(), ".gitignore")
276
+ ignore_list = []
277
+
278
+ # Read existing .gitignore entries to avoid duplicates
279
+ if os.path.exists(gitignore_path):
280
+ with open(gitignore_path, "r", encoding="utf-8") as f:
281
+ existing_ignores = set(line.strip() for line in f if line.strip())
282
+ else:
283
+ existing_ignores = set()
284
+
285
+ for entry in os.scandir(folder_path):
286
+ if entry.is_dir():
287
+ folder_size_mb = get_folder_size(entry.path) / (1024 * 1024)
288
+ print(f"{entry.path}/ - {folder_size_mb:.2f} MB")
289
+
290
+ if folder_size_mb > limit_mb:
291
+ rel_path = os.path.relpath(entry.path, start=os.getcwd())
292
+ if rel_path not in existing_ignores:
293
+ ignore_list.append(rel_path)
294
+ elif entry.is_file():
295
+ file_size_mb = os.path.getsize(entry.path) / (1024 * 1024)
296
+ print(f"{entry.path} - {file_size_mb:.2f} MB")
297
+
298
+ # Append new paths to .gitignore
299
+ if ignore_list:
300
+ with open(gitignore_path, "a", encoding="utf-8") as f:
301
+ for p in ignore_list:
302
+ f.write(p + "\n")
303
+ print(f"\n✅ The following paths have been added to .gitignore:\n" + "\n".join(ignore_list))
304
+ else:
305
+ print("\nNo folders exceed the size limit (99 MB).")
306
+
307
+ folder_path = "./exp_data"
308
+ list_and_ignore_large_folders(folder_path, limit_mb=99)
309
+
310
+
247
311
 
248
312
 
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: junshan_kit
3
- Version: 2.4.7
3
+ Version: 2.4.9
4
4
  Summary: This is an optimization tool.
5
5
  Author-email: Junshan Yin <junshanyin@163.com>
6
6
  Requires-Dist: kaggle==1.7.4.5
7
7
  Requires-Dist: kagglehub==0.3.13
8
- Requires-Dist: pandas==2.3.3
8
+ Requires-Dist: openml==0.15.1
9
9
  Requires-Dist: selenium==4.36.0
@@ -0,0 +1,12 @@
1
+ junshan_kit/DataHub.py,sha256=D9G2vjCNvDLer4qoKdowgWJChLMQQn7cVhAPZLvRrbE,3332
2
+ junshan_kit/DataProcessor.py,sha256=-6qjG52NDYq746vBPpc0uW2cfbc4syqSWZIzTxJt6fE,11806
3
+ junshan_kit/DataSets.py,sha256=hwGnJsb-Lj90lk6VBwmsDBb3-IA_WgUWzAKayHyq2AI,13391
4
+ junshan_kit/ModelsHub.py,sha256=z9NyC4PTxo3wCxa2XxOfcjrw9NcDs0LCjBGCp6Z-90s,7084
5
+ junshan_kit/ParametersHub.py,sha256=zG3Dus5PZ57j0ZC-Xq4PGQSMt0MYzelhe-g2ZQMyL90,11244
6
+ junshan_kit/Print_Info.py,sha256=yiGc6Qlprj0ds6w2DP7ScAgTBZwswxXqxuIrQ3_liL8,3111
7
+ junshan_kit/TrainingHub.py,sha256=YDs6u__5RVxlY6f5jOILhAi-DRAe2FXkRtc7dvdrENY,2457
8
+ junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ junshan_kit/kit.py,sha256=F9f5qqn9ve-UVoYtXlFmNGl4YJ3eEy6T1yRrC0s-Wpw,12367
10
+ junshan_kit-2.4.9.dist-info/METADATA,sha256=msRObtJFPgLAjRSCFhpEkL4RS_Vh0n5fNC1xRp-0gYs,267
11
+ junshan_kit-2.4.9.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
12
+ junshan_kit-2.4.9.dist-info/RECORD,,
@@ -1,126 +0,0 @@
1
- import torch, os, time
2
- from torch.optim.optimizer import Optimizer
3
- from torch.nn.utils import parameters_to_vector, vector_to_parameters
4
- import junshan_kit.SPBM_func as SPBM_func
5
-
6
- class SPSmax(Optimizer):
7
- def __init__(self, params, model, hyperparams, Paras):
8
- defaults = dict()
9
- super().__init__(params, defaults)
10
- self.model = model
11
- self.c = hyperparams['c']
12
- self.gamma = hyperparams['gamma']
13
- if 'f_star' not in Paras or Paras['f_star'] is None:
14
- self.f_star = 0
15
- else:
16
- self.f_star = Paras['f_star']
17
- self.step_size = []
18
-
19
- def step(self, closure=None):
20
- if closure is None:
21
- raise RuntimeError("Closure required for SPSmax")
22
-
23
- # Reset the gradient and perform forward computation
24
- loss = closure()
25
-
26
- with torch.no_grad():
27
- xk = parameters_to_vector(self.model.parameters())
28
- # print(torch.norm(xk))
29
- g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
30
-
31
- # Step-size
32
- step_size = (loss - self.f_star) / ((self.c * torch.norm(g_k, p=2) ** 2) + 1e-8)
33
- step_size = min(step_size, self.gamma)
34
- self.step_size.append(step_size)
35
-
36
- # Update
37
- xk = xk - step_size * g_k
38
-
39
- # print(len(self.f_his))
40
- vector_to_parameters(xk, self.model.parameters())
41
-
42
- # emporarily return loss (tensor type)
43
- return loss
44
-
45
-
46
- class ALR_SMAG(Optimizer):
47
- def __init__(self, params, model, hyperparams, Paras):
48
- defaults = dict()
49
- super().__init__(params, defaults)
50
- self.model = model
51
- self.c = hyperparams['c']
52
- self.eta_max = hyperparams['eta_max']
53
- self.beta = hyperparams['beta']
54
- if 'f_star' not in Paras or Paras['f_star'] is None:
55
- self.f_star = 0
56
- else:
57
- self.f_star = Paras['f_star']
58
- self.step_size = []
59
- self.d_k = torch.zeros_like(parameters_to_vector(self.model.parameters()))
60
-
61
- def step(self, closure=None):
62
- if closure is None:
63
- raise RuntimeError("Closure required for SPSmax")
64
-
65
- # Reset the gradient and perform forward computation
66
- loss = closure()
67
-
68
- with torch.no_grad():
69
- xk = parameters_to_vector(self.model.parameters())
70
- # print(torch.norm(xk))
71
- g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
72
-
73
- self.d_k = self.beta * self.d_k + g_k
74
- # Step-size
75
- step_size = (loss - self.f_star) / ((self.c * torch.norm(self.d_k, p=2) ** 2) + 1e-8)
76
- step_size = min(step_size, self.eta_max)
77
- self.step_size.append(step_size)
78
-
79
- # Update
80
- xk = xk - step_size * g_k
81
-
82
- # print(len(self.f_his))
83
- vector_to_parameters(xk, self.model.parameters())
84
-
85
- # emporarily return loss (tensor type)
86
- return loss
87
-
88
- # ------------ Bundle Method --------------------
89
- class Bundle(Optimizer):
90
- def __init__(self, params, model, hyperparams, Paras):
91
- defaults = dict()
92
- super().__init__(params, defaults)
93
- self.model = model
94
- self.cutting_num = hyperparams['cutting_number']
95
- self.delta = hyperparams['delta']
96
- self.Paras = Paras
97
-
98
- self.x_his, self.g_his, self.f_his = [], [], []
99
-
100
- def step(self, closure=None):
101
- if closure is None:
102
- raise RuntimeError("Closure required for CuttingPlaneOptimizer")
103
-
104
- # Reset the gradient and perform forward computation
105
- loss = closure()
106
-
107
- with torch.no_grad():
108
- xk = parameters_to_vector(self.model.parameters())
109
- # print(torch.norm(xk))
110
- g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
111
-
112
- # Add cutting plane
113
- x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his,xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
114
-
115
- # the coefficient of dual problem
116
- Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
117
-
118
- # SOVER (dual)
119
- xk = SPBM_func.bundle(Gk, ek, xk, self.delta, self.Paras)
120
-
121
- # print(len(self.f_his))
122
- vector_to_parameters(xk, self.model.parameters())
123
-
124
- # loss(tensor)
125
- return loss
126
-