junshan-kit 2.7.3__py2.py3-none-any.whl → 2.8.5__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,9 +8,11 @@
8
8
  import pandas as pd
9
9
  import numpy as np
10
10
  import torch, bz2
11
+ from typing import Optional
11
12
  from torch.utils.data import random_split, Subset
12
13
  from sklearn.datasets import load_svmlight_file
13
14
  from sklearn.preprocessing import StandardScaler
15
+ from junshan_kit import ParametersHub
14
16
 
15
17
 
16
18
  class CSV_TO_Pandas:
@@ -386,7 +388,7 @@ def get_libsvm_bz2_data(train_path, test_path, data_name, Paras, split = True):
386
388
  transform = "-1 → 0 for binary, y-1 for multi-class"
387
389
  train_data = LibSVMDataset_bz2(train_path)
388
390
 
389
- if data_name in ["Duke", "Ijcnn"]:
391
+ if data_name in ["Duke", "Ijcnn", "RCV1"]:
390
392
  test_data = LibSVMDataset_bz2(test_path)
391
393
  split = False
392
394
  else:
@@ -410,3 +412,48 @@ def get_libsvm_bz2_data(train_path, test_path, data_name, Paras, split = True):
410
412
  # assert False
411
413
 
412
414
  return train_dataset, test_dataset, transform
415
+
416
+
417
def subset(dataset, ratio_or_num, seed=None) -> Subset:
    """
    Randomly sample a subset from a dataset.

    Parameters
    ----------
    dataset : torch.utils.data.Dataset
        The dataset to sample from. Only ``len(dataset)`` is consulted here;
        the items themselves are accessed lazily through the returned Subset.
    ratio_or_num : float or int
        If float in (0, 1], treated as sampling ratio.
        Otherwise, truncated with ``int()`` and treated as absolute number
        of samples. The resulting count is clamped to ``[1, len(dataset)]``
        (an empty dataset therefore yields an empty subset).
    seed : int, optional
        Random seed for reproducibility. When given, a private generator is
        seeded with it, so the result does not depend on (or disturb) torch's
        global RNG. When ``None``, indices are drawn from torch's global RNG.

    Returns
    -------
    torch.utils.data.Subset
        A randomly sampled subset of the dataset.

    Raises
    ------
    ValueError
        If ``ratio_or_num`` is negative.
    """
    if ratio_or_num < 0:
        raise ValueError(f"ratio_or_num must be non-negative, got {ratio_or_num}")

    dataset_len = len(dataset)

    # Determine number of samples
    if isinstance(ratio_or_num, float) and 0 < ratio_or_num <= 1:
        num = int(round(dataset_len * ratio_or_num))
    else:
        num = int(ratio_or_num)

    # Clamp to valid range: at least one sample, never more than available
    num = min(max(num, 1), dataset_len)

    # A freshly constructed torch.Generator() always starts from the same
    # built-in default seed, so using one without seeding it would return the
    # same "random" subset on every call. Only build a private generator when
    # the caller asked for a specific seed; otherwise use the global RNG.
    generator = None
    if seed is not None:
        generator = torch.Generator()
        generator.manual_seed(seed)

    # Random sampling: take the first `num` entries of a random permutation
    indices = torch.randperm(dataset_len, generator=generator)[:num].tolist()

    return Subset(dataset, indices)
junshan_kit/FiguresHub.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  ----------------------------------------------------------------------
3
3
  >>> Author : Junshan Yin
4
- >>> Last Updated : 2025-11-14
4
+ >>> Last Updated : 2025-12-19
5
5
  ----------------------------------------------------------------------
6
6
  """
7
7
  import math, os
@@ -22,6 +22,8 @@ def marker_schedule(marker_schedule=None):
22
22
  "SPSmax": "4", # tri-right
23
23
  "SPBM-PF": "*", # star
24
24
  "SPBM-TR": "s", # square
25
+ "SPBM-PF-NoneCut": "s", # square
26
+ "SPBM-TR-NoneCut": "s", # square
25
27
  }
26
28
 
27
29
  else:
@@ -64,6 +66,8 @@ def colors_schedule(colors_schedule=None):
64
66
  "SPSmax": "#BA6262",
65
67
  "SPBM-PF": "#1f77b4",
66
68
  "SPBM-TR": "#d62728",
69
+ "SPBM-PF-NoneCut": "#8c564b",
70
+ "SPBM-TR-NoneCut": "#e377c2",
67
71
  }
68
72
 
69
73
  else:
@@ -112,7 +116,7 @@ def Search_Paras(Paras, args, model_name, data_name, optimizer_name, metric_key
112
116
  fig.delaxes(axes[i])
113
117
 
114
118
 
115
- plt.suptitle(f'{model_name} on {data_name} - {optimizer_name} (training/test samples: {Paras["train_data_num"]}/{Paras["test_data_num"]}), {Paras["device"]}', fontsize=16)
119
+ plt.suptitle(f'{model_name} on {data_name} - {optimizer_name}, (training, test) = ({Paras['train_data_num']}/{Paras['train_data_all_num']}, {Paras['test_data_num']}/{Paras['test_data_all_num']}), {Paras["device"]}, batch_size: {Paras["batch_size"]}', fontsize=16)
116
120
  plt.tight_layout(rect=(0, 0, 1, 0.9))
117
121
 
118
122
  filename = f'{Paras["Results_folder"]}/{metric_key}_{ParametersHub.model_abbr(model_name)}_{data_name}_{optimizer_name}.pdf'
@@ -220,19 +224,19 @@ def Mul_Plot(model_name, info_dict, Exp_name = "SPBM", cols = 3, save_path = Non
220
224
  )
221
225
 
222
226
  plt.tight_layout()
223
- if save_name is None:
224
- save_path = f'{model_name}.pdf'
227
+ if save_path is None:
228
+ save_path_ = f'{model_name}.pdf'
225
229
  else:
226
- os.makedirs(save_name, exist_ok=True)
227
- save_path = f'{save_name}/{save_name}.pdf'
228
- plt.savefig(save_path, bbox_inches="tight")
230
+ os.makedirs(save_path, exist_ok=True)
231
+ save_path_ = f'{save_path}/{save_name}.pdf'
232
+ plt.savefig(save_path_, bbox_inches="tight")
229
233
  if fig_show:
230
234
  plt.show()
231
235
  plt.close() # Close the fig
232
236
 
233
237
 
234
238
 
235
- def Opt_Paras_Plot(model_name, info_dict, Exp_name = "SPBM", svae_path = None, save_name = None, fig_show = False):
239
+ def Opt_Paras_Plot(model_name, info_dict, Exp_name = "SPBM", save_path = None, save_name = None, fig_show = False):
236
240
 
237
241
  mpl.rcParams['font.family'] = 'Times New Roman'
238
242
  mpl.rcParams["mathtext.fontset"] = "stix"
@@ -271,16 +275,16 @@ def Opt_Paras_Plot(model_name, info_dict, Exp_name = "SPBM", svae_path = None, s
271
275
  plt.tight_layout() # Adjust layout so the legend fits
272
276
  plt.xlabel("epochs") # Or whatever your x-axis represents
273
277
  plt.ylabel(f'{ParametersHub.fig_ylabel(info_dict[data_name]["metric_key"])}')
274
- if save_name is None:
275
- save_path = f'{model_name}.pdf'
278
+ if save_path is None:
279
+ save_path_ = f'{model_name}.pdf'
276
280
  else:
277
- os.makedirs(save_name, exist_ok=True)
278
- save_path = f'{save_name}/{save_name}.pdf'
279
- plt.savefig(save_path, bbox_inches="tight")
280
-
281
+ os.makedirs(save_path, exist_ok=True)
282
+ save_path_ = f'{save_path}/{save_name}.pdf'
283
+ plt.savefig(save_path_, bbox_inches="tight")
281
284
  if fig_show:
282
285
  plt.show()
283
286
 
284
287
  plt.close()
285
288
 
286
289
 
290
+
@@ -226,7 +226,6 @@ class TR_NoneCut(Optimizer):
226
226
  super().__init__(params, defaults)
227
227
  self.model = model
228
228
  self.cutting_num = hyperparams['cutting_number']
229
- self.M = hyperparams['M']
230
229
  self.delta = hyperparams['delta']
231
230
  self.Paras = Paras
232
231
 
@@ -254,7 +253,9 @@ class TR_NoneCut(Optimizer):
254
253
  Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
255
254
 
256
255
  # SOLVER (dual)
257
- xk = SPBM_func.subproblem_tr_NoneLower(Gk, ek, xk, rk, self.Paras)
256
+ # xk = SPBM_func.subproblem_tr_NoneLower(Gk, ek, xk, rk, self.Paras)
257
+
258
+ xk = SPBM_func.subproblem_tr_2(Gk, ek, xk, rk, self.Paras)
258
259
 
259
260
  # print(len(self.f_his))
260
261
  vector_to_parameters(xk, self.model.parameters())
@@ -312,7 +313,6 @@ class PF_NoneCut(Optimizer):
312
313
  super().__init__(params, defaults)
313
314
  self.model = model
314
315
  self.cutting_num = hyperparams['cutting_number']
315
- self.M = hyperparams['M']
316
316
  self.delta = hyperparams['delta']
317
317
  self.Paras = Paras
318
318
 
@@ -340,7 +340,9 @@ class PF_NoneCut(Optimizer):
340
340
  Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
341
341
 
342
342
  # SOLVER (dual)
343
- xk = SPBM_func.subproblem_pf_NoneLower(Gk, ek, xk, self.delta, self.Paras)
343
+ # xk = SPBM_func.subproblem_pf_NoneLower(Gk, ek, xk, self.delta, self.Paras)
344
+
345
+ xk = SPBM_func.subproblem_pf(Gk, ek, xk, self.delta, self.Paras)
344
346
 
345
347
  # print(len(self.f_his))
346
348
  vector_to_parameters(xk, self.model.parameters())
@@ -59,6 +59,8 @@ class args:
59
59
  "AIP": "Adult_Income_Prediction",
60
60
  "CCFD": "Credit_Card_Fraud_Detection",
61
61
  "Ijcnn": "Ijcnn",
62
+ "RCV1": "RCV1",
63
+ "w8a": "w8a",
62
64
  "DHI":"Diabetes_Health_Indicators",
63
65
  "EVP": "Electric_Vehicle_Population",
64
66
  "GHP": "Global_House_Purchase",
@@ -166,6 +168,13 @@ class args:
166
168
  # required=True,
167
169
  help = "search_grid: 1: "
168
170
  )
171
+
172
+ parser.add_argument(
173
+ "--OptParas",
174
+ type=int,
175
+ nargs=1,
176
+ help="Number of optimization steps for parameter tuning (default: 1)"
177
+ )
169
178
  # <args_from_command>
170
179
 
171
180
  args = parser.parse_args()
@@ -177,16 +186,19 @@ class args:
177
186
  # <args>
178
187
 
179
188
  def UpdateOtherParas(args, OtherParas):
189
+ # <time_str>
180
190
  if args.time_str is not None:
181
191
  time_str = args.time_str[0]
182
192
  else:
183
193
  time_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
184
194
 
195
+ # <user_search_grid>
185
196
  if args.user_search_grid is not None:
186
197
  OtherParas["user_search_grid"] = args.user_search_grid[0]
187
198
  else:
188
199
  OtherParas["user_search_grid"] = None
189
200
 
201
+ # <send_email>
190
202
  if args.send_email is not None:
191
203
  OtherParas["from_email"] = args.send_email[0]
192
204
  OtherParas["to_email"] = args.send_email[1]
@@ -195,6 +207,11 @@ def UpdateOtherParas(args, OtherParas):
195
207
  else:
196
208
  OtherParas["send_email"] = False
197
209
 
210
+ if args.OptParas is not None:
211
+ OtherParas["SeleParasOn"] = False
212
+ else:
213
+ OtherParas["SeleParasOn"] = True
214
+
198
215
  OtherParas["time_str"] = time_str
199
216
  OtherParas["results_folder_name"] = f'Results_{OtherParas["exp_name"]}'
200
217
 
@@ -428,7 +445,6 @@ def optimizer_paras_dict(Paras, OtherParas)->dict:
428
445
  # ----------- SPBM-TR-NoneCut -----------
429
446
  "SPBM-TR-NoneCut": {
430
447
  "params": {
431
- "M": [1e-5],
432
448
  "delta": (
433
449
  [2**i for i in range(-8, 9)]
434
450
  if OtherParas["SeleParasOn"]
@@ -453,7 +469,6 @@ def optimizer_paras_dict(Paras, OtherParas)->dict:
453
469
  # ----------- SPBM-PF-NoneCut -----------
454
470
  "SPBM-PF-NoneCut": {
455
471
  "params": {
456
- "M": [1e-5],
457
472
  "delta": (
458
473
  [2**i for i in range(-8, 9)]
459
474
  if OtherParas["SeleParasOn"]
@@ -592,6 +607,7 @@ def set_marker_point(epoch_num: int) -> list:
592
607
  6: [0, 2, 4, 6],
593
608
  8: [0, 2, 4, 6, 8],
594
609
  10: [0, 2, 4, 6, 8, 10],
610
+ 50: [0, 10, 20, 30, 40, 50],
595
611
  100: [0, 20, 40, 60, 80, 100],
596
612
  200: [0, 40, 80, 120, 160, 200],
597
613
  }
junshan_kit/Print_Info.py CHANGED
@@ -13,7 +13,7 @@ def training_info(args, data_name, optimizer_name, hyperparams, Paras, model_nam
13
13
  if Paras["use_color"]:
14
14
  print("\033[90m" + "-" * 115 + "\033[0m")
15
15
  print(
16
- f"\033[32m✅ \033[34mDataset:\033[32m {data_name}, \t\033[34mBatch-size:\033[32m {args.bs}, \t\033[34m(training, test) = \033[32m ({Paras['train_data_num']}, {Paras['test_data_num']}), \t\033[34m device:\033[32m {Paras['device']}"
16
+ f"\033[32m✅ \033[34mDataset:\033[32m {data_name}, \t\033[34mBatch-size:\033[32m {args.bs}, \t\033[34m(training, test) = \033[32m ({Paras['train_data_num']}/{Paras['train_data_all_num']}, {Paras['test_data_num']}/{Paras['test_data_all_num']}), \t\033[34m device:\033[32m {Paras['device']}"
17
17
  )
18
18
  print(
19
19
  f"\033[32m✅ \033[34mOptimizer:\033[32m {optimizer_name}, \t\033[34mParams:\033[32m {hyperparams}"
@@ -27,7 +27,7 @@ def training_info(args, data_name, optimizer_name, hyperparams, Paras, model_nam
27
27
  else:
28
28
  print("-" * 115)
29
29
  print(
30
- f"✅ Dataset: {data_name}, \tBatch-size: {args.bs}, \t(training, test) = ({Paras['train_data_num']}, {Paras['test_data_num']}), \tdevice: {Paras['device']}"
30
+ f"✅ Dataset: {data_name}, \tBatch-size: {args.bs}, \t(training, test) = ({Paras['train_data_num']}/{Paras['train_data_all_num']}, {Paras['test_data_num']}/{Paras['test_data_all_num']}), \tdevice: {Paras['device']}"
31
31
  )
32
32
  print(f"✅ Optimizer: {optimizer_name}, \tParams: {hyperparams}")
33
33
  print(
@@ -90,8 +90,8 @@ def load_data(model_name, data_name, Paras):
90
90
  assert False
91
91
 
92
92
  # Computing the number of data
93
- Paras["train_data_num"] = len(train_dataset)
94
- Paras["test_data_num"] = len(test_dataset)
93
+ Paras["train_data_num"], Paras["test_data_num"] = len(train_dataset), len(test_dataset)
94
+ Paras["train_data_all_num"], Paras["test_data_all_num"] = len(train_dataset), len(test_dataset)
95
95
 
96
96
  return train_dataset, test_dataset, Paras
97
97
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: junshan_kit
3
- Version: 2.7.3
3
+ Version: 2.8.5
4
4
  Summary: This is an optimization tool.
5
5
  Author-email: Junshan Yin <junshanyin@163.com>
6
6
  Requires-Dist: cvxpy==1.6.5
@@ -1,20 +1,20 @@
1
1
  junshan_kit/BenchmarkFunctions.py,sha256=tXgZGg-CjTNz78nMyVEQflVFIJDgmmePytXjY_RT9BM,120
2
2
  junshan_kit/Check_Info.py,sha256=Z6Ls2S7Fl4h8S9s0NB8jP_YpSLZInvQAeyjIXzq5Bpc,1872
3
3
  junshan_kit/DataHub.py,sha256=6RCNr8dBTqK-8ey4m-baMU1qOsJP6swOFkaraGdk0fM,6801
4
- junshan_kit/DataProcessor.py,sha256=W2bzugcYnwQC403GdvSmGDBhfz8X1KxJBkOAVg1vHHk,14385
4
+ junshan_kit/DataProcessor.py,sha256=Uc9ixhnVmGf5PoGIe3vvhobH_ADtDAosG9MTjnB1KDQ,15677
5
5
  junshan_kit/DataSets.py,sha256=DcpwWRm1_B29hIDjOhvaeKAYYeBknEW2QqsS_qm8Hxs,13367
6
6
  junshan_kit/Evaluate_Metrics.py,sha256=PQBGU8fETIvDon1VMdouZ1dhG2n7XHYGbzs2EQUA9FM,3392
7
- junshan_kit/FiguresHub.py,sha256=116cvRUGUcBqIAs0_xiRzZCzgnPaqmgI5kvNu6cAd_Q,10181
7
+ junshan_kit/FiguresHub.py,sha256=TVbo9ioEECrH_iJjpt0HgkCoiAdFEcTdtiUtzDNYrJY,10455
8
8
  junshan_kit/ModelsHub.py,sha256=xM6cwLecq9vukrt1c9l7l9dy7mQn3yq0ZwrRg5f_CfM,7995
9
- junshan_kit/ParametersHub.py,sha256=RSgsSlH0bgehn27lleKfboT1MuLAyIMxZ5FWC-ANbhA,19822
10
- junshan_kit/Print_Info.py,sha256=uBLpeynOYSZTN8LbJupSH1SuLZ-7cMU3Yp3IlVJWB1s,4772
11
- junshan_kit/TrainingHub.py,sha256=WV3cUz4JsEdGTpbTqgnU3WmlKeob8RAOuL993EsADj0,11469
9
+ junshan_kit/ParametersHub.py,sha256=_LvkdV95vKSU4h2LtF7W63EaF5mRBg85ZHf0ymb28tA,20248
10
+ junshan_kit/Print_Info.py,sha256=7pfd_mGEuQdQGyz6kcSSvjVRCrPgi5RafQgi7ZSS9VU,4890
11
+ junshan_kit/TrainingHub.py,sha256=unoI8zzm0oekUxz-3retHCFhxwx6j8e6Tp9VQDywTPg,11565
12
12
  junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  junshan_kit/kit.py,sha256=tQGoJJQZW9BeadX2cuwhvOxX2riHBZG0iFExelS4MIY,11487
14
14
  junshan_kit/OptimizerHup/OptimizerFactory.py,sha256=x1_cE5ZSkKffdY0uCIirocBNj2X-u_R-V5jNawJ1EfA,4607
15
- junshan_kit/OptimizerHup/SPBM.py,sha256=2Yg8Fmc8OkYOrjevD8eAGfI-m-fefoOldybtlp4ZEEs,13730
15
+ junshan_kit/OptimizerHup/SPBM.py,sha256=h449QddeN0MvUIQeKcNxFsdxdBuhN354sGc_sN2LZR8,13816
16
16
  junshan_kit/OptimizerHup/SPBM_func.py,sha256=5Fz6eHYIVGMoR_CBDA_Xk_1dnPRq3K16DUNoNaWQ2Ag,17301
17
17
  junshan_kit/OptimizerHup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- junshan_kit-2.7.3.dist-info/METADATA,sha256=_mIq2RLQUdQECGnjRK4qABiPr41BGiK-aCkk4EQVKik,455
19
- junshan_kit-2.7.3.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
20
- junshan_kit-2.7.3.dist-info/RECORD,,
18
+ junshan_kit-2.8.5.dist-info/METADATA,sha256=vxUXFvPqaGc2liYh0A3B4CmlbdlrK1CVdRNx0i7RoRA,455
19
+ junshan_kit-2.8.5.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
20
+ junshan_kit-2.8.5.dist-info/RECORD,,