junshan-kit 2.3.9__py2.py3-none-any.whl → 2.4.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
junshan_kit/datahub.py ADDED
@@ -0,0 +1,281 @@
1
+ import torchvision,torch, random
2
+ import numpy as np
3
+ import torchvision.transforms as transforms
4
+ from torch.utils.data import random_split, Subset
5
+ from sklearn.datasets import load_svmlight_file
6
+ from torch.utils.data import Dataset
7
+ from torch.utils.data import TensorDataset
8
+ from torch.utils.data import DataLoader
9
+ import bz2
10
+
11
+
12
class LibSVMDataset_bz2(Dataset):
    """Dataset read from a bz2-compressed LIBSVM/svmlight file.

    The feature matrix is stored exactly as ``load_svmlight_file`` returns
    it and a single row is densified each time an item is requested.
    Labels are remapped once, at construction time.
    """

    def __init__(self, path, data_name = None, Paras = None):
        """Load ``path`` and normalise its labels.

        Args:
            path: File opened with ``bz2.open(path, 'rb')``.
            data_name: Optional dataset name, compared case-insensitively.
                ``"rcv1"`` has its labels mapped to 0/1. Any other value,
                including ``None``, uses the default policy: if the smallest
                label is negative, map to 0/1; if it is >= 1, subtract 1.
            Paras: Accepted for call-site compatibility; not used here.
        """
        with bz2.open(path, 'rb') as f:
            X, y = load_svmlight_file(f)  # type: ignore

        self.X, self.path = X, path

        y = np.asanyarray(y)

        if data_name is not None:
            data_name = data_name.lower()

        if data_name in ("rcv1",):
            # Binary classification with -1/1 labels: convert to 0/1.
            y = (y > 0).astype(int)
        elif data_name in ("",):
            # Placeholder for multi-class datasets whose labels start at 1.
            y = y - 1
        elif y.size:
            # Default policy: try to avoid CrossEntropyLoss errors.
            # Skipped for an empty label array, where np.min would raise.
            lowest = np.min(y)
            if lowest < 0:  # e.g. [-1, 1]
                y = (y > 0).astype(int)
            elif lowest >= 1:
                y = y - 1

        self.y = y

    def __len__(self):
        """Return the number of rows in the feature matrix."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as float32 tensors."""
        xi = torch.tensor(self.X.getrow(idx).toarray(), dtype=torch.float32).squeeze(0)
        yi = torch.tensor(self.y[idx], dtype=torch.float32)
        return xi, yi

    def __repr__(self):
        """Summarise sample, feature and distinct-label counts plus the path."""
        num_samples = len(self.y)
        num_features = self.X.shape[1]
        num_classes = len(np.unique(self.y))
        # The num_classes line now ends with a comma like the other fields.
        return (f"LibSVMDataset_bz2(\n"
                f" num_samples = {num_samples},\n"
                f" num_features = {num_features},\n"
                f" num_classes = {num_classes},\n"
                f" path = {self.path}\n"
                f")")
59
+
60
+
61
def get_libsvm_bz2_data(train_path, test_path, data_name, Paras, split = True):
    """Build train/test datasets from bz2-compressed LIBSVM files.

    Args:
        train_path: File loaded as the training data.
        test_path: File loaded as the test data; only read when
            ``data_name`` is ``"Duke"`` or ``"Ijcnn"``.
        data_name: Dataset name, matched case-sensitively against the two
            names above.
        Paras: Mapping; ``Paras["train_ratio"]`` is read only when the
            training data is split randomly.
        split: Whether to split the training data randomly. Ignored for
            the two datasets that have their own test file.

    Returns:
        ``(train_dataset, test_dataset, transform)`` where ``transform`` is
        a descriptive string, not a callable.
    """
    transform = "-1 → 0 for binary, y-1 for multi-class"
    train_data = LibSVMDataset_bz2(train_path)

    # These datasets ship a dedicated test file, so no random split is made.
    if data_name in ("Duke", "Ijcnn"):
        return train_data, LibSVMDataset_bz2(test_path), transform

    # Empty test subset keeps the return structure consistent.
    empty_test = Subset(train_data, [])
    if not split:
        return train_data, empty_test, transform

    n_total = len(train_data)
    n_train = int(Paras["train_ratio"] * n_total)
    pieces = random_split(train_data, [n_train, n_total - n_train])
    train_dataset, test_dataset = pieces
    return train_dataset, test_dataset, transform
90
+
91
+
92
+ # one-hot
93
class OneHot(Dataset):
    """Wrap a dataset so each label comes back as a one-hot float vector.

    Args:
        subset: Any indexable object with ``len()`` whose items are
            ``(x, y)`` pairs, where ``y`` is a class index.
        num_classes: Length of the one-hot vector.
    """

    def __init__(self, subset, num_classes):
        self.subset = subset
        self.num_classes = num_classes

    def __len__(self):
        """Return the length of the wrapped dataset."""
        return len(self.subset)

    def __getitem__(self, idx):
        """Return ``(x, one_hot(y))`` for item ``idx`` of the wrapped dataset."""
        x, y = self.subset[idx]
        # one_hot only accepts integer (int64) index tensors. as_tensor with
        # an explicit dtype handles Python ints, NumPy scalars and tensors of
        # any dtype (including float labels) without re-wrapping a tensor.
        label = torch.as_tensor(y, dtype=torch.long)
        y_onehot = torch.nn.functional.one_hot(label, num_classes=self.num_classes).float()
        return x, y_onehot
105
+ # one_hot
106
+
107
+ # <LibSVMDataset>
108
class LibSVMDataset(Dataset):
    """Dataset that loads a LIBSVM/svmlight file into dense tensors.

    ``X`` holds the features as a float tensor and ``y`` the remapped
    labels as a long tensor.
    """

    def __init__(self, data_path, data_name=None):
        """Load ``data_path`` and remap its labels.

        Args:
            data_path: File passed to ``load_svmlight_file``.
            data_name: Optional dataset name, compared case-insensitively.
                ``a9a``/``w8a``/``ijcnn1`` are mapped to 0/1 and
                ``letter``/``shuttle`` are shifted down by one. Anything
                else, including ``None``, uses the default policy.
        """
        features, labels = load_svmlight_file(data_path)  # type: ignore
        self.X = torch.from_numpy(features.toarray()).float()  # type: ignore

        labels = np.asarray(labels)
        key = None if data_name is None else data_name.lower()

        if key in ("a9a", "w8a", "ijcnn1"):
            # Binary classification with -1/1 labels.
            to_binary, shift_down = True, False
        elif key in ("letter", "shuttle"):
            # Multi-class labels that start at 1.
            to_binary, shift_down = False, True
        else:
            # Default policy: try to avoid CrossEntropyLoss errors.
            lowest = np.min(labels)
            to_binary = bool(lowest < 0)
            shift_down = (not to_binary) and bool(lowest >= 1)

        if to_binary:
            labels = (labels > 0).astype(int)
        elif shift_down:
            labels = labels - 1

        self.y = torch.from_numpy(labels).long()

    def __len__(self):
        """Return the number of labels."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target
141
+
142
+ # <LibSVMDataset>
143
+
144
+ # <get_libsvm_data>
145
def _load_libsvm_dataset(train_path, test_path, data_name):
    """Load the train and test files as ``LibSVMDataset`` objects.

    Returns:
        ``(train_dataset, test_dataset, transform)``. ``transform`` is
        always ``None``: LIBSVM data is numeric and needs no image
        transformation.
    """
    loaded = [LibSVMDataset(path, data_name) for path in (train_path, test_path)]
    train_dataset, test_dataset = loaded
    return train_dataset, test_dataset, None
152
+ # <get_libsvm_data>
153
+ # <ToTensor>
154
def get_libsvm_data(train_path, test_path, data_name):
    """Load LIBSVM train/test files and wrap them as ``TensorDataset`` objects.

    Returns:
        ``(train_data, test_data, transform)`` where ``transform`` is
        whatever ``_load_libsvm_dataset`` reports.
    """
    # Load data
    loaded = _load_libsvm_dataset(train_path, test_path, data_name)
    train_split, test_split, transform = loaded

    train_data, test_data = (
        TensorDataset(part.X, part.y) for part in (train_split, test_split)
    )
    return train_data, test_data, transform
161
+ # <ToTensor>
162
+
163
+ # <mnist>
164
def MNIST(Paras, model_name):
    """Load the MNIST train and test sets with a ``ToTensor`` transform.

    When ``Paras["model_type"][model_name]`` equals ``"binary"``, both sets
    are restricted to the samples labelled 0 or 1.

    Returns:
        ``(train_dataset, test_dataset, transform)``.
    """
    transform = torchvision.transforms.ToTensor()
    root = './exp_data/MNIST'

    train_dataset = torchvision.datasets.MNIST(
        root=root, train=True, download=True, transform=transform
    )
    test_dataset = torchvision.datasets.MNIST(
        root=root, train=False, download=True, transform=transform
    )

    if Paras["model_type"][model_name] != "binary":
        return train_dataset, test_dataset, transform

    def only_zero_one(dataset):
        # Keep the indices whose target is 0 or 1.
        labels = dataset.targets
        keep = (labels == 0) | (labels == 1)
        picked = torch.nonzero(keep, as_tuple=True)[0]
        return torch.utils.data.Subset(dataset, picked.tolist())

    train_dataset = only_zero_one(train_dataset)
    test_dataset = only_zero_one(test_dataset)
    return train_dataset, test_dataset, transform
197
+ # <mnist>
198
+
199
+ # <cifar100>
200
def CIFAR100(Paras, model_name):
    """Load the CIFAR-100 train and test sets with per-channel normalisation.

    When ``Paras["model_type"][model_name]`` equals ``"binary"``, both sets
    are restricted to the samples labelled 0 or 1.

    Returns:
        ``(train_dataset, test_dataset, transform)``.
    """
    normalize = transforms.Normalize(
        mean=[0.5071, 0.4867, 0.4408],
        std=[0.2675, 0.2565, 0.2761],
    )
    transform = transforms.Compose([transforms.ToTensor(), normalize])
    root = './exp_data/CIFAR100'

    train_dataset = torchvision.datasets.CIFAR100(
        root=root, train=True, download=True, transform=transform
    )
    test_dataset = torchvision.datasets.CIFAR100(
        root=root, train=False, download=True, transform=transform
    )

    if Paras["model_type"][model_name] != "binary":
        return train_dataset, test_dataset, transform

    def only_zero_one(dataset):
        # Targets are converted with torch.tensor before comparison.
        labels = torch.tensor(dataset.targets)
        keep = (labels == 0) | (labels == 1)
        picked = torch.nonzero(keep, as_tuple=True)[0]
        return torch.utils.data.Subset(dataset, picked.tolist())

    train_dataset = only_zero_one(train_dataset)
    test_dataset = only_zero_one(test_dataset)
    return train_dataset, test_dataset, transform
235
+ # <cifar100>
236
+
237
+
238
+ # <caltech101_Resize_32>
239
def convert_to_rgb(img):
    """Return ``img.convert("RGB")``.

    Defined explicitly as a named function to avoid a lambda.
    """
    rgb_image = img.convert("RGB")
    return rgb_image
241
+
242
def caltech101_Resize_32(seed, train_ratio=0.7, split=True):
    """Load Caltech101 resized to 32x32, optionally split into train/test.

    Args:
        seed: Value used to seed torch, CUDA, NumPy and ``random`` before
            the dataset is built and split.
        train_ratio: Fraction of samples placed in the training split.
        split: When false, the whole dataset is returned as the training
            set together with an empty test subset.

    Returns:
        ``(train_dataset, test_dataset, transform)``.
    """
    # Seed every random number generator and pin the cuDNN flags.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    steps = [
        # Grayscale with three output channels gives every image 3 channels.
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]
    transform = transforms.Compose(steps)

    full_dataset = torchvision.datasets.Caltech101(
        root='./exp_data/Caltech101',
        download=True,
        transform=transform,
    )

    if not split:
        # Empty test dataset, keep the structure consistent
        return full_dataset, Subset(full_dataset, []), transform

    n_total = len(full_dataset)
    n_train = int(train_ratio * n_total)
    parts = random_split(full_dataset, [n_train, n_total - n_train])
    train_dataset, test_dataset = parts
    return train_dataset, test_dataset, transform
280
+
281
+ # <caltech101_Resize_32>
junshan_kit/kit.py CHANGED
@@ -237,6 +237,12 @@ class JianguoyunDownloaderFirefox:
237
237
  self.close()
238
238
  print('*' * 60)
239
239
 
240
-
240
def set_seed(seed=42):
    """Seed Python's ``random``, NumPy and torch (CPU and all CUDA devices).

    Also sets ``torch.backends.cudnn.deterministic`` to ``True`` and
    ``torch.backends.cudnn.benchmark`` to ``False``.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
241
247
 
242
248
 
@@ -1,10 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: junshan_kit
3
- Version: 2.3.9
3
+ Version: 2.4.0
4
4
  Summary: This is an optimization tool.
5
5
  Author-email: Junshan Yin <junshanyin@163.com>
6
6
  Requires-Dist: kaggle==1.7.4.5
7
7
  Requires-Dist: kagglehub==0.3.13
8
8
  Requires-Dist: pandas==2.3.3
9
- Requires-Dist: scikit-learn==1.7.1
10
9
  Requires-Dist: selenium==4.36.0
@@ -0,0 +1,14 @@
1
+ junshan_kit/DataProcessor.py,sha256=MOKMkq4OE32VyLkgUD-D2J5dORmUDLfylAir0UiI04E,8665
2
+ junshan_kit/DataSets.py,sha256=8_-2vgwYgXPZnNIt-4WYBu7tJpL8E9W3bcMT4jAc-KI,13831
3
+ junshan_kit/ExperimentHub.py,sha256=h4mCCDi5HzGjNWqq7K2XUV1odUEoWw8IORsoIwT-8bA,6526
4
+ junshan_kit/Models.py,sha256=GRTunJON1vLQz2IxgsoOKvjP-3zSJJLuB3CkJTAiImo,6884
5
+ junshan_kit/Optimizers.py,sha256=w-2PP5u8w4WYLe8tHDtFIHcTRVOKC5tHbJ5Tm1ydsYM,3069
6
+ junshan_kit/Print_Info.py,sha256=sF_-deHfaZ2N2nKVII2hkvNAdJynSdNuBYu0qn7pxG8,3122
7
+ junshan_kit/TrainingParas.py,sha256=hBiBxHkmollRA4jT93npIcdWdsLLD0laV5kCZqnyg5k,17324
8
+ junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ junshan_kit/check_args.py,sha256=MmgDAPWy9Btjs1EBHtqBu0W0G7nlg7TF2rTcdJ81YJs,3590
10
+ junshan_kit/datahub.py,sha256=4c3P2TORMZ4va6NrSiojDCpnY_CGDlJV-5PG3u1_Isk,9081
11
+ junshan_kit/kit.py,sha256=hpA4Zpn1VAuhdJSBBXswVum0CSk6QnB05GGLYoaRatQ,9792
12
+ junshan_kit-2.4.0.dist-info/METADATA,sha256=IUY3bnm8kFO294XlZmZ4JzA2A7iOzm9dOms2Sg7u8b4,266
13
+ junshan_kit-2.4.0.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
14
+ junshan_kit-2.4.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- junshan_kit/DataProcessor.py,sha256=MOKMkq4OE32VyLkgUD-D2J5dORmUDLfylAir0UiI04E,8665
2
- junshan_kit/DataSets.py,sha256=EgDPN7Sm6MLSwxBpJE_A5TN-6eVsjGLjFoZdgg-BnZ8,13819
3
- junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
5
- junshan_kit-2.3.9.dist-info/METADATA,sha256=5bor_S4tl0sn3EwNiv-fsrnM4W0zrockIIO3xJ4sBrU,301
6
- junshan_kit-2.3.9.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
7
- junshan_kit-2.3.9.dist-info/RECORD,,