pyerualjetwork 4.3.8.dev14__py3-none-any.whl → 4.3.9b0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
- pyerualjetwork/__init__.py +1 -1
- pyerualjetwork/activation_functions.py +2 -2
- pyerualjetwork/activation_functions_cuda.py +63 -114
- pyerualjetwork/data_operations_cuda.py +1 -1
- pyerualjetwork/model_operations.py +14 -14
- pyerualjetwork/model_operations_cuda.py +16 -17
- pyerualjetwork/plan.py +87 -268
- pyerualjetwork/plan_cuda.py +82 -276
- pyerualjetwork/planeat.py +12 -44
- pyerualjetwork/planeat_cuda.py +9 -45
- pyerualjetwork/visualizations.py +29 -26
- pyerualjetwork/visualizations_cuda.py +19 -20
- {pyerualjetwork-4.3.8.dev14.dist-info → pyerualjetwork-4.3.9b0.dist-info}/METADATA +2 -19
- pyerualjetwork-4.3.9b0.dist-info/RECORD +24 -0
- pyerualjetwork-4.3.9b0.dist-info/top_level.txt +1 -0
- pyerualjetwork-4.3.8.dev14.dist-info/RECORD +0 -44
- pyerualjetwork-4.3.8.dev14.dist-info/top_level.txt +0 -2
- pyerualjetwork_afterburner/__init__.py +0 -11
- pyerualjetwork_afterburner/activation_functions.py +0 -290
- pyerualjetwork_afterburner/activation_functions_cuda.py +0 -289
- pyerualjetwork_afterburner/data_operations.py +0 -406
- pyerualjetwork_afterburner/data_operations_cuda.py +0 -461
- pyerualjetwork_afterburner/help.py +0 -17
- pyerualjetwork_afterburner/loss_functions.py +0 -21
- pyerualjetwork_afterburner/loss_functions_cuda.py +0 -21
- pyerualjetwork_afterburner/memory_operations.py +0 -298
- pyerualjetwork_afterburner/metrics.py +0 -190
- pyerualjetwork_afterburner/metrics_cuda.py +0 -163
- pyerualjetwork_afterburner/model_operations.py +0 -408
- pyerualjetwork_afterburner/model_operations_cuda.py +0 -420
- pyerualjetwork_afterburner/plan.py +0 -432
- pyerualjetwork_afterburner/plan_cuda.py +0 -441
- pyerualjetwork_afterburner/planeat.py +0 -793
- pyerualjetwork_afterburner/planeat_cuda.py +0 -840
- pyerualjetwork_afterburner/ui.py +0 -22
- pyerualjetwork_afterburner/visualizations.py +0 -823
- pyerualjetwork_afterburner/visualizations_cuda.py +0 -825
- {pyerualjetwork-4.3.8.dev14.dist-info → pyerualjetwork-4.3.9b0.dist-info}/WHEEL +0 -0
pyerualjetwork_afterburner/memory_operations.py
@@ -1,298 +0,0 @@
import psutil
import numpy as np
import cupy as cp
import logging

def get_available_cpu_memory():
    """
    Returns the amount of available system memory, using the `psutil` library.

    :return: The amount of available memory in bytes on the system.
    """
    return psutil.virtual_memory().available

def get_optimal_batch_size_for_cpu(x, data_size_bytes, available_memory):
    """
    Calculates the optimal batch size for a given data size and available memory based on
    the size of each element.

    :param x: NumPy array representing the input data for which the optimal CPU batch size is determined

    :param data_size_bytes: Size of the data in bytes that will be processed in batches

    :param available_memory: Total memory available on the CPU in bytes

    :return: The optimal batch size for the array `x`, based on the available memory and the size of each element in bytes.
    """
    safe_memory = available_memory * 0.25
    element_size = data_size_bytes / x.size
    return int(safe_memory / (element_size * 2))

def transfer_to_cpu(x, dtype=np.float32):
    """
    The `transfer_to_cpu` function converts data to a specified data type on the CPU, handling memory constraints
    by batching the conversion process and ensuring complete GPU memory cleanup.

    :param x: Input data to transfer to CPU (CuPy array)

    :param dtype: Target NumPy dtype for the output array (default: np.float32)

    :return: NumPy array with the specified dtype
    """
    from .ui import loading_bars, initialize_loading_bar
    try:
        if isinstance(x, np.ndarray):
            return x.astype(dtype) if x.dtype != dtype else x

        x = x.astype(dtype=dtype, copy=False)

        data_size = x.nbytes
        available_memory = get_available_cpu_memory()
        logging.debug(f"Data size: {data_size/1e6:.2f}MB, Available memory: {available_memory/1e6:.2f}MB")

        pool = cp.get_default_memory_pool()
        pinned_mempool = cp.cuda.PinnedMemoryPool()

        if data_size <= available_memory * 0.25:
            try:
                final_result = np.array(x.get(), dtype=dtype, copy=False)
            finally:
                del x
                pool.free_all_blocks()
                pinned_mempool.free_all_blocks()
                cp.cuda.runtime.deviceSynchronize()
            return final_result

        batch_size = max(get_optimal_batch_size_for_cpu(x, data_size, available_memory), 1)
        total_batches = (len(x) + batch_size - 1) // batch_size
        loading_bar = initialize_loading_bar(
            total=total_batches,
            desc='Transfering to CPU mem',
            ncols=70,
            bar_format=loading_bars()[0],
            leave=False
        )
        logging.debug(f"Using batch size: {batch_size}")

        try:
            sample_chunk = x[0:1]
            sample_array = np.array(sample_chunk.get(), dtype=dtype)
            chunk_shape = sample_array.shape[1:] if len(sample_array.shape) > 1 else ()
            total_shape = (len(x),) + chunk_shape
        finally:
            del sample_array
            del sample_chunk
            pool.free_all_blocks()
            pinned_mempool.free_all_blocks()

        chunks = np.empty(total_shape, dtype=dtype)

        try:
            for i in range(0, len(x), batch_size):
                try:
                    end_idx = min(i + batch_size, len(x))
                    chunk = x[i:end_idx]
                    chunks[i:end_idx] = chunk.get().astype(dtype=dtype)
                finally:
                    del chunk
                    pool.free_all_blocks()
                    pinned_mempool.free_all_blocks()
                    cp.cuda.runtime.deviceSynchronize()

                loading_bar.update(1)
        finally:
            del x
            pool.free_all_blocks()
            pinned_mempool.free_all_blocks()
            cp.cuda.runtime.deviceSynchronize()

        return chunks

    except Exception as e:
        logging.error(f"Error in transfer_to_cpu: {str(e)}")
        if 'x' in locals():
            del x
        if 'pool' in locals():
            pool.free_all_blocks()
        if 'pinned_mempool' in locals():
            pinned_mempool.free_all_blocks()
        cp.cuda.runtime.deviceSynchronize()
        raise

def get_optimal_batch_size_for_gpu(x, data_size_bytes):
    """
    Calculates the optimal batch size for a GPU based on available memory and data size.

    :param x: A list or array containing the data elements that will be processed on the GPU
    :param data_size_bytes: The total size of the data in bytes that will be processed on the GPU. This
    can be the size of a single batch or of the whole dataset, depending on how the computation is structured
    :return: The optimal batch size for processing the given data on the GPU, based on
    the available free GPU memory and the size of the data elements.
    """
    free_memory = cp.get_default_memory_pool().free_bytes()
    device_memory = cp.cuda.runtime.memGetInfo()[0]
    safe_memory = min(free_memory, device_memory) * 0.25

    element_size = data_size_bytes / len(x)
    return int(safe_memory / (element_size * 2))


def transfer_to_gpu(x, dtype=cp.float32):
    """
    The `transfer_to_gpu` function converts input data to GPU arrays, optimizing memory usage by
    batching and handling out-of-memory errors.

    :param x: The input data to transfer to the GPU. It can be either a NumPy array or a CuPy array;
    a NumPy array is converted to a CuPy array.

    :param dtype: The data type to which the input array `x` is converted when moved to the GPU.
    Defaults to `cp.float32`, CuPy's 32-bit floating-point type.

    :return: The input data `x` as a GPU array of type `dtype`. If `x` is already a GPU array with the
    same dtype, it is returned as is. If the data size exceeds 25% of the free GPU memory, the data is
    copied in batches.
    """
    from .ui import loading_bars, initialize_loading_bar
    try:
        if isinstance(x, cp.ndarray):
            return x.astype(dtype) if x.dtype != dtype else x

        x = x.astype(dtype=dtype, copy=False)
        data_size = x.nbytes
        pinned_mempool = cp.cuda.PinnedMemoryPool()
        free_gpu_memory = cp.cuda.runtime.memGetInfo()[0]
        logging.debug(f"Data size: {data_size/1e6:.2f}MB, Free GPU memory: {free_gpu_memory/1e6:.2f}MB")

        if data_size <= free_gpu_memory * 0.25:
            new_x = cp.array(x, dtype=dtype, copy=False)
            return new_x

        batch_size = get_optimal_batch_size_for_gpu(x, data_size)
        if batch_size == 0: batch_size = 1

        loading_bar = initialize_loading_bar(total=len(x)/batch_size, desc='Transfering to GPU mem', ncols=70, bar_format=loading_bars()[0], leave=False)

        logging.debug(f"Using batch size: {batch_size}")
        current_threshold = 0.75
        total_batches = (len(x) + batch_size - 1) // batch_size

        sample_chunk = x[0:1]
        sample_array = cp.array(sample_chunk, dtype=dtype)
        chunk_shape = sample_array.shape[1:] if len(sample_array.shape) > 1 else ()
        del sample_array
        del sample_chunk
        if chunk_shape:
            total_shape = (len(x),) + chunk_shape
        else:
            total_shape = (len(x),)

        del chunk_shape
        chunks = cp.empty(total_shape, dtype=dtype)
        del total_shape

        for i in range(0, len(x), batch_size):
            try:
                chunk = x[i:i + batch_size]
                chunk = cp.array(chunk, dtype=dtype)
                chunks[i // batch_size] = chunk
                del chunk
                pinned_mempool.free_all_blocks()

                if i > 0 and i % (batch_size * 5) == 0:
                    pool = cp.get_default_memory_pool()
                    current_threshold = adjust_gpu_memory_threshold(pool, free_gpu_memory, current_threshold)
                    if pool.used_bytes() > cp.cuda.runtime.memGetInfo()[0] * current_threshold:
                        pool.free_all_blocks()

                loading_bar.update(1)

            except cp.cuda.memory.OutOfMemoryError:
                logging.error(f"GPU out of memory at batch {i//batch_size + 1}/{total_batches}")
                cp.get_default_memory_pool().free_all_blocks()
                batch_size = max(batch_size // 2, 1)
                continue

            except Exception as e:
                logging.error(f"Error processing batch {i//batch_size + 1}/{total_batches}: {str(e)}")
                raise

        try:
            del x
            cp.get_default_memory_pool().free_all_blocks()
            pinned_mempool.free_all_blocks()
            return chunks

        except Exception as e:
            logging.error(f"Error concatenating results: {str(e)}")
            raise

    except Exception as e:
        logging.error(f"Error in transfer_to_gpu: {str(e)}")
        raise

def adjust_gpu_memory_threshold(pool, free_gpu_memory, current_threshold=0.75, min_threshold=0.5, max_threshold=0.9):
    used_memory = pool.used_bytes()
    usage_ratio = used_memory / free_gpu_memory

    if usage_ratio > current_threshold:
        current_threshold = max(min_threshold, current_threshold - 0.05)
    elif usage_ratio < current_threshold * 0.8:
        current_threshold = min(max_threshold, current_threshold + 0.05)

    return current_threshold


def optimize_labels(y, one_hot_encoded=True, cuda=False):
    """
    The function `optimize_labels` optimizes the data type of labels based on their length and encoding
    format.

    :param y: The labels whose data type should be optimized. Depending on the encoding format and the
    number of classes, the labels are cast to uint8, uint16 or uint32.
    :param one_hot_encoded: Indicates whether the labels are in one-hot encoded format. If True, the
    width of each one-hot row determines the target dtype; defaults to True (optional)
    :param cuda: A boolean flag indicating whether to use CUDA for computations. If True, the CuPy
    library is used for array operations, which can leverage GPU acceleration; defaults to False (optional)
    :return: The input array `y` with an optimized data type. If `one_hot_encoded` is True, the length
    of the elements in `y` selects uint8, uint16 or uint32; otherwise the length of `y` itself is used.
    """

    if cuda: array_type = cp
    else: array_type = np

    dtype_uint8 = array_type.uint8
    dtype_uint16 = array_type.uint16
    dtype_uint32 = array_type.uint32

    if one_hot_encoded:
        if len(y[0]) < 256:
            if y.dtype != dtype_uint8:
                y = array_type.array(y, copy=False).astype(dtype_uint8, copy=False)
        elif len(y[0]) <= 32767:
            if y.dtype != dtype_uint16:
                y = array_type.array(y, copy=False).astype(dtype_uint16, copy=False)
        else:
            if y.dtype != dtype_uint32:
                y = array_type.array(y, copy=False).astype(dtype_uint32, copy=False)

        return y

    else:

        if len(y) < 256:
            if y.dtype != dtype_uint8:
                y = array_type.array(y, copy=False).astype(dtype_uint8, copy=False)
        elif len(y) <= 32767:
            if y.dtype != dtype_uint16:
                y = array_type.array(y, copy=False).astype(dtype_uint16, copy=False)
        else:
            if y.dtype != dtype_uint32:
                y = array_type.array(y, copy=False).astype(dtype_uint32, copy=False)

        return y
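For orientation only (not part of the diff): a minimal sketch of how the removed memory_operations helpers were typically driven. It assumes CuPy, a CUDA device, and the pre-4.3.9b0 pyerualjetwork_afterburner package are installed; the shapes and random data are illustrative.

```python
import numpy as np

# Hypothetical usage of the removed helpers; requires CuPy, a CUDA device,
# and the pre-4.3.9b0 pyerualjetwork_afterburner package.
from pyerualjetwork_afterburner.memory_operations import (
    transfer_to_gpu, transfer_to_cpu, optimize_labels)

x_host = np.random.rand(10_000, 64)                       # float64 host data
y_one_hot = np.eye(10)[np.random.randint(0, 10, 10_000)]  # one-hot labels

x_gpu = transfer_to_gpu(x_host)        # to GPU as float32, batched if large
y_small = optimize_labels(y_one_hot)   # labels downcast to uint8 (10 classes < 256)
x_back = transfer_to_cpu(x_gpu)        # back to a NumPy float32 array
```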
pyerualjetwork_afterburner/metrics.py
@@ -1,190 +0,0 @@
import numpy as np

def metrics(y_ts, test_preds, average='weighted'):
    """
    Calculates precision, recall and F1 score for a classification task.

    Args:
        y_ts (list or numpy.ndarray): True labels.
        test_preds (list or numpy.ndarray): Predicted labels.
        average (str): Type of averaging ('micro', 'macro', 'weighted').

    Returns:
        tuple: Precision, recall, F1 score.
    """

    from .data_operations import decode_one_hot

    y_test_d = decode_one_hot(y_ts)
    y_test_d = np.array(y_test_d)
    y_pred = np.array(test_preds)

    if y_test_d.ndim > 1:
        y_test_d = y_test_d.reshape(-1)
    if y_pred.ndim > 1:
        y_pred = y_pred.reshape(-1)

    tp = {}
    fp = {}
    fn = {}

    classes = np.unique(np.concatenate((y_test_d, y_pred)))

    for c in classes:
        tp[c] = 0
        fp[c] = 0
        fn[c] = 0

    for c in classes:
        for true, pred in zip(y_test_d, y_pred):
            if true == c and pred == c:
                tp[c] += 1
            elif true != c and pred == c:
                fp[c] += 1
            elif true == c and pred != c:
                fn[c] += 1

    precision = {}
    recall = {}
    f1 = {}

    for c in classes:
        precision[c] = tp[c] / (tp[c] + fp[c]) if (tp[c] + fp[c]) > 0 else 0
        recall[c] = tp[c] / (tp[c] + fn[c]) if (tp[c] + fn[c]) > 0 else 0
        f1[c] = 2 * (precision[c] * recall[c]) / (precision[c] + recall[c]) if (precision[c] + recall[c]) > 0 else 0

    if average == 'micro':
        precision_val = np.sum(list(tp.values())) / (np.sum(list(tp.values())) + np.sum(list(fp.values()))) if (np.sum(list(tp.values())) + np.sum(list(fp.values()))) > 0 else 0
        recall_val = np.sum(list(tp.values())) / (np.sum(list(tp.values())) + np.sum(list(fn.values()))) if (np.sum(list(tp.values())) + np.sum(list(fn.values()))) > 0 else 0
        f1_val = 2 * (precision_val * recall_val) / (precision_val + recall_val) if (precision_val + recall_val) > 0 else 0

    elif average == 'macro':
        precision_val = np.mean(list(precision.values()))
        recall_val = np.mean(list(recall.values()))
        f1_val = np.mean(list(f1.values()))

    elif average == 'weighted':
        weights = np.array([np.sum(y_test_d == c) for c in classes])
        weights = weights / np.sum(weights)
        precision_val = np.sum([weights[i] * precision[classes[i]] for i in range(len(classes))])
        recall_val = np.sum([weights[i] * recall[classes[i]] for i in range(len(classes))])
        f1_val = np.sum([weights[i] * f1[classes[i]] for i in range(len(classes))])

    else:
        raise ValueError("Invalid value for 'average'. Choose from 'micro', 'macro', 'weighted'.")

    return precision_val, recall_val, f1_val


def roc_curve(y_true, y_score):
    """
    Compute Receiver Operating Characteristic (ROC) curve.

    Parameters:
    y_true : array, shape = [n_samples]
        True binary labels in range {0, 1} or {-1, 1}.
    y_score : array, shape = [n_samples]
        Target scores, can either be probability estimates of the positive class,
        confidence values, or non-thresholded measure of decisions (as returned
        by decision_function on some classifiers).

    Returns:
    fpr : array, shape = [n]
        Increasing false positive rates such that element i is the false positive rate
        of predictions with score >= thresholds[i].
    tpr : array, shape = [n]
        Increasing true positive rates such that element i is the true positive rate
        of predictions with score >= thresholds[i].
    thresholds : array, shape = [n]
        Decreasing thresholds on the decision function used to compute fpr and tpr.
    """

    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)

    if len(np.unique(y_true)) != 2:
        raise ValueError("Only binary classification is supported.")

    desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1]
    y_score = y_score[desc_score_indices]
    y_true = y_true[desc_score_indices]

    fpr = []
    tpr = []
    thresholds = []
    n_pos = np.sum(y_true)
    n_neg = len(y_true) - n_pos

    tp = 0
    fp = 0
    prev_score = None

    for i, score in enumerate(y_score):
        if score != prev_score:
            fpr.append(fp / n_neg)
            tpr.append(tp / n_pos)
            thresholds.append(score)
            prev_score = score

        if y_true[i] == 1:
            tp += 1
        else:
            fp += 1

    fpr.append(fp / n_neg)
    tpr.append(tp / n_pos)
    thresholds.append(score)

    return np.array(fpr), np.array(tpr), np.array(thresholds)


def confusion_matrix(y_true, y_pred, class_count):
    """
    Computes confusion matrix.

    Args:
        y_true (numpy.ndarray): True class labels (1D array).
        y_pred (numpy.ndarray): Predicted class labels (1D array).
        class_count (int): Number of classes.

    Returns:
        numpy.ndarray: Confusion matrix of shape (class_count, class_count).
    """
    confusion = np.zeros((class_count, class_count), dtype=int)

    for i in range(len(y_true)):
        true_label = y_true[i]
        pred_label = y_pred[i]
        confusion[true_label, pred_label] += 1

    return confusion


def pca(X, n_components):
    """
    Reduces X to its first n_components principal components.

    Parameters:
        X (numpy array): (n_samples, n_features)
        n_components (int): Number of components to keep.

    Returns:
        X_reduced (numpy array): (n_samples, n_components)
    """

    X_meaned = X - np.mean(X, axis=0)

    covariance_matrix = np.cov(X_meaned, rowvar=False)

    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    sorted_index = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_index]

    eigenvectors_subset = sorted_eigenvectors[:, :n_components]

    X_reduced = np.dot(X_meaned, eigenvectors_subset)

    return X_reduced
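As an aside (not part of the diff): the removed metrics module expects one-hot true labels and integer predictions. A minimal, hypothetical call sketch, assuming the pre-4.3.9b0 package is installed and using made-up toy labels:

```python
import numpy as np

# Hypothetical call into the removed module (pre-4.3.9b0 package layout).
from pyerualjetwork_afterburner.metrics import metrics, confusion_matrix

y_true = np.array([0, 1, 2, 2, 1, 0])   # integer class labels
y_true_oh = np.eye(3)[y_true]           # metrics() decodes one-hot truth internally
y_pred = np.array([0, 1, 2, 1, 1, 0])   # predicted class labels

prec, rec, f1 = metrics(y_true_oh, y_pred, average='weighted')
cm = confusion_matrix(y_true, y_pred, class_count=3)   # 3x3 count matrix
```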
pyerualjetwork_afterburner/metrics_cuda.py
@@ -1,163 +0,0 @@
import cupy as cp

def metrics(y_ts, test_preds, average='weighted'):
    from .data_operations import decode_one_hot
    y_test_d = cp.array(decode_one_hot(y_ts))
    y_pred = cp.array(test_preds)

    if y_test_d.ndim > 1:
        y_test_d = y_test_d.ravel()
    if y_pred.ndim > 1:
        y_pred = y_pred.ravel()

    classes = cp.unique(cp.concatenate((y_test_d, y_pred)))
    tp = cp.zeros(len(classes), dtype=cp.int32)
    fp = cp.zeros(len(classes), dtype=cp.int32)
    fn = cp.zeros(len(classes), dtype=cp.int32)

    for i, c in enumerate(classes):
        tp[i] = cp.sum((y_test_d == c) & (y_pred == c))
        fp[i] = cp.sum((y_test_d != c) & (y_pred == c))
        fn[i] = cp.sum((y_test_d == c) & (y_pred != c))

    precision = tp / (tp + fp + 1e-10)
    recall = tp / (tp + fn + 1e-10)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-10)

    if average == 'micro':
        tp_sum = cp.sum(tp)
        fp_sum = cp.sum(fp)
        fn_sum = cp.sum(fn)
        precision_val = tp_sum / (tp_sum + fp_sum + 1e-10)
        recall_val = tp_sum / (tp_sum + fn_sum + 1e-10)
        f1_val = 2 * (precision_val * recall_val) / (precision_val + recall_val + 1e-10)

    elif average == 'macro':
        precision_val = cp.mean(precision)
        recall_val = cp.mean(recall)
        f1_val = cp.mean(f1)

    elif average == 'weighted':
        weights = cp.array([cp.sum(y_test_d == c) for c in classes])
        weights = weights / cp.sum(weights)
        precision_val = cp.sum(weights * precision)
        recall_val = cp.sum(weights * recall)
        f1_val = cp.sum(weights * f1)

    else:
        raise ValueError("Invalid value for 'average'. Choose from 'micro', 'macro', 'weighted'.")

    return precision_val.item(), recall_val.item(), f1_val.item()


def roc_curve(y_true, y_score):
    """
    Compute Receiver Operating Characteristic (ROC) curve.

    Parameters:
    y_true : array, shape = [n_samples]
        True binary labels in range {0, 1} or {-1, 1}.
    y_score : array, shape = [n_samples]
        Target scores, can either be probability estimates of the positive class,
        confidence values, or non-thresholded measure of decisions (as returned
        by decision_function on some classifiers).

    Returns:
    fpr : array, shape = [n]
        Increasing false positive rates such that element i is the false positive rate
        of predictions with score >= thresholds[i].
    tpr : array, shape = [n]
        Increasing true positive rates such that element i is the true positive rate
        of predictions with score >= thresholds[i].
    thresholds : array, shape = [n]
        Decreasing thresholds on the decision function used to compute fpr and tpr.
    """

    y_true = cp.asarray(y_true)
    y_score = cp.asarray(y_score)

    if len(cp.unique(y_true)) != 2:
        raise ValueError("Only binary classification is supported.")

    desc_score_indices = cp.argsort(y_score, kind="stable")[::-1]
    y_score = y_score[desc_score_indices]
    y_true = y_true[desc_score_indices]

    fpr = []
    tpr = []
    thresholds = []
    n_pos = cp.sum(y_true)
    n_neg = len(y_true) - n_pos

    tp = 0
    fp = 0
    prev_score = 0

    for i, score in enumerate(y_score):
        if score is not prev_score:
            fpr.append(fp / n_neg)
            tpr.append(tp / n_pos)
            thresholds.append(score)
            prev_score = score

        if y_true[i] == 1:
            tp += 1
        else:
            fp += 1

    fpr.append(fp / n_neg)
    tpr.append(tp / n_pos)
    thresholds.append(score)

    return cp.array(fpr), cp.array(tpr), cp.array(thresholds)


def confusion_matrix(y_true, y_pred, class_count):
    """
    Computes confusion matrix.

    Args:
        y_true (cupy.ndarray): True class labels (1D array).
        y_pred (cupy.ndarray): Predicted class labels (1D array).
        class_count (int): Number of classes.

    Returns:
        cupy.ndarray: Confusion matrix of shape (class_count, class_count).
    """
    confusion = cp.zeros((class_count, class_count), dtype=int)

    for i in range(len(y_true)):
        true_label = y_true[i]
        pred_label = y_pred[i]
        confusion[true_label, pred_label] += 1

    return confusion


def pca(X, n_components):
    """
    Reduces X to its first n_components principal components.

    Parameters:
        X (cupy array): (n_samples, n_features)
        n_components (int): Number of components to keep.

    Returns:
        X_reduced (cupy array): (n_samples, n_components)
    """

    X_meaned = X - cp.mean(X, axis=0)

    covariance_matrix = cp.cov(X_meaned, rowvar=False)

    eigenvalues, eigenvectors = cp.linalg.eigh(covariance_matrix)

    sorted_index = cp.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_index]

    eigenvectors_subset = sorted_eigenvectors[:, :n_components]

    X_reduced = cp.dot(X_meaned, eigenvectors_subset)

    return X_reduced
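The CUDA variant computes the same per-class counts as the NumPy version above, but with vectorized boolean masks instead of a Python double loop. Below is a self-contained sketch of that counting pattern, written with NumPy purely for illustration so it runs without a GPU; with CuPy installed, `np` could be swapped for `cp` unchanged.

```python
import numpy as np

# Vectorized per-class TP/FP/FN counting, mirroring the removed CUDA module.
y_true = np.array([0, 1, 2, 2, 1, 0])
y_pred = np.array([0, 1, 2, 1, 1, 0])

classes = np.unique(np.concatenate((y_true, y_pred)))
tp = np.array([np.sum((y_true == c) & (y_pred == c)) for c in classes])
fp = np.array([np.sum((y_true != c) & (y_pred == c)) for c in classes])
fn = np.array([np.sum((y_true == c) & (y_pred != c)) for c in classes])

precision = tp / (tp + fp + 1e-10)   # small epsilon avoids division by zero
recall = tp / (tp + fn + 1e-10)
f1 = 2 * precision * recall / (precision + recall + 1e-10)
print(precision, recall, f1)
```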