fastl2lir 0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastl2lir-0.11/LICENSE +21 -0
- fastl2lir-0.11/PKG-INFO +78 -0
- fastl2lir-0.11/README.md +40 -0
- fastl2lir-0.11/pyproject.toml +33 -0
- fastl2lir-0.11/src/fastl2lir/__init__.py +1 -0
- fastl2lir-0.11/src/fastl2lir/fastl2lir.py +431 -0
- fastl2lir-0.11/src/fastl2lir/py.typed +0 -0
fastl2lir-0.11/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2019 Kamitani Lab
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
fastl2lir-0.11/PKG-INFO
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: fastl2lir
|
|
3
|
+
Version: 0.11
|
|
4
|
+
Summary: Fast L2-regularized linear regression
|
|
5
|
+
Author: Kei Majima
|
|
6
|
+
Author-email: Kei Majima <kamitanilab@gmail.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2019 Kamitani Lab
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
Requires-Dist: numpy>=1.16.6
|
|
29
|
+
Requires-Dist: threadpoolctl>=2.1.0 ; python_full_version >= '3.5'
|
|
30
|
+
Requires-Dist: tqdm>=4.64.1
|
|
31
|
+
Maintainer: Shuntaro C. Aoki, Yoshihiro Nagano
|
|
32
|
+
Maintainer-email: Shuntaro C. Aoki <kamitanilab@gmail.com>, Yoshihiro Nagano <kamitanilab@gmail.com>
|
|
33
|
+
Requires-Python: >=3.1
|
|
34
|
+
Project-URL: Bug Tracker, https://github.com/KamitaniLab/PyFastL2LiR/issues
|
|
35
|
+
Project-URL: Homepage, https://github.com/KamitaniLab/PyFastL2LiR
|
|
36
|
+
Project-URL: Repository, https://github.com/KamitaniLab/PyFastL2LiR
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# PyFastL2LiR: Fast L2-regularized Linear Regression
|
|
40
|
+
|
|
41
|
+
[PyPI version](https://badge.fury.io/py/fastl2lir)
|
|
42
|
+
[License: MIT](https://github.com/KamitaniLab/PyFastL2LiR/blob/master/LICENSE)
|
|
43
|
+
|
|
44
|
+
PyFastL2LiR is a fast implementation of ridge regression (regression with L2 regularization) that was developed for predicting neural network unit activities from fMRI data. This method is five times faster than ordinary implementations of ridge regression, and can be used with feature selection.
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
$ pip install fastl2lir
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
When installing on Python >= 3.5, `threadpoolctl` is also required.
|
|
53
|
+
|
|
54
|
+
## Usage
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
import fastl2lir
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
model = fastl2lir.FastL2LiR()
|
|
61
|
+
model.fit(X, Y, alpha, n_feat)
|
|
62
|
+
Y_predicted = model.predict(X)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Here,
|
|
66
|
+
|
|
67
|
+
* `X`: A matrix (# of training samples x # of voxels).
|
|
68
|
+
* `Y`: A matrix including label information (# of training samples x # of CNN features).
|
|
69
|
+
* `alpha`: Regularization parameter (coefficient of the L2-norm penalty).
|
|
70
|
+
* `n_feat`: # of features to be selected (feature selection is based on correlation coefficient).
|
|
71
|
+
|
|
72
|
+
See `demo.py` for more examples.
|
|
73
|
+
|
|
74
|
+
## Notice
|
|
75
|
+
|
|
76
|
+
* You don't need to add bias term in `X`; `FastL2LiR` automatically adds the bias term in the input data.
|
|
77
|
+
* `FastL2LiR.fit()` automatically performs feature selection. You don't need to select features by yourself.
|
|
78
|
+
* `X` and `Y` should be z-scored with mean and standard deviation of training data.
|
fastl2lir-0.11/README.md
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# PyFastL2LiR: Fast L2-regularized Linear Regression
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://badge.fury.io/py/fastl2lir)
|
|
4
|
+
[License: MIT](https://github.com/KamitaniLab/PyFastL2LiR/blob/master/LICENSE)
|
|
5
|
+
|
|
6
|
+
PyFastL2LiR is a fast implementation of ridge regression (regression with L2 regularization) that was developed for predicting neural network unit activities from fMRI data. This method is five times faster than ordinary implementations of ridge regression, and can be used with feature selection.
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
$ pip install fastl2lir
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
When installing on Python >= 3.5, `threadpoolctl` is also required.
|
|
15
|
+
|
|
16
|
+
## Usage
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
import fastl2lir
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
model = fastl2lir.FastL2LiR()
|
|
23
|
+
model.fit(X, Y, alpha, n_feat)
|
|
24
|
+
Y_predicted = model.predict(X)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Here,
|
|
28
|
+
|
|
29
|
+
* `X`: A matrix (# of training samples x # of voxels).
|
|
30
|
+
* `Y`: A matrix including label information (# of training samples x # of CNN features).
|
|
31
|
+
* `alpha`: Regularization parameter (coefficient of the L2-norm penalty).
|
|
32
|
+
* `n_feat`: # of features to be selected (feature selection is based on correlation coefficient).
|
|
33
|
+
|
|
34
|
+
See `demo.py` for more examples.
|
|
35
|
+
|
|
36
|
+
## Notice
|
|
37
|
+
|
|
38
|
+
* You don't need to add bias term in `X`; `FastL2LiR` automatically adds the bias term in the input data.
|
|
39
|
+
* `FastL2LiR.fit()` automatically performs feature selection. You don't need to select features by yourself.
|
|
40
|
+
* `X` and `Y` should be z-scored with mean and standard deviation of training data.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "fastl2lir"
|
|
3
|
+
version = "0.11"
|
|
4
|
+
description = "Fast L2-regularized linear regression"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Kei Majima", email = "kamitanilab@gmail.com" }
|
|
8
|
+
]
|
|
9
|
+
maintainers = [
|
|
10
|
+
{ name = "Shuntaro C. Aoki", email = "kamitanilab@gmail.com" },
|
|
11
|
+
{ name = "Yoshihiro Nagano", email = "kamitanilab@gmail.com" }
|
|
12
|
+
]
|
|
13
|
+
license = { file = "LICENSE" }
|
|
14
|
+
requires-python = ">=3.1"
|
|
15
|
+
dependencies = [
|
|
16
|
+
"numpy>=1.16.6",
|
|
17
|
+
"threadpoolctl>=2.1.0 ; python_full_version >= '3.5'",
|
|
18
|
+
"tqdm>=4.64.1",
|
|
19
|
+
]
|
|
20
|
+
[project.urls]
|
|
21
|
+
Homepage = "https://github.com/KamitaniLab/PyFastL2LiR"
|
|
22
|
+
Repository = "https://github.com/KamitaniLab/PyFastL2LiR"
|
|
23
|
+
"Bug Tracker" = "https://github.com/KamitaniLab/PyFastL2LiR/issues"
|
|
24
|
+
|
|
25
|
+
[build-system]
|
|
26
|
+
requires = ["uv_build>=0.8.22,<0.9.0"]
|
|
27
|
+
build-backend = "uv_build"
|
|
28
|
+
|
|
29
|
+
[dependency-groups]
|
|
30
|
+
dev = [
|
|
31
|
+
"pytest>=4.6.11",
|
|
32
|
+
"ruff>=0.0.17",
|
|
33
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .fastl2lir import FastL2LiR
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
'''PyFastL2LiR: Fast L2-regularized Linear Regression.'''
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
import math
|
|
5
|
+
import sys
|
|
6
|
+
from time import time
|
|
7
|
+
import warnings
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
|
|
12
|
+
pv = sys.version_info
|
|
13
|
+
|
|
14
|
+
if pv.major > 3 or (pv.major == 3 and pv.minor >= 5):
|
|
15
|
+
from threadpoolctl import threadpool_limits
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FastL2LiR(object):
    '''Fast L2-regularized linear regression class.

    The fitted parameters are exposed through the properties `W` (weights),
    `b` (bias) and `S` (boolean mask of the input features selected for each
    output unit; only set when fitting in "save selected features" mode).
    '''

    def __init__(self, W=None, b=None, verbose=False):
        '''Create a model, optionally with pre-computed parameters.

        Parameters
        ----------
        W, b : array_like, optional
            Weights and bias. Each defaults to a new empty array.
        verbose : bool
            If True, `fit` prints chunk progress information.
        '''
        # Build the empty defaults per instance so that instances never share
        # one mutable default array.
        self.__W = np.array([]) if W is None else W
        self.__b = np.array([]) if b is None else b
        # Always defined, so that reading `S` before fitting does not fail.
        self.__S = None
        self.__verbose = verbose

    @property
    def W(self):
        return self.__W

    @W.setter
    def W(self, W):
        self.__W = W

    @property
    def b(self):
        return self.__b

    @b.setter
    def b(self, b):
        self.__b = b

    @property
    def S(self):
        return self.__S

    @S.setter
    def S(self, S):
        self.__S = S

    def fit(self, X, Y, alpha=1.0, n_feat=0, save_select_feat=False, spatial_norm=None, select_sample=None, chunk_size=0, cache_dir='./cache', dtype=np.float64):
        '''Fit the L2-regularized linear model with the given data.

        Parameters
        ----------
        X, Y : array_like
            Training inputs (data and targets). `Y` with more than two
            dimensions is flattened (Fortran order) for fitting and the
            fitted parameters are reshaped back afterwards.
        alpha: float
            Regularization parameter (coefficient for L2-norm).
        n_feat: int
            The number of selected input features. 0 means "use all".
        save_select_feat: bool
            Save bool matrix indicating selected voxel for each unit.
            Since fitting is performed for each unit, the amount of memory
            required at runtime can be reduced (On the other hand, computation
            time and storage requirements increase).
        spatial_norm: str (None, 'norm1', 'norm2', 'std1', 'std1mean0', 'norm1mean0', or 'norm2mean0')
            Perform spatial normalization (sample unit) on the voxel selected
            for each unit. Selecting this automatically sets 'save_select_feat'
            to True because it is necessary to save the index matrix of the
            selected voxel
        select_sample: str ('remove_nan' or None)
            Specify how to select training samples. 'remove_nan' drops, for
            each unit, the samples whose target value is NaN.
            Selecting this automatically sets 'save_select_feat' to True
            because this is an operation for each unit.
            (The sample selection operation itself does not essentially need
            to record the selected voxel.)
        chunk_size: int
            If positive, the output units are fitted in groups of at most
            `chunk_size` columns and the results are concatenated.
        cache_dir: str
            Not used by this method.
        dtype: numpy dtype
            `X` and `Y` are cast to this type before fitting.

        Returns
        -------
        self
            Returns an instance of self.
        '''

        if X.dtype != dtype:
            X = X.astype(dtype)
        if Y.dtype != dtype:
            Y = Y.astype(dtype)

        # Reshape Y
        reshape_y = Y.ndim > 2
        if reshape_y:
            Y_shape = Y.shape
            Y = Y.reshape(Y.shape[0], -1, order='F')

        # Feature selection settings
        if n_feat == 0:
            n_feat = X.shape[1]

        no_feature_selection = X.shape[1] == n_feat

        if n_feat > X.shape[1]:
            warnings.warn('X has less features than n_feat (X.shape[1] < n_feat). Feature selection is not applied.')
            no_feature_selection = True

        # Save selected voxel mode (implied by the per-unit options)
        if not save_select_feat:
            if (spatial_norm is not None) or (select_sample is not None):
                save_select_feat = True

        def fit_columns(Y_sub):
            # Fit one group of output columns; returns (W, b, S), where S is
            # None unless the selected-feature mask is being recorded.
            if save_select_feat:
                return self.__sub_fit_save_select_feat(
                    X, Y_sub, alpha=alpha, n_feat=n_feat,
                    spatial_norm=spatial_norm,
                    use_all_features=no_feature_selection,
                    select_sample=select_sample,
                    dtype=dtype
                )
            W_sub, b_sub = self.__sub_fit(
                X, Y_sub, alpha=alpha, n_feat=n_feat,
                use_all_features=no_feature_selection,
                dtype=dtype
            )
            return W_sub, b_sub, None

        # Chunking
        if chunk_size > 0:
            chunks = self.__get_chunks(range(Y.shape[1]), chunk_size)

            if self.__verbose:
                print('Num chunks: %d' % len(chunks))

            w_list = []
            b_list = []
            s_list = []
            for i, chunk in enumerate(chunks):
                start_time = time()
                W, b, S = fit_columns(Y[0:, chunk])
                w_list.append(W)
                b_list.append(b)
                if save_select_feat:
                    s_list.append(S)

                if self.__verbose:
                    print('Chunk %d (time: %f s)' % (i + 1, time() - start_time))

            W = np.hstack(w_list)
            b = np.hstack(b_list)
            S = np.hstack(s_list) if save_select_feat else None
        else:
            W, b, S = fit_columns(Y)

        self.__W = W
        self.__b = b
        if save_select_feat:
            self.__S = S

        if reshape_y:
            # Restore the trailing dimensions of the original Y.
            self.__W = self.__W.reshape((self.__W.shape[0],) + Y_shape[1:], order='F')
            self.__b = self.__b.reshape((1,) + Y_shape[1:], order='F')
            if save_select_feat:
                self.__S = self.__S.reshape((self.__S.shape[0],) + Y_shape[1:], order='F')

        return self

    def predict(self, X, dtype=np.float64, save_select_feat=False, spatial_norm=None):
        '''Predict with the fitted linear model.

        Parameters
        ----------
        X : array_like
        dtype : numpy dtype
            `X` is cast to this type before prediction.
        save_select_feat: bool
            Load bool matrix indicating selected voxel for each unit.
            If save_select_feat is True during training, it must be true
            during testing as well.
        spatial_norm: str (None, 'norm1', 'norm2', 'std1', 'std1mean0', 'norm1mean0', or 'norm2mean0')
            Perform spatial normalization (sample unit) on the voxel selected
            for each unit. It is necessary to specify the same spatial_norm
            method as during training. Setting it implies save_select_feat.

        Returns
        -------
        Y : array_like

        Raises
        ------
        RuntimeError
            If the selected-feature mask is required but has not been set,
            or if `spatial_norm` is not a supported method.
        '''
        if X.dtype != dtype:
            X = X.astype(dtype)

        # Save selected voxel mode
        if not save_select_feat:
            if spatial_norm is not None:
                save_select_feat = True

        if save_select_feat and self.__S is None:
            raise RuntimeError('Selected feature matrix (S) is not available. Fit the model with save_select_feat=True first.')

        # Reshape
        reshape_y = self.__W.ndim > 2
        if reshape_y:
            Y_shape = self.__W.shape
            W = self.__W.reshape(self.__W.shape[0], -1, order='F')
            b = self.__b.reshape(self.__b.shape[0], -1, order='F')
            if save_select_feat:
                S = self.__S.reshape(self.__S.shape[0], -1, order='F')
        else:
            W = self.__W
            b = self.__b
            if save_select_feat:
                S = self.__S

        # Prediction
        if save_select_feat:
            Y = np.zeros((X.shape[0], W.shape[1]), dtype=dtype)
            for si in range(W.shape[1]):  # Loop for feature
                selected_voxel = S[:, si]
                newX = X[:, selected_voxel]  # extract selected features

                # Perform the sample normalization.
                newX = self.__apply_spatial_normalization(newX, spatial_norm)

                # Predict
                newW = W[:, si].reshape(-1, 1)
                newW = newW[selected_voxel, :].reshape(-1, 1)  # extract selected features
                Y[:, si] = (np.matmul(newX, newW) + b[:, si]).flatten()
        else:
            Y = np.matmul(X, W) + np.matmul(np.ones((X.shape[0], 1), dtype=dtype), b)

        if reshape_y:
            Y = Y.reshape((Y.shape[0],) + Y_shape[1:], order='F')

        return Y

    def __sub_fit(self, X, Y, alpha=0, n_feat=0, use_all_features=True, dtype=np.float64):
        '''Fit all columns of `Y` at once; returns `(W, b)` with `b` 2-D.'''
        if use_all_features:
            # Without feature selection
            X = np.hstack((X, np.ones((X.shape[0], 1), dtype=dtype)))

            # Choose the more efficient method based on matrix dimensions
            if X.shape[0] > X.shape[1]:
                # Use primal form for tall matrices (more samples than features)
                Wb = np.linalg.solve(np.matmul(X.T, X) + alpha * np.eye(X.shape[1], dtype=dtype), np.matmul(X.T, Y))
            else:
                # Use dual form for wide matrices (more features than samples)
                Wb = np.matmul(X.T, np.linalg.solve(np.matmul(X, X.T) + alpha * np.eye(X.shape[0], dtype=dtype), Y))

            W = Wb[0:-1, :]
            b = Wb[-1, :][np.newaxis, :]  # Returning b as a 2D array
            return W, b

        # With feature selection
        W = np.zeros((Y.shape[1], X.shape[1]), dtype=dtype)
        b = np.zeros((1, Y.shape[1]), dtype=dtype)

        # (Near-)constant inputs get zero correlation so they rank last.
        const_cols = np.nonzero(np.var(X, axis=0) < 0.00000001)[0]
        C = corrmat(X, Y, 'col')
        C[const_cols, :] = 0.0

        X = np.hstack((X, np.ones((X.shape[0], 1), dtype=dtype)))
        W0 = np.matmul(X.T, X) + alpha * np.eye(X.shape[1], dtype=dtype)
        W1 = np.matmul(Y.T, X)
        C = C.T

        if sys.version_info >= (3, 5):
            # Limit BLAS to a single thread while running the per-unit loop.
            with threadpool_limits(limits=1, user_api='blas'):
                self.__solve_selected(C, W0, W1, W, b, n_feat)
        else:
            self.__solve_selected(C, W0, W1, W, b, n_feat)

        return W.T, b

    def __solve_selected(self, C, W0, W1, W, b, n_feat):
        '''Solve each output unit on its `n_feat` most correlated inputs.

        `W` and `b` are filled in place. `C` holds one row of correlations
        per output unit; `W0` and `W1` are the regularized Gram matrix and
        the target/input products of the bias-augmented inputs.
        '''
        bias_index = W0.shape[0] - 1  # the bias is the last augmented column
        n_col = W0.shape[1]
        W0_flat = W0.ravel()
        for index_outputDim in tqdm(range(W.shape[0])):
            # Inputs ordered by decreasing absolute correlation.
            selected = np.argsort(abs(C[index_outputDim, :]))[::-1][0:n_feat]
            I = np.hstack((selected, bias_index))
            # Gather the (I x I) sub-matrix of W0 through flat indexing.
            W0_sub = (W0_flat[(I + (I * n_col).reshape((-1, 1))).ravel()]).reshape(I.size, I.size)
            Wb = np.linalg.solve(W0_sub, W1[index_outputDim][I].reshape(-1, 1))
            W[index_outputDim, selected] = Wb[:-1, 0]
            b[0, index_outputDim] = Wb[-1, 0]

    def __sub_fit_save_select_feat(
            self, X, Y, alpha=0, n_feat=0,
            spatial_norm=None,
            use_all_features=True,
            select_sample=None,
            dtype=np.float64
    ):
        '''
        Execute fitting for each unit.
        Enables spatial normalization for selected voxels and selection of
        training samples.

        Returns `(W, b, S)` where `S` is a boolean (input x unit) matrix
        marking the inputs used for each unit.
        '''
        # Prepare the matrices to save.
        W = np.zeros((Y.shape[1], X.shape[1]), dtype=dtype)  # feature size x voxel size
        b = np.zeros((1, Y.shape[1]), dtype=dtype)           # feature size
        # Builtin bool: the `np.bool` alias no longer exists in recent NumPy.
        S = np.zeros((Y.shape[1], X.shape[1]), dtype=bool)   # feature size x voxel size

        if not sys.version_info >= (3, 5):
            raise RuntimeError('Python version requires 3.5 or more.')

        if select_sample is not None and select_sample != 'remove_nan':
            raise RuntimeError('Not implemented selection method:', select_sample)

        with threadpool_limits(limits=1, user_api='blas'):
            for index_outputDim in tqdm(range(Y.shape[1])):
                # Select training samples
                if select_sample == 'remove_nan':
                    # Delete sample with nan value in unit
                    selector = np.logical_not(np.isnan(Y[:, index_outputDim].flatten()))
                    selX = X[selector, :]
                    selY = Y[selector, index_outputDim].reshape(-1, 1)
                else:
                    # No sample selection: use every training sample.
                    selX = X
                    selY = Y[:, index_outputDim].reshape(-1, 1)

                # Select voxels
                if use_all_features:
                    I = np.arange(selX.shape[1])
                else:
                    C0 = abs(corrmat(selX, selY, 'col')).ravel()
                    I = np.argsort(C0 * -1)
                    I = I[0:n_feat]
                newX = selX[:, I]  # sample_num x voxel_num
                S[index_outputDim, I] = True

                # Perform the spatial normalization
                newX = self.__apply_spatial_normalization(newX, spatial_norm)

                # Fit
                newX = np.hstack((newX, np.ones((newX.shape[0], 1), dtype=dtype)))  # Add one column to rightmost column
                W0 = np.matmul(newX.T, newX) + alpha * np.eye(newX.shape[1], dtype=dtype)
                W1 = np.matmul(selY.ravel(), newX).reshape(-1, 1)
                Wb = np.linalg.solve(W0, W1)
                # Write back by index array: valid even when n_feat exceeds
                # the number of available inputs.
                W[index_outputDim, I] = Wb[:-1, 0]
                b[0, index_outputDim] = Wb[-1, 0]

        return W.T, b, S.T

    def __get_chunks(self, a, chunk_size):
        '''Split sequence `a` into consecutive slices of at most `chunk_size` items.'''
        return [a[start:start + chunk_size] for start in range(0, len(a), chunk_size)]

    def __apply_spatial_normalization(self, X, spatial_norm):
        '''
        Perform the spatial normalization

        Each row (sample) of `X` is normalized across its columns with the
        method named by `spatial_norm`; None returns `X` unchanged.
        Raises RuntimeError for an unknown method name.
        '''
        if spatial_norm is None:
            return X

        if spatial_norm == 'norm1':  # L1norm (Divide by L1norm on each sample)
            return X / np.sum(np.abs(X), axis=1).reshape(X.shape[0], 1)

        if spatial_norm == 'norm2':  # L2norm (Divide by L2norm on each sample)
            return X / np.sqrt(np.sum(np.square(X), axis=1)).reshape(X.shape[0], 1)

        if spatial_norm == 'std1':  # Normalize with STD=1
            return (X - np.mean(X, axis=1, keepdims=True)) / np.std(X, axis=1, ddof=1, keepdims=True) + np.mean(X, axis=1, keepdims=True)

        if spatial_norm == 'std1mean0':  # Mean correction + Normalize with STD=1
            return (X - np.mean(X, axis=1, keepdims=True)) / np.std(X, axis=1, ddof=1, keepdims=True)

        if spatial_norm == 'norm1mean0':  # Mean correction + L1norm (Divide by L1norm on each sample)
            X = X - np.mean(X, axis=1, keepdims=True)
            return X / np.sum(np.abs(X), axis=1).reshape(X.shape[0], 1)

        if spatial_norm == 'norm2mean0':  # Mean correction + L2norm (Divide by L2norm on each sample)
            X = X - np.mean(X, axis=1, keepdims=True)
            return X / np.sqrt(np.sum(np.square(X), axis=1)).reshape(X.shape[0], 1)

        raise RuntimeError('Not implemented spatial normalization method:', spatial_norm)
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
# Functions ##################################################################
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def corrmat(x, y, var='row'):
    '''
    Returns correlation matrix between `x` and `y`

    Parameters
    ----------
    x, y : array_like
        2-D matrices sharing the observation axis
    var : str, 'row' or 'col'
        Specifying whether rows (default) or columns represent variables

    Returns
    -------
    rmat
        Correlation matrix (variables of `x` by variables of `y`)

    Raises
    ------
    ValueError
        If `var` is neither 'row' nor 'col'.
    '''

    # Work on plain ndarrays (np.matrix is no longer recommended by NumPy).
    x = np.asarray(x)
    y = np.asarray(y)

    # Fix x and y to represent variables in each row
    if var == 'row':
        pass
    elif var == 'col':
        x = x.T
        y = y.T
    else:
        raise ValueError('Unknown var parameter specified')

    nobs = x.shape[1]

    # Center every variable on its mean over the observations.
    x_centered = x - np.mean(x, axis=1, keepdims=True)
    y_centered = y - np.mean(y, axis=1, keepdims=True)

    # Sample covariance divided by the outer product of sample deviations.
    covariance = np.dot(x_centered, y_centered.T) / (nobs - 1)
    scale = np.outer(np.std(x, axis=1, ddof=1), np.std(y, axis=1, ddof=1))

    return np.array(covariance / scale)
|
|
File without changes
|