amica-python 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amica_python-0.1.0/LICENSE +25 -0
- amica_python-0.1.0/PKG-INFO +196 -0
- amica_python-0.1.0/README.md +160 -0
- amica_python-0.1.0/pyproject.toml +97 -0
- amica_python-0.1.0/setup.cfg +4 -0
- amica_python-0.1.0/src/amica/__init__.py +5 -0
- amica_python-0.1.0/src/amica/_batching.py +194 -0
- amica_python-0.1.0/src/amica/_newton.py +77 -0
- amica_python-0.1.0/src/amica/_sklearn_interface.py +387 -0
- amica_python-0.1.0/src/amica/_types.py +44 -0
- amica_python-0.1.0/src/amica/conftest.py +30 -0
- amica_python-0.1.0/src/amica/constants.py +47 -0
- amica_python-0.1.0/src/amica/core.py +1165 -0
- amica_python-0.1.0/src/amica/datasets.py +15 -0
- amica_python-0.1.0/src/amica/kernels.py +1308 -0
- amica_python-0.1.0/src/amica/linalg.py +349 -0
- amica_python-0.1.0/src/amica/state.py +385 -0
- amica_python-0.1.0/src/amica/tests/test_amica.py +497 -0
- amica_python-0.1.0/src/amica/utils/__init__.py +36 -0
- amica_python-0.1.0/src/amica/utils/_logging.py +64 -0
- amica_python-0.1.0/src/amica/utils/_progress.py +34 -0
- amica_python-0.1.0/src/amica/utils/_verbose.py +14 -0
- amica_python-0.1.0/src/amica/utils/fetch.py +274 -0
- amica_python-0.1.0/src/amica/utils/fortran.py +387 -0
- amica_python-0.1.0/src/amica/utils/imports.py +46 -0
- amica_python-0.1.0/src/amica/utils/mne.py +74 -0
- amica_python-0.1.0/src/amica/utils/parallel.py +72 -0
- amica_python-0.1.0/src/amica/utils/simulation.py +36 -0
- amica_python-0.1.0/src/amica/utils/tests/test_fetch.py +9 -0
- amica_python-0.1.0/src/amica/utils/tests/test_fortran.py +47 -0
- amica_python-0.1.0/src/amica/utils/tests/test_imports.py +0 -0
- amica_python-0.1.0/src/amica/utils/tests/test_logger.py +29 -0
- amica_python-0.1.0/src/amica/utils/tests/test_mne.py +27 -0
- amica_python-0.1.0/src/amica_python.egg-info/PKG-INFO +196 -0
- amica_python-0.1.0/src/amica_python.egg-info/SOURCES.txt +36 -0
- amica_python-0.1.0/src/amica_python.egg-info/dependency_links.txt +1 -0
- amica_python-0.1.0/src/amica_python.egg-info/requires.txt +31 -0
- amica_python-0.1.0/src/amica_python.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
BSD 2-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2015-2020, Jason Palmer and contributors
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
|
8
|
+
|
|
9
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
10
|
+
list of conditions and the following disclaimer.
|
|
11
|
+
|
|
12
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
and/or other materials provided with the distribution.
|
|
15
|
+
|
|
16
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
17
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
19
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
20
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
21
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
22
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
23
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
24
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
25
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: amica-python
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Adaptive Mixture ICA in Python
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: loguru
|
|
9
|
+
Requires-Dist: rich
|
|
10
|
+
Requires-Dist: pooch>=1.5
|
|
11
|
+
Requires-Dist: psutil
|
|
12
|
+
Requires-Dist: numpy>=2.2.6
|
|
13
|
+
Requires-Dist: scikit-learn>=1.7.0
|
|
14
|
+
Provides-Extra: torch-cpu
|
|
15
|
+
Requires-Dist: torch; extra == "torch-cpu"
|
|
16
|
+
Provides-Extra: torch-cuda
|
|
17
|
+
Requires-Dist: torch; extra == "torch-cuda"
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest; extra == "dev"
|
|
20
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-timeout; extra == "dev"
|
|
22
|
+
Requires-Dist: matplotlib; extra == "dev"
|
|
23
|
+
Requires-Dist: mne; extra == "dev"
|
|
24
|
+
Requires-Dist: ruff; extra == "dev"
|
|
25
|
+
Provides-Extra: doc
|
|
26
|
+
Requires-Dist: sphinx<8.2; extra == "doc"
|
|
27
|
+
Requires-Dist: shibuya; extra == "doc"
|
|
28
|
+
Requires-Dist: sphinx-gallery; extra == "doc"
|
|
29
|
+
Requires-Dist: numpydoc; extra == "doc"
|
|
30
|
+
Requires-Dist: sphinx-design; extra == "doc"
|
|
31
|
+
Requires-Dist: sphinxcontrib-bibtex; extra == "doc"
|
|
32
|
+
Requires-Dist: sphinx-copybutton; extra == "doc"
|
|
33
|
+
Requires-Dist: healpy; extra == "doc"
|
|
34
|
+
Requires-Dist: pandas; extra == "doc"
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
|
|
37
|
+
[](https://codecov.io/github/scott-huberty/amica-python)
|
|
38
|
+
[](https://github.com/scott-huberty/amica-python/actions/workflows/ci.yaml)
|
|
39
|
+
[](https://dl.circleci.com/status-badge/redirect/gh/scott-huberty/amica-python/tree/main)
|
|
40
|
+
[](https://github.com/astral-sh/ruff)
|
|
41
|
+
|
|
42
|
+
# AMICA-Python
|
|
43
|
+
### Yes, it's fast.
|
|
44
|
+
|
|
45
|
+
A Python implementation of the [AMICA](https://sccn.ucsd.edu/~jason/amica_a.pdf) (Adaptive Mixture Independent Component Analysis) algorithm for blind source separation, that was originally [developed in FORTRAN](https://github.com/sccn/amica) by Jason Palmer at the Swartz Center for Computational Neuroscience (SCCN).
|
|
46
|
+
|
|
47
|
+
AMICA-Python is pre-alpha but is tested against the Fortran implementation and is ready for test driving.
|
|
48
|
+
|
|
49
|
+
| Python | Fortran |
|
|
50
|
+
|--------|---------|
|
|
51
|
+
| <img src="https://raw.githubusercontent.com/scott-huberty/amica-python/main/docs/source/_static/amica-python.gif" width=400px /> | <img src="https://raw.githubusercontent.com/scott-huberty/amica-python/main/docs/source/_static/amica-fortran.gif" width=400px /> |
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
For now, AMICA-Python should be installed from source, and you will have to manually install
|
|
57
|
+
PyTorch (see below) yourself:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
git clone https://github.com/scott-huberty/amica-python.git
|
|
61
|
+
cd amica-python
|
|
62
|
+
pip install -e .
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
> [!IMPORTANT]
|
|
66
|
+
> You must install PyTorch before using AMICA-Python.
|
|
67
|
+
|
|
68
|
+
### Installing PyTorch
|
|
69
|
+
|
|
70
|
+
Depending on your system and preferences, you can install PyTorch with or without GPU support.
|
|
71
|
+
|
|
72
|
+
To install the standard version of PyTorch, run:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
python -m pip install torch
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
To install the CPU-only version of PyTorch, run:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Or for Conda users:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
conda install -c conda-forge pytorch-cpu
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
>[!WARNING]
|
|
91
|
+
> If you are using an Intel Mac, you cannot install Pytorch via pip, because there are no precompiled wheels for that platform. Instead, you must install PyTorch via Conda, e.g.:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
conda install pytorch -c conda-forge
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
If you use UV, you can also just install torch while installing AMICA-Python:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
uv pip install -e ".[torch-cpu]"
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
uv pip install -e ".[torch-cuda]"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Usage
|
|
108
|
+
|
|
109
|
+
AMICA-Python exposes a scikit-learn style interface. Here is an example of how to use it:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
import numpy as np
|
|
113
|
+
from scipy import signal
|
|
114
|
+
from amica import AMICA
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
rng = np.random.default_rng(0)
|
|
118
|
+
n_samples = 2000
|
|
119
|
+
time = np.linspace(0, 8, n_samples)
|
|
120
|
+
|
|
121
|
+
s1 = np.sin(2 * time) # Sinusoidal
|
|
122
|
+
s2 = np.sign(np.sin(3 * time)) # Square wave
|
|
123
|
+
s3 = signal.sawtooth(2 * np.pi * time) # Sawtooth
|
|
124
|
+
|
|
125
|
+
S = np.c_[s1, s2, s3]
|
|
126
|
+
S += 0.2 * rng.standard_normal(S.shape) # Add noise
|
|
127
|
+
S /= S.std(axis=0) # Standardize
|
|
128
|
+
|
|
129
|
+
A = np.array([[1, 1, 1],
|
|
130
|
+
[0.5, 2, 1.0],
|
|
131
|
+
[1.5, 1.0, 2.0]]) # Mixing matrix
|
|
132
|
+
|
|
133
|
+
X = S @ A.T # Observed mixtures
|
|
134
|
+
|
|
135
|
+
ica = AMICA(random_state=0)
|
|
136
|
+
X_new = ica.fit_transform(X)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
<img src="https://scott-huberty.github.io/amica-python/_images/sphx_glr_plot_ica_blind_source_separation_001.png" alt="AMICA-Python vs FastICA outputs" width="50%" style="display: block; margin: 0 auto;"/>
|
|
140
|
+
|
|
141
|
+
### GPU acceleration
|
|
142
|
+
|
|
143
|
+
If PyTorch was installed with CUDA support, you can fit AMICA on GPU:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
ica = AMICA(device='cuda', random_state=0)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
<br/>
|
|
150
|
+
|
|
151
|
+
For more examples and documentation, please see the [documentation](https://scott-huberty.github.io/amica-python/).
|
|
152
|
+
|
|
153
|
+
## What is AMICA?
|
|
154
|
+
|
|
155
|
+
AMICA is composed of two main ideas, which are hinted at by the name and the title of the original paper:
|
|
156
|
+
*AMICA: An Adaptive Mixture of Independent Component Analyzers with Shared Components*.
|
|
157
|
+
|
|
158
|
+
#### 1. *Adaptive Mixture* ICA
|
|
159
|
+
|
|
160
|
+
Standard ICA assumes each source is independent and *non-Gaussian*. Extended Infomax ICA
|
|
161
|
+
improves on this by handling both *sub-Gaussian* and *super-Gaussian* sources. AMICA goes
|
|
162
|
+
further by modeling each source as a *mixture of multiple Gaussians*. This flexibility
|
|
163
|
+
lets AMICA represent virtually any source shape - super-Gaussian, sub-Gaussian,
|
|
164
|
+
or even some funky bimodal distribution:
|
|
165
|
+
|
|
166
|
+
<img src="docs/source/_static/GMM.png" alt="Source distributions modeled by AMICA" width="25%"/>
|
|
167
|
+
|
|
168
|
+
In practice, the authors argue that this leads to a more accurate
|
|
169
|
+
approximation of the source signals.
|
|
170
|
+
|
|
171
|
+
#### 2. *Shared Components*
|
|
172
|
+
|
|
173
|
+
AMICA can learn multiple ICA decompositions (i.e. models). This is a workaround for the assumption of ICA that the sources are
|
|
174
|
+
stationary (they do not change over time). AMICA will
|
|
175
|
+
decide which model best explains the data at each sample, effectively allowing
|
|
176
|
+
the sources to change over time. The "shared components" part of the paper title refers
|
|
177
|
+
to AMICA's ability to allow the various ICA models to share some components (i.e. sources)
|
|
178
|
+
between them, to reduce computational load.
|
|
179
|
+
|
|
180
|
+
# What does AMICA-Python implement?
|
|
181
|
+
|
|
182
|
+
In short, AMICA-Python implements point 1 above (Adaptive Mixture ICA),
|
|
183
|
+
but does not implement point 2 (running multiple ICA models simultaneously).
|
|
184
|
+
|
|
185
|
+
AMICA-Python is powered by [Torch](https://pytorch.org/) and wrapped in an easy-to-use [scikit-learn](https://scikit-learn.org/stable/) style interface.
|
|
186
|
+
|
|
187
|
+
The outputs are numerically tested against the original FORTRAN implementation to ensure correctness and minimize bugs.
|
|
188
|
+
|
|
189
|
+
# What wasn't implemented?
|
|
190
|
+
|
|
191
|
+
- The ability to model multiple ICA decompositions simultaneously.
|
|
192
|
+
- The ability to reject unlikely samples based on a thresholded log-likelihood (in the
|
|
193
|
+
FORTRAN implementation, this is a strategy to deal with artifacts in the data).
|
|
194
|
+
- AMICA-Python does not expose all the hyper-parameters available in the original FORTRAN implementation.
|
|
195
|
+
Instead I have tried to pick sensible defaults that should work well in most cases,
|
|
196
|
+
thus reducing the complexity of the interface.
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
[](https://codecov.io/github/scott-huberty/amica-python)
|
|
2
|
+
[](https://github.com/scott-huberty/amica-python/actions/workflows/ci.yaml)
|
|
3
|
+
[](https://dl.circleci.com/status-badge/redirect/gh/scott-huberty/amica-python/tree/main)
|
|
4
|
+
[](https://github.com/astral-sh/ruff)
|
|
5
|
+
|
|
6
|
+
# AMICA-Python
|
|
7
|
+
### Yes, it's fast.
|
|
8
|
+
|
|
9
|
+
A Python implementation of the [AMICA](https://sccn.ucsd.edu/~jason/amica_a.pdf) (Adaptive Mixture Independent Component Analysis) algorithm for blind source separation, that was originally [developed in FORTRAN](https://github.com/sccn/amica) by Jason Palmer at the Swartz Center for Computational Neuroscience (SCCN).
|
|
10
|
+
|
|
11
|
+
AMICA-Python is pre-alpha but is tested against the Fortran implementation and is ready for test driving.
|
|
12
|
+
|
|
13
|
+
| Python | Fortran |
|
|
14
|
+
|--------|---------|
|
|
15
|
+
| <img src="https://raw.githubusercontent.com/scott-huberty/amica-python/main/docs/source/_static/amica-python.gif" width=400px /> | <img src="https://raw.githubusercontent.com/scott-huberty/amica-python/main/docs/source/_static/amica-fortran.gif" width=400px /> |
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
For now, AMICA-Python should be installed from source, and you will have to manually install
|
|
21
|
+
PyTorch (see below) yourself:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
git clone https://github.com/scott-huberty/amica-python.git
|
|
25
|
+
cd amica-python
|
|
26
|
+
pip install -e .
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
> [!IMPORTANT]
|
|
30
|
+
> You must install PyTorch before using AMICA-Python.
|
|
31
|
+
|
|
32
|
+
### Installing PyTorch
|
|
33
|
+
|
|
34
|
+
Depending on your system and preferences, you can install PyTorch with or without GPU support.
|
|
35
|
+
|
|
36
|
+
To install the standard version of PyTorch, run:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
python -m pip install torch
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
To install the CPU-only version of PyTorch, run:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Or for Conda users:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
conda install -c conda-forge pytorch-cpu
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
>[!WARNING]
|
|
55
|
+
> If you are using an Intel Mac, you cannot install Pytorch via pip, because there are no precompiled wheels for that platform. Instead, you must install PyTorch via Conda, e.g.:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
conda install pytorch -c conda-forge
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
If you use UV, you can also just install torch while installing AMICA-Python:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
uv pip install -e ".[torch-cpu]"
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
uv pip install -e ".[torch-cuda]"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Usage
|
|
72
|
+
|
|
73
|
+
AMICA-Python exposes a scikit-learn style interface. Here is an example of how to use it:
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import numpy as np
|
|
77
|
+
from scipy import signal
|
|
78
|
+
from amica import AMICA
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
rng = np.random.default_rng(0)
|
|
82
|
+
n_samples = 2000
|
|
83
|
+
time = np.linspace(0, 8, n_samples)
|
|
84
|
+
|
|
85
|
+
s1 = np.sin(2 * time) # Sinusoidal
|
|
86
|
+
s2 = np.sign(np.sin(3 * time)) # Square wave
|
|
87
|
+
s3 = signal.sawtooth(2 * np.pi * time) # Sawtooth
|
|
88
|
+
|
|
89
|
+
S = np.c_[s1, s2, s3]
|
|
90
|
+
S += 0.2 * rng.standard_normal(S.shape) # Add noise
|
|
91
|
+
S /= S.std(axis=0) # Standardize
|
|
92
|
+
|
|
93
|
+
A = np.array([[1, 1, 1],
|
|
94
|
+
[0.5, 2, 1.0],
|
|
95
|
+
[1.5, 1.0, 2.0]]) # Mixing matrix
|
|
96
|
+
|
|
97
|
+
X = S @ A.T # Observed mixtures
|
|
98
|
+
|
|
99
|
+
ica = AMICA(random_state=0)
|
|
100
|
+
X_new = ica.fit_transform(X)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
<img src="https://scott-huberty.github.io/amica-python/_images/sphx_glr_plot_ica_blind_source_separation_001.png" alt="AMICA-Python vs FastICA outputs" width="50%" style="display: block; margin: 0 auto;"/>
|
|
104
|
+
|
|
105
|
+
### GPU acceleration
|
|
106
|
+
|
|
107
|
+
If PyTorch was installed with CUDA support, you can fit AMICA on GPU:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
ica = AMICA(device='cuda', random_state=0)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
<br/>
|
|
114
|
+
|
|
115
|
+
For more examples and documentation, please see the [documentation](https://scott-huberty.github.io/amica-python/).
|
|
116
|
+
|
|
117
|
+
## What is AMICA?
|
|
118
|
+
|
|
119
|
+
AMICA is composed of two main ideas, which are hinted at by the name and the title of the original paper:
|
|
120
|
+
*AMICA: An Adaptive Mixture of Independent Component Analyzers with Shared Components*.
|
|
121
|
+
|
|
122
|
+
#### 1. *Adaptive Mixture* ICA
|
|
123
|
+
|
|
124
|
+
Standard ICA assumes each source is independent and *non-Gaussian*. Extended Infomax ICA
|
|
125
|
+
improves on this by handling both *sub-Gaussian* and *super-Gaussian* sources. AMICA goes
|
|
126
|
+
further by modeling each source as a *mixture of multiple Gaussians*. This flexibility
|
|
127
|
+
lets AMICA represent virtually any source shape - super-Gaussian, sub-Gaussian,
|
|
128
|
+
or even some funky bimodal distribution:
|
|
129
|
+
|
|
130
|
+
<img src="docs/source/_static/GMM.png" alt="Source distributions modeled by AMICA" width="25%"/>
|
|
131
|
+
|
|
132
|
+
In practice, the authors argue that this leads to a more accurate
|
|
133
|
+
approximation of the source signals.
|
|
134
|
+
|
|
135
|
+
#### 2. *Shared Components*
|
|
136
|
+
|
|
137
|
+
AMICA can learn multiple ICA decompositions (i.e. models). This is a workaround for the assumption of ICA that the sources are
|
|
138
|
+
stationary (they do not change over time). AMICA will
|
|
139
|
+
decide which model best explains the data at each sample, effectively allowing
|
|
140
|
+
the sources to change over time. The "shared components" part of the paper title refers
|
|
141
|
+
to AMICA's ability to allow the various ICA models to share some components (i.e. sources)
|
|
142
|
+
between them, to reduce computational load.
|
|
143
|
+
|
|
144
|
+
# What does AMICA-Python implement?
|
|
145
|
+
|
|
146
|
+
In short, AMICA-Python implements point 1 above (Adaptive Mixture ICA),
|
|
147
|
+
but does not implement point 2 (running multiple ICA models simultaneously).
|
|
148
|
+
|
|
149
|
+
AMICA-Python is powered by [Torch](https://pytorch.org/) and wrapped in an easy-to-use [scikit-learn](https://scikit-learn.org/stable/) style interface.
|
|
150
|
+
|
|
151
|
+
The outputs are numerically tested against the original FORTRAN implementation to ensure correctness and minimize bugs.
|
|
152
|
+
|
|
153
|
+
# What wasn't implemented?
|
|
154
|
+
|
|
155
|
+
- The ability to model multiple ICA decompositions simultaneously.
|
|
156
|
+
- The ability to reject unlikely samples based on a thresholded log-likelihood (in the
|
|
157
|
+
FORTRAN implementation, this is a strategy to deal with artifacts in the data).
|
|
158
|
+
- AMICA-Python does not expose all the hyper-parameters available in the original FORTRAN implementation.
|
|
159
|
+
Instead I have tried to pick sensible defaults that should work well in most cases,
|
|
160
|
+
thus reducing the complexity of the interface.
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "amica-python"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Adaptive Mixture ICA in Python"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"loguru",
|
|
13
|
+
"rich",
|
|
14
|
+
"pooch >= 1.5",
|
|
15
|
+
"psutil",
|
|
16
|
+
"numpy>=2.2.6",
|
|
17
|
+
"scikit-learn>=1.7.0",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[project.optional-dependencies]
|
|
21
|
+
torch-cpu = ["torch"]
|
|
22
|
+
torch-cuda = ["torch"]
|
|
23
|
+
dev = ["pytest", "pytest-cov", "pytest-timeout", "matplotlib", "mne", "ruff"]
|
|
24
|
+
doc = [
|
|
25
|
+
"sphinx<8.2",
|
|
26
|
+
"shibuya",
|
|
27
|
+
"sphinx-gallery",
|
|
28
|
+
"numpydoc",
|
|
29
|
+
"sphinx-design",
|
|
30
|
+
"sphinxcontrib-bibtex",
|
|
31
|
+
"sphinx-copybutton",
|
|
32
|
+
# For Tutorials
|
|
33
|
+
"healpy",
|
|
34
|
+
"pandas", # Needed to load the MNIST dataset example
|
|
35
|
+
# "smica @ git+https://github.com/scott-huberty/smica.git",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[tool.setuptools]
|
|
39
|
+
package-dir = {"" = "src"}
|
|
40
|
+
|
|
41
|
+
[tool.setuptools.packages.find]
|
|
42
|
+
where = ["src"]
|
|
43
|
+
include = ["amica*"]
|
|
44
|
+
exclude = ["amica.tests*"]
|
|
45
|
+
|
|
46
|
+
[tool.ruff.lint]
|
|
47
|
+
select = ["A", "B006", "D", "E", "F", "I", "UP", "UP031", "W"]
|
|
48
|
+
|
|
49
|
+
[tool.ruff.lint.pydocstyle]
|
|
50
|
+
convention = "numpy"
|
|
51
|
+
|
|
52
|
+
[tool.ruff.lint.per-file-ignores]
|
|
53
|
+
"src/amica/_types.py" = ["E501"] # Line too long
|
|
54
|
+
"src/amica/**/__init__.py" = ["D104"] # Missing docstring in public package
|
|
55
|
+
"src/amica/**/tests/*.py" = ["D100"] # Missing docstring in public module
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
[[tool.uv.index]]
|
|
59
|
+
name = "pytorch_cpu"
|
|
60
|
+
url = "https://download.pytorch.org/whl/cpu"
|
|
61
|
+
explicit = true # only fetch from this index if we explicitly map a package there.
|
|
62
|
+
|
|
63
|
+
[tool.uv.sources]
|
|
64
|
+
torch = { index = "pytorch_cpu" }
|
|
65
|
+
markupsafe = { index = "pytorch_cpu" }
|
|
66
|
+
|
|
67
|
+
[tool.pytest.ini_options]
|
|
68
|
+
addopts = [
|
|
69
|
+
"--cov=amica",
|
|
70
|
+
"--cov-branch",
|
|
71
|
+
"--cov-report=xml",
|
|
72
|
+
"--cov-report=term",
|
|
73
|
+
"--ignore=src/amica/tests/test_kernels.py",
|
|
74
|
+
]
|
|
75
|
+
markers = [
|
|
76
|
+
"sklearn_api: Tests that validate Scikit-Learn API conformance",
|
|
77
|
+
"slow: Marks tests as slow (deselect with `pytest -m \"not slow\"`)"
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
[tool.coverage.run]
|
|
81
|
+
source = ["amica"] # Source files to measure.
|
|
82
|
+
branch = true # Add branch coverage to the analysis.
|
|
83
|
+
omit = [
|
|
84
|
+
"*/tests/*",
|
|
85
|
+
".venv/*",
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
[tool.coverage.report]
|
|
89
|
+
exclude_lines = [
|
|
90
|
+
"pragma: no cover",
|
|
91
|
+
"if TYPE_CHECKING:",
|
|
92
|
+
]
|
|
93
|
+
show_missing = true
|
|
94
|
+
skip_covered = true
|
|
95
|
+
|
|
96
|
+
[tool.coverage.xml]
|
|
97
|
+
output = "coverage.xml"
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterator
|
|
4
|
+
from typing import Union
|
|
5
|
+
from warnings import warn
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import psutil
|
|
9
|
+
import torch
|
|
10
|
+
|
|
11
|
+
ArrayLike2D = Union[np.ndarray, "np.typing.NDArray[np.floating]"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BatchLoader:
    """Iterate over an array in fixed-size batches of data along a chosen axis.

    We hand rolled this instead of using DataLoader because 1) we want to yield
    slices of input array (i.e. a view), and 2) return the indices as
    a slice object. DataLoader would internally convert the slice into a tensor
    of indices.

    Parameters
    ----------
    X : torch.Tensor
        The data to iterate over. Must have at least 1 dimension.
    axis : int
        The axis along which to batch. Negative values are normalized
        (e.g. ``-1`` means the last axis).
    batch_size : int | None
        Number of samples per batch. ``None`` means a single batch spanning
        the whole axis. Must be at least 1 and at most ``X.shape[axis]``.

    Example (AMICA shape):
        X: (n_samples, n_features)
        it = BatchLoader(X, axis=0, batch_size=4096)
        for X_blk, sl in it:
            # X_blk is X[sl, :] where sl is slice(start, end)
            ...
    """

    def __init__(self, X: torch.Tensor, axis: int, batch_size: int | None = None):
        """Validate inputs and pre-compute the batching parameters."""
        # Validate inputs
        cls_name = self.__class__.__name__
        if not isinstance(X, torch.Tensor):
            raise TypeError(f"{cls_name} expects a torch.Tensor")  # pragma: no cover
        if X.ndim < 1:
            raise ValueError(
                f"{cls_name} expects an array with at least 1 dimension"
            )  # pragma: no cover
        self.X = X
        self.axis = axis

        # Normalize negative axes (e.g. -1 -> ndim - 1), then bounds-check.
        if self.axis < 0:
            self.axis += X.ndim
        if not (0 <= self.axis < X.ndim):
            raise ValueError(
                f"axis {self.axis} out of bounds for array with ndim={X.ndim}"
            )

        # Determine batching parameters
        n = X.shape[self.axis]
        start = 0
        stop = n
        if batch_size is None:
            # Treat as single chunk spanning [start:stop]
            batch_size = stop

        # Validate parameters
        assert (0 <= start <= n), f"start {start} out of range [0, {n}]"
        assert (0 <= stop <= n), f"stop {stop} out of range [0, {n}]"
        assert start <= stop, f"start {start} must be <= stop {stop}"
        # BUG FIX: the original checked ``batch_size < 0`` only, so a
        # batch_size of 0 slipped through and later crashed in
        # __len__ (ZeroDivisionError) or __iter__ (range() step of 0).
        # Reject anything below 1 up front, matching the error message.
        if batch_size < 1:
            raise ValueError(f"batch_size must be positive. Got {batch_size}.")
        if batch_size > X.shape[self.axis]:
            raise ValueError(
                f"batch_size {batch_size} exceeds data size {X.shape[self.axis]} "
                f"along axis {self.axis}."
            )

        # Store parameters
        self.start = start
        self.stop = stop
        self.batch_size = int(batch_size)

    def __getitem__(self, idx: int) -> torch.Tensor:
        """Return the ``idx``-th batch as a view of the underlying tensor."""
        start = self.start + idx * self.batch_size
        stop = min(start + self.batch_size, self.stop)

        # Build a full-dimensional index that slices only the batched axis.
        # (The original shadowed the ``idx`` parameter with this list.)
        index = [slice(None)] * self.X.ndim
        index[self.axis] = slice(start, stop)
        return self.X[tuple(index)]

    def __iter__(self) -> Iterator[tuple[torch.Tensor, slice]]:
        """Yield ``(batch_view, batch_slice)`` pairs along the batched axis."""
        axis = self.axis
        start = self.start
        stop = self.stop
        step = self.batch_size

        idx = [slice(None)] * self.X.ndim
        assert -((stop - start) // -step) == len(self)  # sanity check
        for s in range(start, stop, step):
            e = min(s + step, stop)
            batch_slice = slice(s, e)
            idx[axis] = batch_slice
            yield self.X[tuple(idx)], batch_slice

    def __len__(self) -> int:
        """Return the number of batches (ceiling division)."""
        return (self.X.shape[self.axis] + self.batch_size - 1) // self.batch_size

    def __repr__(self) -> str:
        """Return a summary of the data shape and batching parameters."""
        return (
            f"{self.__class__.__name__}(Data shape: {self.X.shape}, "
            f"Batched axis: {self.axis}, batch_size: {self.batch_size}, "
            f"n_batches: {len(self)})"
        )
|
|
106
|
+
|
|
107
|
+
def choose_batch_size(
    *,
    N: int,
    n_comps: int,
    n_mix: int,
    n_models: int = 1,
    dtype: np.dtype = np.float64,
    memory_fraction: float = 0.25,  # use up to 25% of available memory
    memory_cap: float = 1.5 * 1024**3,  # 1.5 GB absolute ceiling
) -> int:
    """
    Choose batch size for processing data in chunks.

    Parameters
    ----------
    N : int
        Total number of samples.
    n_comps : int
        Number of components to be learned in the model, e.g. size of the n_components
        dimension of the data.
    n_mix : int
        Number of mixture components per source/component to be learned in the model.
    n_models : int, optional
        Number of ICA models, by default 1. Scales the per-sample cost of the
        model-responsibility buffers (modloglik, v).
    dtype : np.dtype, optional
        Data type of the input data, by default np.float64.
    memory_fraction : float, optional
        Fraction of currently-available system memory to budget, by default 0.25.
        Only applies when available memory can be queried via psutil.
    memory_cap : float, optional
        Fallback maximum memory (in bytes) to be used for processing when available
        memory cannot be queried, by default ``1.5 * 1024**3`` (1.5 GB).

    Returns
    -------
    int
        The chosen batch size, at most ``N`` samples.

    Raises
    ------
    MemoryError
        If even a single sample does not fit within the memory budget.

    Notes
    -----
    The batch size is primarily determined by the estimated size of hot buffers (e.g.
    y, z, fp, ufp), which scale with the size of n_samples:
    - One array of shape (N,):
        - loglik
    - Two arrays of shape (N, n_models):
        - modloglik
        - v (model responsibilities)
    - Two arrays of shape (N, n_comps)
        - b
        - g
    - Five arrays of shape (N, n_comps, n_mix): u, y, z, fp, ufp
        - u (mixture responsibilities)
        - y
        - z
        - fp
        - ufp
    """
    dtype_size = np.dtype(dtype).itemsize
    # per-sample cost across pre-allocated buffers
    bytes_per_sample = (
        1  # loglik
        + 2 * n_models  # modloglik, v
        + 2 * n_comps  # b, g
        + 5 * n_comps * n_mix  # fp, u, ufp, y, z,
    ) * dtype_size
    # Plus small headroom for intermediates
    bytes_per_sample = int(bytes_per_sample * 1.2)

    # Pick memory budget
    try:
        hard_cap = 4 * 1024**3  # 4 GiB (avoid runaway memory use)
        avail_mem = psutil.virtual_memory().available
        mem_cap = min(avail_mem * memory_fraction, hard_cap)
    except Exception:
        mem_cap = memory_cap  # fallback to user-specified cap

    max_batch_size = mem_cap // bytes_per_sample

    # Ensure at least 1 sample. This should only trigger if n_comps and n_mix are huge.
    if max_batch_size < 1:
        raise MemoryError(
            f"Cannot fit even 1 sample within memory cap of "
            f"{mem_cap / 1024**3:.2f} GiB. "
            f"Per-sample memory cost is {bytes_per_sample / 1024**3:.2f} GB."
        )
    batch_size = int(min(N, max_batch_size))

    # Heuristic floor, we don't want absurdly small chunks or chunks that are too
    # small relative to the model complexity (n_comps)
    # This heuristic works well for typical ICA regimes, where n_comps is < 256
    min_batch_size = max(8192, n_comps * 32)  # at least 32 samples per component
    min_batch_size = min(min_batch_size, N)  # Cannot exceed N
    if batch_size < min_batch_size:
        # stacklevel=2 attributes the warning to the caller, not this helper.
        warn(
            f"Warning: To stay within the memory cap, batch size is {batch_size} "
            f"samples, which is below the recommended minimum of {min_batch_size}.",
            stacklevel=2,
        )
    return batch_size
|