amica-python 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. amica_python-0.1.0/LICENSE +25 -0
  2. amica_python-0.1.0/PKG-INFO +196 -0
  3. amica_python-0.1.0/README.md +160 -0
  4. amica_python-0.1.0/pyproject.toml +97 -0
  5. amica_python-0.1.0/setup.cfg +4 -0
  6. amica_python-0.1.0/src/amica/__init__.py +5 -0
  7. amica_python-0.1.0/src/amica/_batching.py +194 -0
  8. amica_python-0.1.0/src/amica/_newton.py +77 -0
  9. amica_python-0.1.0/src/amica/_sklearn_interface.py +387 -0
  10. amica_python-0.1.0/src/amica/_types.py +44 -0
  11. amica_python-0.1.0/src/amica/conftest.py +30 -0
  12. amica_python-0.1.0/src/amica/constants.py +47 -0
  13. amica_python-0.1.0/src/amica/core.py +1165 -0
  14. amica_python-0.1.0/src/amica/datasets.py +15 -0
  15. amica_python-0.1.0/src/amica/kernels.py +1308 -0
  16. amica_python-0.1.0/src/amica/linalg.py +349 -0
  17. amica_python-0.1.0/src/amica/state.py +385 -0
  18. amica_python-0.1.0/src/amica/tests/test_amica.py +497 -0
  19. amica_python-0.1.0/src/amica/utils/__init__.py +36 -0
  20. amica_python-0.1.0/src/amica/utils/_logging.py +64 -0
  21. amica_python-0.1.0/src/amica/utils/_progress.py +34 -0
  22. amica_python-0.1.0/src/amica/utils/_verbose.py +14 -0
  23. amica_python-0.1.0/src/amica/utils/fetch.py +274 -0
  24. amica_python-0.1.0/src/amica/utils/fortran.py +387 -0
  25. amica_python-0.1.0/src/amica/utils/imports.py +46 -0
  26. amica_python-0.1.0/src/amica/utils/mne.py +74 -0
  27. amica_python-0.1.0/src/amica/utils/parallel.py +72 -0
  28. amica_python-0.1.0/src/amica/utils/simulation.py +36 -0
  29. amica_python-0.1.0/src/amica/utils/tests/test_fetch.py +9 -0
  30. amica_python-0.1.0/src/amica/utils/tests/test_fortran.py +47 -0
  31. amica_python-0.1.0/src/amica/utils/tests/test_imports.py +0 -0
  32. amica_python-0.1.0/src/amica/utils/tests/test_logger.py +29 -0
  33. amica_python-0.1.0/src/amica/utils/tests/test_mne.py +27 -0
  34. amica_python-0.1.0/src/amica_python.egg-info/PKG-INFO +196 -0
  35. amica_python-0.1.0/src/amica_python.egg-info/SOURCES.txt +36 -0
  36. amica_python-0.1.0/src/amica_python.egg-info/dependency_links.txt +1 -0
  37. amica_python-0.1.0/src/amica_python.egg-info/requires.txt +31 -0
  38. amica_python-0.1.0/src/amica_python.egg-info/top_level.txt +1 -0
@@ -0,0 +1,25 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2015-2020, Jason Palmer and contributors
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,196 @@
1
+ Metadata-Version: 2.4
2
+ Name: amica-python
3
+ Version: 0.1.0
4
+ Summary: Adaptive Mixture ICA in Python
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Requires-Dist: loguru
9
+ Requires-Dist: rich
10
+ Requires-Dist: pooch>=1.5
11
+ Requires-Dist: psutil
12
+ Requires-Dist: numpy>=2.2.6
13
+ Requires-Dist: scikit-learn>=1.7.0
14
+ Provides-Extra: torch-cpu
15
+ Requires-Dist: torch; extra == "torch-cpu"
16
+ Provides-Extra: torch-cuda
17
+ Requires-Dist: torch; extra == "torch-cuda"
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest; extra == "dev"
20
+ Requires-Dist: pytest-cov; extra == "dev"
21
+ Requires-Dist: pytest-timeout; extra == "dev"
22
+ Requires-Dist: matplotlib; extra == "dev"
23
+ Requires-Dist: mne; extra == "dev"
24
+ Requires-Dist: ruff; extra == "dev"
25
+ Provides-Extra: doc
26
+ Requires-Dist: sphinx<8.2; extra == "doc"
27
+ Requires-Dist: shibuya; extra == "doc"
28
+ Requires-Dist: sphinx-gallery; extra == "doc"
29
+ Requires-Dist: numpydoc; extra == "doc"
30
+ Requires-Dist: sphinx-design; extra == "doc"
31
+ Requires-Dist: sphinxcontrib-bibtex; extra == "doc"
32
+ Requires-Dist: sphinx-copybutton; extra == "doc"
33
+ Requires-Dist: healpy; extra == "doc"
34
+ Requires-Dist: pandas; extra == "doc"
35
+ Dynamic: license-file
36
+
37
+ [![codecov](https://codecov.io/github/scott-huberty/amica-python/graph/badge.svg?token=Gt7dvyE9mL)](https://codecov.io/github/scott-huberty/amica-python)
38
+ [![tests](https://github.com/scott-huberty/amica-python/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/scott-huberty/amica-python/actions/workflows/ci.yaml)
39
+ [![docs](https://img.shields.io/github/actions/workflow/status/scott-huberty/amica-python/circleci_redirect.yml?label=Docs)](https://dl.circleci.com/status-badge/redirect/gh/scott-huberty/amica-python/tree/main)
40
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
41
+
42
+ # AMICA-Python
43
+ ### Yes, it's fast.
44
+
45
+ A Python implementation of the [AMICA](https://sccn.ucsd.edu/~jason/amica_a.pdf) (Adaptive Mixture Independent Component Analysis) algorithm for blind source separation, that was originally [developed in FORTRAN](https://github.com/sccn/amica) by Jason Palmer at the Swartz Center for Computational Neuroscience (SCCN).
46
+
47
+ AMICA-Python is pre-alpha but is tested against the Fortran implementation and is ready for test driving.
48
+
49
+ | Python | Fortran |
50
+ |--------|---------|
51
+ | <img src="https://raw.githubusercontent.com/scott-huberty/amica-python/main/docs/source/_static/amica-python.gif" width=400px /> | <img src="https://raw.githubusercontent.com/scott-huberty/amica-python/main/docs/source/_static/amica-fortran.gif" width=400px /> |
52
+
53
+
54
+ ## Installation
55
+
56
+ For now, AMICA-Python should be installed from source, and you will have to manually install
57
+ PyTorch (see below) yourself:
58
+
59
+ ```bash
60
+ git clone https://github.com/scott-huberty/amica-python.git
61
+ cd amica-python
62
+ pip install -e .
63
+ ```
64
+
65
+ > [!IMPORTANT]
66
+ > You must install PyTorch before using AMICA-Python.
67
+
68
+ ### Installing PyTorch
69
+
70
+ Depending on your system and preferences, you can install PyTorch with or without GPU support.
71
+
72
+ To install the standard version of PyTorch, run:
73
+
74
+ ```bash
75
+ python -m pip install torch
76
+ ```
77
+
78
+ To install the CPU-only version of PyTorch, run:
79
+
80
+ ```bash
81
+ python -m pip install torch --index-url https://download.pytorch.org/whl/cu113
82
+ ```
83
+
84
+ Or for Conda users:
85
+
86
+ ```bash
87
+ conda install -c conda-forge pytorch-cpu
88
+ ```
89
+
90
+ >[!WARNING]
91
+ > If you are using an Intel Mac, you cannot install Pytorch via pip, because there are no precompiled wheels for that platform. Instead, you must install PyTorch via Conda, e.g.:
92
+
93
+ ```bash
94
+ conda install pytorch -c conda-forge
95
+ ```
96
+
97
+ If you use UV, you can also just install torch while installing AMICA-Python:
98
+
99
+ ```bash
100
+ uv pip install -e ".[torch-cpu]"
101
+ ```
102
+
103
+ ```bash
104
+ uv pip install -e ".[torch-cuda]"
105
+ ```
106
+
107
+ ## Usage
108
+
109
+ AMICA-Python exposes a scikit-learn style interface. Here is an example of how to use it:
110
+
111
+ ```python
112
+ import numpy as np
113
+ from scipy import signal
114
+ from amica import AMICA
115
+
116
+
117
+ rng = np.random.default_rng(0)
118
+ n_samples = 2000
119
+ time = np.linspace(0, 8, n_samples)
120
+
121
+ s1 = np.sin(2 * time) # Sinusoidal
122
+ s2 = np.sign(np.sin(3 * time)) # Square wave
123
+ s3 = signal.sawtooth(2 * np.pi * time) # Sawtooth
124
+
125
+ S = np.c_[s1, s2, s3]
126
+ S += 0.2 * rng.standard_normal(S.shape) # Add noise
127
+ S /= S.std(axis=0) # Standardize
128
+
129
+ A = np.array([[1, 1, 1],
130
+ [0.5, 2, 1.0],
131
+ [1.5, 1.0, 2.0]]) # Mixing matrix
132
+
133
+ X = S @ A.T # Observed mixtures
134
+
135
+ ica = AMICA(random_state=0)
136
+ X_new = ica.fit_transform(X)
137
+ ```
138
+
139
+ <img src="https://scott-huberty.github.io/amica-python/_images/sphx_glr_plot_ica_blind_source_separation_001.png" alt="AMICA-Python vs FastICA outputs" width="50%" style="display: block; margin: 0 auto;"/>
140
+
141
+ ### GPU acceleration
142
+
143
+ If PyTorch was installed with CUDA support, you can fit AMICA on GPU:
144
+
145
+ ```python
146
+ ica = AMICA(device='cuda', random_state=0)
147
+ ```
148
+
149
+ <br/>
150
+
151
+ For more examples and documentation, please see the [documentation](https://scott-huberty.github.io/amica-python/).
152
+
153
+ ## What is AMICA?
154
+
155
+ AMICA is composed of two main ideas, which are hinted at by the name and the title of the original paper:
156
+ *AMICA: An Adaptive Mixture of Independent Component Analyzers with Shared Components*.
157
+
158
+ #### 1. *Adaptive Mixture* ICA
159
+
160
+ Standard ICA assumes each source is independent and *non-Gaussian*. Extended Infomax ICA
161
+ improves on this by handling both *sub-Gaussian* and *super-Gaussian* sources. AMICA goes
162
+ further by modeling each source as a *mixture of multiple Gaussians*. This flexibility
163
+ lets AMICA represent virtually any source shape - super-Gaussian, sub-Gaussian,
164
+ or even some funky bimodal distribution:
165
+
166
+ <img src="docs/source/_static/GMM.png" alt="Source distributions modeled by AMICA" width="25%"/>
167
+
168
+ In practice, the authors argue that this leads to a more accurate
169
+ approximation of the source signals.
170
+
171
+ #### 2. *Shared Components*
172
+
173
+ AMICA can learn multiple ICA decompositions (i.e. models). This is a work around to the assumption of ICA that the sources are
174
+ stationary (they do not change over time). AMICA will
175
+ decide which model best explains the data at each sample, effectively allowing
176
+ the sources to change over time. The "shared components" part of the paper title refers
177
+ to AMICA's ability to allow the various ICA models to share some components (i.e. sources)
178
+ between them, to reduce computational load.
179
+
180
+ # What does AMICA-Python implement?
181
+
182
+ In short, AMICA-Python implements point 1 above (Adaptive Mixture ICA),
183
+ but does not implement point 2 (running multiple ICA models simultaneously).
184
+
185
+ AMICA-Python is powered by [Torch](https://pytorch.org/) and wrapped in an easy-to-use [scikit-learn](https://scikit-learn.org/stable/) style interface.
186
+
187
+ The outputs are numerically tested against the original FORTRAN implementation to ensure correctness and minimize bugs.
188
+
189
+ # What wasn't implemented?
190
+
191
+ - The ability to model multiple ICA decompositions simultaneously.
192
+ - The ability to reject unlikely samples based on a thresholded log-likelihood (in the
193
+ FORTRAN implementation, this is a strategy to deal with artifacts in the data).
194
+ - AMICA-Python does not expose all the hyper-parameters available in the original FORTRAN implementation.
195
+ Instead I have tried to pick sensible defaults that should work well in most cases,
196
+ thus reducing the complexity of the interface.
@@ -0,0 +1,160 @@
1
+ [![codecov](https://codecov.io/github/scott-huberty/amica-python/graph/badge.svg?token=Gt7dvyE9mL)](https://codecov.io/github/scott-huberty/amica-python)
2
+ [![tests](https://github.com/scott-huberty/amica-python/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/scott-huberty/amica-python/actions/workflows/ci.yaml)
3
+ [![docs](https://img.shields.io/github/actions/workflow/status/scott-huberty/amica-python/circleci_redirect.yml?label=Docs)](https://dl.circleci.com/status-badge/redirect/gh/scott-huberty/amica-python/tree/main)
4
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
5
+
6
+ # AMICA-Python
7
+ ### Yes, it's fast.
8
+
9
+ A Python implementation of the [AMICA](https://sccn.ucsd.edu/~jason/amica_a.pdf) (Adaptive Mixture Independent Component Analysis) algorithm for blind source separation, that was originally [developed in FORTRAN](https://github.com/sccn/amica) by Jason Palmer at the Swartz Center for Computational Neuroscience (SCCN).
10
+
11
+ AMICA-Python is pre-alpha but is tested against the Fortran implementation and is ready for test driving.
12
+
13
+ | Python | Fortran |
14
+ |--------|---------|
15
+ | <img src="https://raw.githubusercontent.com/scott-huberty/amica-python/main/docs/source/_static/amica-python.gif" width=400px /> | <img src="https://raw.githubusercontent.com/scott-huberty/amica-python/main/docs/source/_static/amica-fortran.gif" width=400px /> |
16
+
17
+
18
+ ## Installation
19
+
20
+ For now, AMICA-Python should be installed from source, and you will have to manually install
21
+ PyTorch (see below) yourself:
22
+
23
+ ```bash
24
+ git clone https://github.com/scott-huberty/amica-python.git
25
+ cd amica-python
26
+ pip install -e .
27
+ ```
28
+
29
+ > [!IMPORTANT]
30
+ > You must install PyTorch before using AMICA-Python.
31
+
32
+ ### Installing PyTorch
33
+
34
+ Depending on your system and preferences, you can install PyTorch with or without GPU support.
35
+
36
+ To install the standard version of PyTorch, run:
37
+
38
+ ```bash
39
+ python -m pip install torch
40
+ ```
41
+
42
+ To install the CPU-only version of PyTorch, run:
43
+
44
+ ```bash
45
+ python -m pip install torch --index-url https://download.pytorch.org/whl/cu113
46
+ ```
47
+
48
+ Or for Conda users:
49
+
50
+ ```bash
51
+ conda install -c conda-forge pytorch-cpu
52
+ ```
53
+
54
+ >[!WARNING]
55
+ > If you are using an Intel Mac, you cannot install Pytorch via pip, because there are no precompiled wheels for that platform. Instead, you must install PyTorch via Conda, e.g.:
56
+
57
+ ```bash
58
+ conda install pytorch -c conda-forge
59
+ ```
60
+
61
+ If you use UV, you can also just install torch while installing AMICA-Python:
62
+
63
+ ```bash
64
+ uv pip install -e ".[torch-cpu]"
65
+ ```
66
+
67
+ ```bash
68
+ uv pip install -e ".[torch-cuda]"
69
+ ```
70
+
71
+ ## Usage
72
+
73
+ AMICA-Python exposes a scikit-learn style interface. Here is an example of how to use it:
74
+
75
+ ```python
76
+ import numpy as np
77
+ from scipy import signal
78
+ from amica import AMICA
79
+
80
+
81
+ rng = np.random.default_rng(0)
82
+ n_samples = 2000
83
+ time = np.linspace(0, 8, n_samples)
84
+
85
+ s1 = np.sin(2 * time) # Sinusoidal
86
+ s2 = np.sign(np.sin(3 * time)) # Square wave
87
+ s3 = signal.sawtooth(2 * np.pi * time) # Sawtooth
88
+
89
+ S = np.c_[s1, s2, s3]
90
+ S += 0.2 * rng.standard_normal(S.shape) # Add noise
91
+ S /= S.std(axis=0) # Standardize
92
+
93
+ A = np.array([[1, 1, 1],
94
+ [0.5, 2, 1.0],
95
+ [1.5, 1.0, 2.0]]) # Mixing matrix
96
+
97
+ X = S @ A.T # Observed mixtures
98
+
99
+ ica = AMICA(random_state=0)
100
+ X_new = ica.fit_transform(X)
101
+ ```
102
+
103
+ <img src="https://scott-huberty.github.io/amica-python/_images/sphx_glr_plot_ica_blind_source_separation_001.png" alt="AMICA-Python vs FastICA outputs" width="50%" style="display: block; margin: 0 auto;"/>
104
+
105
+ ### GPU acceleration
106
+
107
+ If PyTorch was installed with CUDA support, you can fit AMICA on GPU:
108
+
109
+ ```python
110
+ ica = AMICA(device='cuda', random_state=0)
111
+ ```
112
+
113
+ <br/>
114
+
115
+ For more examples and documentation, please see the [documentation](https://scott-huberty.github.io/amica-python/).
116
+
117
+ ## What is AMICA?
118
+
119
+ AMICA is composed of two main ideas, which are hinted at by the name and the title of the original paper:
120
+ *AMICA: An Adaptive Mixture of Independent Component Analyzers with Shared Components*.
121
+
122
+ #### 1. *Adaptive Mixture* ICA
123
+
124
+ Standard ICA assumes each source is independent and *non-Gaussian*. Extended Infomax ICA
125
+ improves on this by handling both *sub-Gaussian* and *super-Gaussian* sources. AMICA goes
126
+ further by modeling each source as a *mixture of multiple Gaussians*. This flexibility
127
+ lets AMICA represent virtually any source shape - super-Gaussian, sub-Gaussian,
128
+ or even some funky bimodal distribution:
129
+
130
+ <img src="docs/source/_static/GMM.png" alt="Source distributions modeled by AMICA" width="25%"/>
131
+
132
+ In practice, the authors argue that this leads to a more accurate
133
+ approximation of the source signals.
134
+
135
+ #### 2. *Shared Components*
136
+
137
+ AMICA can learn multiple ICA decompositions (i.e. models). This is a work around to the assumption of ICA that the sources are
138
+ stationary (they do not change over time). AMICA will
139
+ decide which model best explains the data at each sample, effectively allowing
140
+ the sources to change over time. The "shared components" part of the paper title refers
141
+ to AMICA's ability to allow the various ICA models to share some components (i.e. sources)
142
+ between them, to reduce computational load.
143
+
144
+ # What does AMICA-Python implement?
145
+
146
+ In short, AMICA-Python implements point 1 above (Adaptive Mixture ICA),
147
+ but does not implement point 2 (running multiple ICA models simultaneously).
148
+
149
+ AMICA-Python is powered by [Torch](https://pytorch.org/) and wrapped in an easy-to-use [scikit-learn](https://scikit-learn.org/stable/) style interface.
150
+
151
+ The outputs are numerically tested against the original FORTRAN implementation to ensure correctness and minimize bugs.
152
+
153
+ # What wasn't implemented?
154
+
155
+ - The ability to model multiple ICA decompositions simultaneously.
156
+ - The ability to reject unlikely samples based on a thresholded log-likelihood (in the
157
+ FORTRAN implementation, this is a strategy to deal with artifacts in the data).
158
+ - AMICA-Python does not expose all the hyper-parameters available in the original FORTRAN implementation.
159
+ Instead I have tried to pick sensible defaults that should work well in most cases,
160
+ thus reducing the complexity of the interface.
@@ -0,0 +1,97 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "amica-python"
7
+ version = "0.1.0"
8
+ description = "Adaptive Mixture ICA in Python"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "loguru",
13
+ "rich",
14
+ "pooch >= 1.5",
15
+ "psutil",
16
+ "numpy>=2.2.6",
17
+ "scikit-learn>=1.7.0",
18
+ ]
19
+
20
+ [project.optional-dependencies]
21
+ torch-cpu = ["torch"]
22
+ torch-cuda = ["torch"]
23
+ dev = ["pytest", "pytest-cov", "pytest-timeout", "matplotlib", "mne", "ruff"]
24
+ doc = [
25
+ "sphinx<8.2",
26
+ "shibuya",
27
+ "sphinx-gallery",
28
+ "numpydoc",
29
+ "sphinx-design",
30
+ "sphinxcontrib-bibtex",
31
+ "sphinx-copybutton",
32
+ # For Tutorials
33
+ "healpy",
34
+ "pandas", # Needed to load the MNIST dataset example
35
+ # "smica @ git+https://github.com/scott-huberty/smica.git",
36
+ ]
37
+
38
+ [tool.setuptools]
39
+ package-dir = {"" = "src"}
40
+
41
+ [tool.setuptools.packages.find]
42
+ where = ["src"]
43
+ include = ["amica*"]
44
+ exclude = ["amica.tests*"]
45
+
46
+ [tool.ruff.lint]
47
+ select = ["A", "B006", "D", "E", "F", "I", "UP", "UP031", "W"]
48
+
49
+ [tool.ruff.lint.pydocstyle]
50
+ convention = "numpy"
51
+
52
+ [tool.ruff.lint.per-file-ignores]
53
+ "src/amica/_types.py" = ["E501"] # Line too long
54
+ "src/amica/**/__init__.py" = ["D104"] # Missing docstring in public package
55
+ "src/amica/**/tests/*.py" = ["D100"] # Missing docstring in public module
56
+
57
+
58
+ [[tool.uv.index]]
59
+ name = "pytorch_cpu"
60
+ url = "https://download.pytorch.org/whl/cpu"
61
+ explicit = true # only fetch from this index if we explicitly map a package there.
62
+
63
+ [tool.uv.sources]
64
+ torch = { index = "pytorch_cpu" }
65
+ markupsafe = { index = "pytorch_cpu" }
66
+
67
+ [tool.pytest.ini_options]
68
+ addopts = [
69
+ "--cov=amica",
70
+ "--cov-branch",
71
+ "--cov-report=xml",
72
+ "--cov-report=term",
73
+ "--ignore=src/amica/tests/test_kernels.py",
74
+ ]
75
+ markers = [
76
+ "sklearn_api: Tests that validate Scikit-Learn API conformance",
77
+ "slow: Marks tests as slow (deselect with `pytest -m not slow`)"
78
+ ]
79
+
80
+ [tool.coverage.run]
81
+ source = ["amica"] # Source files to measure.
82
+ branch = true # Add branch coverage to the analysis.
83
+ omit = [
84
+ "*/tests/*",
85
+ ".venv/*",
86
+ ]
87
+
88
+ [tool.coverage.report]
89
+ exclude_lines = [
90
+ "pragma: no cover",
91
+ "if TYPE_CHECKING:",
92
+ ]
93
+ show_missing = true
94
+ skip_covered = true
95
+
96
+ [tool.coverage.xml]
97
+ output = "coverage.xml"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,5 @@
1
+ from . import datasets, utils
2
+ from ._sklearn_interface import AMICA
3
+ from .core import fit_amica
4
+
5
+ __all__ = ['fit_amica', 'AMICA', 'datasets', 'utils']
@@ -0,0 +1,194 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator
4
+ from typing import Union
5
+ from warnings import warn
6
+
7
+ import numpy as np
8
+ import psutil
9
+ import torch
10
+
11
+ ArrayLike2D = Union[np.ndarray, "np.typing.NDArray[np.floating]"]
12
+
13
+
14
+ class BatchLoader:
15
+ """Iterate over an array in fixed-size batches of data along a chosen axis.
16
+
17
+ We hand rolled this instead of using DataLoader because 1) we want to yield
18
+ slices of input array (i.e. a view), and 2) return the indices as
19
+ a slice object. DataLoader would internally convert the slice into a tensor
20
+ of indices.
21
+
22
+ Example (AMICA shape):
23
+ X: (n_samples, n_features)
24
+ it = BatchLoader(X, axis=0, batch_size=4096)
25
+ for X_blk, sl in it:
26
+ # X_blk is X[sl, :] where sl is slice(start, end)
27
+ ...
28
+ """
29
+
30
+ def __init__(self, X: ArrayLike2D, axis: int, batch_size: int | None = None):
31
+ # Validate inputs
32
+ cls_name = self.__class__.__name__
33
+ if not isinstance(X, torch.Tensor):
34
+ raise TypeError(f"{cls_name} expects a torch.Tensor") # pragma: no cover
35
+ if X.ndim < 1:
36
+ raise ValueError(
37
+ f"{cls_name} expects an array with at least 1 dimension"
38
+ ) # pragma: no cover
39
+ self.X = X
40
+ self.axis = axis
41
+
42
+ if self.axis < 0:
43
+ self.axis += X.ndim
44
+ if not (0 <= self.axis < X.ndim):
45
+ raise ValueError(
46
+ f"axis {self.axis} out of bounds for array with ndim={X.ndim}"
47
+ )
48
+
49
+ # Determine batching parameters
50
+ n = X.shape[self.axis]
51
+ start = 0
52
+ stop = n
53
+ if batch_size is None:
54
+ # Treat as single chunk spanning [start:stop]
55
+ batch_size = stop
56
+
57
+ # Validate parameters
58
+ assert (0 <= start <= n), f"start {start} out of range [0, {n}]"
59
+ assert (0 <= stop <= n), f"stop {stop} out of range [0, {n}]"
60
+ assert start <= stop, f"start {start} must be <= stop {stop}"
61
+ if batch_size < 0:
62
+ raise ValueError(f"batch_size must be positive. Got {batch_size}.")
63
+ if batch_size > X.shape[self.axis]:
64
+ raise ValueError(
65
+ f"batch_size {batch_size} exceeds data size {X.shape[self.axis]} "
66
+ f"along axis {self.axis}."
67
+ )
68
+
69
+ # Store parameters
70
+ self.start = start
71
+ self.stop = stop
72
+ self.batch_size = int(batch_size)
73
+
74
+ def __getitem__(self, idx: int) -> torch.Tensor:
75
+ start = self.start + idx * self.batch_size
76
+ stop = min(start + self.batch_size, self.stop)
77
+
78
+
79
+ idx = [slice(None)] * self.X.ndim
80
+ idx[self.axis] = slice(start, stop)
81
+ return self.X[tuple(idx)]
82
+
83
+ def __iter__(self) -> Iterator[tuple[np.ndarray, slice]]:
84
+ axis = self.axis
85
+ start = self.start
86
+ stop = self.stop
87
+ step = self.batch_size
88
+
89
+ idx = [slice(None)] * self.X.ndim
90
+ assert -((stop - start) // -step) == len(self) # sanity check
91
+ for s in range(start, stop, step):
92
+ e = min(s + step, stop)
93
+ batch_slice = slice(s, e)
94
+ idx[axis] = batch_slice
95
+ yield self.X[tuple(idx)], batch_slice
96
+
97
+ def __len__(self) -> int:
98
+ return (self.X.shape[self.axis] + self.batch_size - 1) // self.batch_size
99
+
100
+ def __repr__(self) -> str:
101
+ return (
102
+ f"{self.__class__.__name__}(Data shape: {self.X.shape}, "
103
+ f"Batched axis: {self.axis}, batch_size: {self.batch_size}, "
104
+ f"n_batches: {len(self)})"
105
+ )
106
+
107
def choose_batch_size(
    *,
    N: int,
    n_comps: int,
    n_mix: int,
    n_models: int = 1,
    dtype: np.dtype = np.float64,
    memory_fraction: float = 0.25,  # use up to 25% of available memory
    memory_cap: float = 1.5 * 1024**3,  # fallback ceiling when psutil fails
) -> int:
    """Choose a batch size for processing data in chunks.

    Parameters
    ----------
    N : int
        Total number of samples.
    n_comps : int
        Number of components to be learned in the model, e.g. size of the
        n_components dimension of the data.
    n_mix : int
        Number of mixture components per source/component to be learned in
        the model.
    n_models : int, optional
        Number of ICA models, by default 1.
    dtype : np.dtype, optional
        Data type of the input data, by default np.float64.
    memory_fraction : float, optional
        Fraction of currently-available system memory to budget for the
        per-sample buffers, by default 0.25. Capped at a 4 GiB hard ceiling.
    memory_cap : float, optional
        Fallback memory budget (in bytes) used only when available memory
        cannot be queried via psutil, by default ``1.5 * 1024**3`` (1.5 GB).

    Returns
    -------
    int
        The chosen batch size, between 1 and ``N``.

    Raises
    ------
    MemoryError
        If even a single sample does not fit within the memory budget.

    Warns
    -----
    UserWarning
        If the memory budget forces a batch size below the recommended
        minimum for the model complexity.

    Notes
    -----
    The batch size is primarily determined by the estimated size of hot
    buffers (e.g. y, z, fp, ufp), which scale with the size of n_samples:
    - One array of shape (N,):
        - loglik
    - Two arrays of shape (N, n_models):
        - modloglik
        - v (model responsibilities)
    - Two arrays of shape (N, n_comps)
        - b
        - g
    - Five arrays of shape (N, n_comps, n_mix): u, y, z, fp, ufp
        - u (mixture responsibilities)
        - y
        - z
        - fp
        - ufp
    """
    dtype_size = np.dtype(dtype).itemsize
    # Per-sample cost across pre-allocated buffers (see Notes).
    bytes_per_sample = (
        1  # loglik
        + 2 * n_models  # modloglik, v
        + 2 * n_comps  # b, g
        + 5 * n_comps * n_mix  # fp, u, ufp, y, z
    ) * dtype_size
    # Plus small headroom for intermediates.
    bytes_per_sample = int(bytes_per_sample * 1.2)

    # Pick memory budget: a fraction of available RAM, with a hard ceiling.
    try:
        hard_cap = 4 * 1024**3  # 4 GiB (avoid runaway memory use)
        avail_mem = psutil.virtual_memory().available
        mem_cap = min(avail_mem * memory_fraction, hard_cap)
    except Exception:
        mem_cap = memory_cap  # fallback to user-specified cap

    max_batch_size = int(mem_cap // bytes_per_sample)

    # Ensure at least 1 sample. This should only trigger if n_comps and n_mix
    # are huge.
    if max_batch_size < 1:
        raise MemoryError(
            f"Cannot fit even 1 sample within memory cap of "
            f"{mem_cap / 1024**3:.2f} GiB. "
            f"Per-sample memory cost is {bytes_per_sample / 1024**3:.2f} GiB."
        )
    batch_size = int(min(N, max_batch_size))

    # Heuristic floor: we don't want absurdly small chunks or chunks that are
    # too small relative to the model complexity (n_comps).
    # This heuristic works well for typical ICA regimes, where n_comps < 256.
    min_batch_size = max(8192, n_comps * 32)  # at least 32 samples per component
    min_batch_size = min(min_batch_size, N)  # cannot exceed N
    if batch_size < min_batch_size:
        # NOTE: the "Warning:" prefix was dropped; warnings already display
        # their category, so the old message read "UserWarning: Warning: ...".
        warn(
            f"To stay within the memory cap, batch size is {batch_size} "
            f"samples, which is below the recommended minimum of {min_batch_size}."
        )
    return batch_size