plsdo 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ name: Bug report
2
+ description: Report something that is not working correctly
3
+ labels: ["type: bug"]
4
+ body:
5
+ - type: textarea
6
+ id: command
7
+ attributes:
8
+ label: Command run
9
+ description: The exact `plsdo` invocation, including all flags
10
+ placeholder: "plsdo run --method c --x X.csv --y Y.csv --demographics demo.csv --output results/"
11
+ validations:
12
+ required: true
13
+
14
+ - type: textarea
15
+ id: expected
16
+ attributes:
17
+ label: Expected behaviour
18
+ description: What should have happened
19
+ validations:
20
+ required: true
21
+
22
+ - type: textarea
23
+ id: actual
24
+ attributes:
25
+ label: Actual behaviour
26
+ description: What happened instead — include the full error message and traceback if applicable
27
+ validations:
28
+ required: true
29
+
30
+ - type: textarea
31
+ id: input
32
+ attributes:
33
+ label: Input description
34
+ description: >
35
+ Describe the shape and content of your input files (number of subjects, features, groups).
36
+ Do not attach real data — synthetic or anonymised descriptions are fine.
37
+ placeholder: "X: 42 subjects × 120 features, Y: 42 subjects × 8 scores, 3 groups"
38
+ validations:
39
+ required: false
40
+
41
+ - type: textarea
42
+ id: environment
43
+ attributes:
44
+ label: Environment
45
+ description: Python version, operating system, and plsdo version (found in `log.txt` under `version:`)
46
+ placeholder: "Python 3.11, macOS 14.4, plsdo 0.1.0"
47
+ validations:
48
+ required: true
@@ -0,0 +1 @@
1
+ blank_issues_enabled: true
@@ -0,0 +1,30 @@
1
+ name: Documentation issue
2
+ description: Report missing, incorrect, or unclear documentation — including missing references
3
+ labels: ["type: docs"]
4
+ body:
5
+ - type: textarea
6
+ id: description
7
+ attributes:
8
+ label: What is missing or incorrect
9
+ description: Describe the gap or error
10
+ validations:
11
+ required: true
12
+
13
+ - type: input
14
+ id: location
15
+ attributes:
16
+ label: Location
17
+ description: Which doc page, CLI help text, log output, or plot label is affected?
18
+ placeholder: "docs/interpreting-output.md, section on bootstrap ratios"
19
+ validations:
20
+ required: false
21
+
22
+ - type: textarea
23
+ id: reference
24
+ attributes:
25
+ label: Reference or source
26
+ description: >
27
+ Optional. Cite the paper, source, or example that should be referenced or used to correct the docs.
28
+ placeholder: "McIntosh & Lobaugh (2004), Partial least squares analysis of neuroimaging data, doi:10.1016/j.neuroimage.2004.07.020"
29
+ validations:
30
+ required: false
@@ -0,0 +1,29 @@
1
+ name: Feature request
2
+ description: Propose a new capability or enhancement
3
+ labels: ["type: feature"]
4
+ body:
5
+ - type: textarea
6
+ id: motivation
7
+ attributes:
8
+ label: Use case and motivation
9
+ description: What problem does this solve, and for whom?
10
+ validations:
11
+ required: true
12
+
13
+ - type: textarea
14
+ id: proposal
15
+ attributes:
16
+ label: Proposed behaviour
17
+ description: How should the feature work? Include proposed CLI flags, output changes, or API behaviour as applicable.
18
+ validations:
19
+ required: true
20
+
21
+ - type: textarea
22
+ id: reference
23
+ attributes:
24
+ label: Literature reference
25
+ description: >
26
+ Optional. If the request relates to a specific PLS variant, statistical method, or published analysis,
27
+ cite the paper here (author, year, DOI or URL if available).
28
+ validations:
29
+ required: false
@@ -0,0 +1,52 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ name: Test (Python ${{ matrix.python-version }})
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ fail-fast: false
15
+ matrix:
16
+ python-version: ["3.10", "3.11", "3.12"]
17
+
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - uses: actions/setup-python@v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Install uv
26
+ run: pip install uv
27
+
28
+ - name: Install package and dev dependencies
29
+ run: uv pip install --system -e ".[dev]"
30
+
31
+ - name: Run tests
32
+ run: pytest -v
33
+
34
+ lint:
35
+ name: Lint
36
+ runs-on: ubuntu-latest
37
+
38
+ steps:
39
+ - uses: actions/checkout@v4
40
+
41
+ - uses: actions/setup-python@v5
42
+ with:
43
+ python-version: "3.12"
44
+
45
+ - name: Install uv
46
+ run: pip install uv
47
+
48
+ - name: Install ruff
49
+ run: uv pip install --system ruff
50
+
51
+ - name: Run ruff
52
+ run: ruff check plsdo/ tests/
plsdo-0.0.1/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+
8
+ # Virtual environments
9
+ .venv/
10
+
11
+ # IDE
12
+ .idea/
13
+ .vscode/
14
+
15
+ # OS
16
+ .DS_Store
17
+
18
+ # uv
19
+ uv.lock
20
+
21
+ # Local dev / agentic reference files (not for distribution)
22
+ .dev/
plsdo-0.0.1/CLAUDE.md ADDED
@@ -0,0 +1,68 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Commands
6
+
7
+ ```bash
8
+ # Install for development (from repo root, with .venv active)
9
+ uv pip install -e ".[dev]"
10
+
11
+ # Run all tests
12
+ .venv/bin/pytest tests/
13
+
14
+ # Run a single test file
15
+ .venv/bin/pytest tests/test_core.py
16
+
17
+ # Run a single test by name
18
+ .venv/bin/pytest tests/test_core.py::TestBootstrap::test_seed_reproducibility
19
+ ```
20
+
21
+ ## Architecture
22
+
23
+ The package has a strict separation of concerns across six modules:
24
+
25
+ - **`io.py`** — everything that touches files or validates inputs: loading CSVs, detecting subject IDs, aligning subjects, checking missing values and variance, z-scoring, parsing YAML group configs, loading feature metadata, and building the dummy-coded design matrix.
26
+ - **`core.py`** — the `PLS` class. Stateful: takes z-scored arrays, runs `fit()` → `permutation_test()` → `bootstrap()` → `filter_lvs()` in sequence. Stores results as instance attributes.
27
+ - **`cross_validate.py`** — `run_cv()` and `permutation_test_cv()`. Uses `sklearn.cross_decomposition.PLSRegression` (not the SVD-based `PLS` class) because prediction requires `predict()`. Entirely independent of `core.py`.
28
+ - **`plotting.py`** — stateless functions. All take data arrays and an `out_path`, save the figure, return nothing. `meta_colours()` is here too (not in pipeline).
29
+ - **`pipeline.py`** — orchestration only. Calls `io` → `core` → `plotting` in sequence, writes CSVs and `log.txt`. No computation here.
30
+ - **`cli.py`** — argument parsing and validation only. Dispatches to `pipeline.run_pipeline()` or `pipeline.cross_validate_pipeline()`.
31
+
32
+ ### Key design decisions
33
+
34
+ **One SVD engine for both PLS variants.** Correlational PLS z-scores both X and Y; discriminatory PLS uses a dummy-coded X (not z-scored) and z-scores Y only. The `PLS` class handles both — `pipeline.py` builds the right inputs before calling it.
35
+
36
+ **Bootstrap uses Procrustes + sign correction.** Each bootstrap SVD is aligned to the reference via `scipy.linalg.orthogonal_procrustes` on Vt, then signs are corrected by dot product with the reference Vt loadings. Both U and Vt loadings are aligned together.
37
+
38
+ **LV filtering is two-stage.** `filter_lvs()` keeps LVs that are (1) significant by permutation (p < 0.05) and (2) have at least one feature with |bootstrap ratio| > 1.96 on *both* the X and Y sides. Result is a boolean `final_lvs` mask.
39
+
40
+ **CV flips X and Y.** `cross_validate.py` uses Y (continuous data) as the predictor and dummy-coded groups as the target, so `pls.predict()` gives predicted group scores. This is the opposite convention from `plsdo run`.
41
+
42
+ ## Design philosophy
43
+
44
+ Four principles, in priority order:
45
+
46
+ 1. **Mathematical validity** — correctness is non-negotiable.
47
+ 2. **Lightweight** — no unnecessary dependencies. Every dependency must earn its place.
48
+ 3. **Scientific Python standards** — follow community conventions so the package is citable, installable, and maintainable.
49
+ 4. **Glass-box and FAIR** — output everything needed to reproduce a result; keep the implementation transparent.
50
+
51
+ Practical consequences: prefer stdlib over third-party where reasonable (argparse over click, logging over print). Do not add inference or statistical tests beyond PLS itself — plotting scores by group factors is in scope; pairwise post-hoc tests are not. When in doubt, do less.
52
+
53
+ **Efficiency** is part of correctness. Prefer vectorised NumPy operations over Python loops wherever the maths permits — not for micro-optimisation, but because this code runs on HPCs and environmental cost is real. If a loop can be replaced by array operations without adding complexity or obscuring intent, replace it.
54
+
55
+ **Robustness** sits inside principle 1, not alongside it. Validate aggressively anywhere a silent failure could propagate — at file boundaries and wherever mathematical assumptions could be violated (zero variance before z-scoring, empty group levels before dummy coding, etc.). Fail loudly with informative errors. Trust internal transformations between already-validated states; defensive checks between modules add noise without catching anything real.
56
+
57
+ ## Conventions
58
+
59
+ - British English in all prose: docs, commit messages, user-facing strings, comments.
60
+ - Commit messages use conventional prefixes: `feat`, `fix`, `enh`, `ref`, `test`, `docs`, `chore`. The user commits with a GPG key, so stage files and provide the message text only (with attribution for Claude); do not run `git commit`.
61
+ - `plsdo/` contains no data. Test data lives in `tests/data/` (synthetic, small).
62
+ - Reference notebooks and scripts (`.dev/correlational_pls.ipynb`, `.dev/discriminatory_pls.ipynb`, `.dev/claude_cross_validation.py`) are the source of truth for computational steps and plot styling. Deviations require discussion. These files are gitignored and live only in your local working copy.
63
+ - `.dev/superpowers/specs/` and `.dev/superpowers/plans/` contain the design spec and implementation plan. Consult them before making structural changes. These files are gitignored.
64
+
65
+ ## Before public release / PyPI submission
66
+ - Claim the `plsdo` package name on PyPI before announcing the package publicly — squatting is a real risk once there is any visibility.
67
+ - Update `README.md` and `docs/usage.md` installation instructions from `git clone` to `pip install plsdo` once the package is published.
68
+ - Bump version to `1.0.0` and update the `Development Status` classifier to `4 - Beta` or `5 - Production/Stable` as appropriate.
plsdo-0.0.1/LICENSE ADDED
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2026, Eilidh MacNicol
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
plsdo-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: plsdo
3
+ Version: 0.0.1
4
+ Summary: PLS covariance analysis with statistical testing and visualisation
5
+ Project-URL: Repository, https://github.com/braincentrekcl/plsdo
6
+ Project-URL: Issues, https://github.com/braincentrekcl/plsdo/issues
7
+ Author: Eilidh MacNicol
8
+ Maintainer: Eilidh MacNicol
9
+ License-Expression: BSD-3-Clause
10
+ License-File: LICENSE
11
+ Keywords: PLS,analysis,multivariate,statistics
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: BSD License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: matplotlib>=3.7
23
+ Requires-Dist: numpy>=1.24
24
+ Requires-Dist: pandas>=2.0
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: scipy>=1.10
27
+ Requires-Dist: seaborn>=0.13
28
+ Provides-Extra: cv
29
+ Requires-Dist: scikit-learn>=1.2; extra == 'cv'
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest; extra == 'dev'
32
+ Requires-Dist: ruff; extra == 'dev'
33
+ Requires-Dist: scikit-learn; extra == 'dev'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # plsdo
37
+
38
+ Partial Least Squares (PLS) covariance analysis with permutation testing, bootstrap reliability, and publication-ready visualisation — from the command line.
39
+
40
+ (Pronounced: "please do")
41
+
42
+ `plsdo` was built out of necessity for project-specific neuroscience and neuroimaging pipelines, then generalised to handle flexible, diverse datasets beyond its origins. It implements two PLS variants used in neuroimaging and cognitive neuroscience research:
43
+
44
+ - **Correlational PLS** — finds latent variables that maximise covariance between two continuous data matrices (e.g. brain measures and behaviour scores).
45
+ - **Discriminatory PLS** — finds latent variables that maximise covariance between a continuous data matrix and a dummy-coded group matrix (i.e. group differences).
46
+
47
+ Statistical validity is built in: every analysis runs a permutation test on singular values and bootstraps loading stability. Only latent variables that pass both tests appear in the output.
48
+
49
+ > **Early alpha.** The API and output format may change before the first stable release. Feedback and bug reports are very welcome — please open an issue.
50
+
51
+ ---
52
+
53
+ ## Installation
54
+
55
+ Requires Python ≥ 3.10.
56
+
57
+ ```bash
58
+ git clone https://github.com/braincentrekcl/plsdo.git
59
+ cd plsdo
60
+ uv venv .venv && source .venv/bin/activate
61
+ uv pip install -e .
62
+ ```
63
+
64
+ For discriminatory PLS with cross-validation (requires scikit-learn):
65
+ ```bash
66
+ uv pip install -e ".[cv]"
67
+ ```
68
+
69
+ ---
70
+
71
+ ## Quick start
72
+
73
+ ### Correlational PLS
74
+
75
+ ```bash
76
+ plsdo run --method c \
77
+ --x brain_measures.csv \
78
+ --y behaviour_scores.csv \
79
+ --demographics participants.csv \
80
+ --group-col treatment \
81
+ --subject-id participant_id \
82
+ --output results/
83
+ ```
84
+
85
+ ### Discriminatory PLS
86
+
87
+ ```bash
88
+ plsdo run --method d \
89
+ --y mri_features.csv \
90
+ --demographics participants.csv \
91
+ --group-col drug_group \
92
+ --subject-id participant_id \
93
+ --output results/
94
+ ```
95
+
96
+ ### Cross-validation (discriminatory only)
97
+
98
+ Requires `plsdo[cv]` — see Installation above.
99
+
100
+ ```bash
101
+ plsdo cross-validate \
102
+ --y mri_features.csv \
103
+ --demographics participants.csv \
104
+ --group-col drug_group \
105
+ --subject-id participant_id \
106
+ --output cv_results/
107
+ ```
108
+
109
+ ---
110
+
111
+ ## Output
112
+
113
+ Each run writes to the output directory:
114
+
115
+ ```
116
+ results/
117
+ figures/ cross-correlation heatmap, permutation test, loading bar plots, score plots
118
+ data/ singular values, p-values, loadings, bootstrap ratios, subject scores (CSV)
119
+ log.txt parameters and version stamp
120
+ ```
121
+
122
+ ---
123
+
124
+ ## Documentation
125
+
126
+ | Page | Contents |
127
+ |------|----------|
128
+ | [Usage guide](docs/usage.md) | Full CLI options, multiple grouping variables, all flags |
129
+ | [Input format](docs/input-format.md) | How to structure X, Y, demographics, and metadata files |
130
+ | [Interpreting output](docs/interpreting-output.md) | What each plot and CSV means |
131
+ | [Missing data](docs/missing-data.md) | Why plsdo does not impute, and what to do instead |
132
+
133
+ ---
134
+
135
+ ## Contributing
136
+
137
+ Issues and pull requests are welcome. Please open an issue before starting significant work.
138
+
139
+ Contact: eilidh [dot] macnicol [at] kcl [dot] ac [dot] uk
140
+
141
+ ---
142
+
143
+ ## Licence
144
+
145
+ BSD 3-Clause. See [LICENSE](LICENSE).
plsdo-0.0.1/README.md ADDED
@@ -0,0 +1,110 @@
1
+ # plsdo
2
+
3
+ Partial Least Squares (PLS) covariance analysis with permutation testing, bootstrap reliability, and publication-ready visualisation — from the command line.
4
+
5
+ (Pronounced: "please do")
6
+
7
+ `plsdo` was built out of necessity for project-specific neuroscience and neuroimaging pipelines, then generalised to handle flexible, diverse datasets beyond its origins. It implements two PLS variants used in neuroimaging and cognitive neuroscience research:
8
+
9
+ - **Correlational PLS** — finds latent variables that maximise covariance between two continuous data matrices (e.g. brain measures and behaviour scores).
10
+ - **Discriminatory PLS** — finds latent variables that maximise covariance between a continuous data matrix and a dummy-coded group matrix (i.e. group differences).
11
+
12
+ Statistical validity is built in: every analysis runs a permutation test on singular values and bootstraps loading stability. Only latent variables that pass both tests appear in the output.
13
+
14
+ > **Early alpha.** The API and output format may change before the first stable release. Feedback and bug reports are very welcome — please open an issue.
15
+
16
+ ---
17
+
18
+ ## Installation
19
+
20
+ Requires Python ≥ 3.10.
21
+
22
+ ```bash
23
+ git clone https://github.com/braincentrekcl/plsdo.git
24
+ cd plsdo
25
+ uv venv .venv && source .venv/bin/activate
26
+ uv pip install -e .
27
+ ```
28
+
29
+ For discriminatory PLS with cross-validation (requires scikit-learn):
30
+ ```bash
31
+ uv pip install -e ".[cv]"
32
+ ```
33
+
34
+ ---
35
+
36
+ ## Quick start
37
+
38
+ ### Correlational PLS
39
+
40
+ ```bash
41
+ plsdo run --method c \
42
+ --x brain_measures.csv \
43
+ --y behaviour_scores.csv \
44
+ --demographics participants.csv \
45
+ --group-col treatment \
46
+ --subject-id participant_id \
47
+ --output results/
48
+ ```
49
+
50
+ ### Discriminatory PLS
51
+
52
+ ```bash
53
+ plsdo run --method d \
54
+ --y mri_features.csv \
55
+ --demographics participants.csv \
56
+ --group-col drug_group \
57
+ --subject-id participant_id \
58
+ --output results/
59
+ ```
60
+
61
+ ### Cross-validation (discriminatory only)
62
+
63
+ Requires `plsdo[cv]` — see Installation above.
64
+
65
+ ```bash
66
+ plsdo cross-validate \
67
+ --y mri_features.csv \
68
+ --demographics participants.csv \
69
+ --group-col drug_group \
70
+ --subject-id participant_id \
71
+ --output cv_results/
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Output
77
+
78
+ Each run writes to the output directory:
79
+
80
+ ```
81
+ results/
82
+ figures/ cross-correlation heatmap, permutation test, loading bar plots, score plots
83
+ data/ singular values, p-values, loadings, bootstrap ratios, subject scores (CSV)
84
+ log.txt parameters and version stamp
85
+ ```
86
+
87
+ ---
88
+
89
+ ## Documentation
90
+
91
+ | Page | Contents |
92
+ |------|----------|
93
+ | [Usage guide](docs/usage.md) | Full CLI options, multiple grouping variables, all flags |
94
+ | [Input format](docs/input-format.md) | How to structure X, Y, demographics, and metadata files |
95
+ | [Interpreting output](docs/interpreting-output.md) | What each plot and CSV means |
96
+ | [Missing data](docs/missing-data.md) | Why plsdo does not impute, and what to do instead |
97
+
98
+ ---
99
+
100
+ ## Contributing
101
+
102
+ Issues and pull requests are welcome. Please open an issue before starting significant work.
103
+
104
+ Contact: eilidh [dot] macnicol [at] kcl [dot] ac [dot] uk
105
+
106
+ ---
107
+
108
+ ## Licence
109
+
110
+ BSD 3-Clause. See [LICENSE](LICENSE).
@@ -0,0 +1,60 @@
1
+ # Input Format
2
+
3
+ ## Required Files
4
+
5
+ ### X Matrix (correlational PLS only)
6
+
7
+ CSV with subjects as rows and features as columns. The first column must be
8
+ the subject identifier.
9
+
10
+ ```csv
11
+ subject_id,region_A,region_B,region_C
12
+ sub01,1.23,4.56,7.89
13
+ sub02,2.34,5.67,8.90
14
+ ```
15
+
16
+ ### Y Matrix
17
+
18
+ Same format as X. Subject IDs must match across files (order does not matter
19
+ — the pipeline will align them).
20
+
21
+ ### Demographics
22
+
23
+ CSV with a subject ID column and at least one grouping column.
24
+
25
+ ```csv
26
+ subject_id,group,sex,age
27
+ sub01,control,F,25
28
+ sub02,treatment,M,30
29
+ ```
30
+
31
+ ## Optional Files
32
+
33
+ ### Feature Metadata
34
+
35
+ CSV with a `feature` column matching data column headers, plus category
36
+ columns for plot colour-coding.
37
+
38
+ ```csv
39
+ feature,category
40
+ region_A,frontal
41
+ region_B,frontal
42
+ region_C,temporal
43
+ ```
44
+
45
+ ### Groups Configuration
46
+
47
+ YAML file for multiple grouping variables. See `docs/usage.md` for examples.
48
+
49
+ ## Subject Alignment
50
+
51
+ The pipeline finds the intersection of subject IDs across all input files.
52
+ Subjects present in some files but not others are excluded with a warning.
53
+ If no subjects are shared, the pipeline errors.
54
+
55
+ ## Missing Data
56
+
57
+ The pipeline does **not** handle missing data. If any value in X or Y is
58
+ NaN, the pipeline errors and lists which subjects and features are affected.
59
+
60
+ See `docs/missing-data.md` for guidance on how to address this.
@@ -0,0 +1,70 @@
1
+ # Interpreting PLS Output
2
+
3
+ ## Cross-Correlation Heatmap
4
+
5
+ This matrix shows the Pearson correlation between every feature in X and
6
+ every feature in Y, computed across all subjects. It is the raw input to
7
+ the SVD. Strong positive or negative values indicate features that co-vary
8
+ across subjects.
9
+
10
+ ## Singular Values and Permutation Test
11
+
12
+ The SVD breaks the cross-correlation matrix into latent variables (LVs),
13
+ ordered by how much covariance they explain. The singular value for each LV
14
+ quantifies its strength.
15
+
16
+ The permutation test asks: is this singular value larger than we would expect
17
+ if X and Y were unrelated? It shuffles the subject pairing between X and Y
18
+ 10,000 times and compares the observed singular value to this null
19
+ distribution.
20
+
21
+ **How to read the plot:** A red line to the right of the grey histogram
22
+ indicates a singular value that exceeds the null distribution — that LV
23
+ captures real covariance, not noise.
24
+
25
+ ## Loading Bar Plots
26
+
27
+ For each significant and reliable LV, the loading plots show which features
28
+ contribute most to the pattern. Bars are sorted by absolute loading. The
29
+ red error bars show the bootstrap standard error — they indicate how stable
30
+ each loading is across resampled versions of the data.
31
+
32
+ **Large bars with small error bars** are the features driving the pattern
33
+ reliably. **Large bars with large error bars** may be driven by a few
34
+ outlier subjects.
35
+
36
+ ## Bootstrap Ratios
37
+
38
+ The bootstrap ratio is the loading divided by its standard error. It can be
39
+ interpreted like a z-score: absolute values above 1.96 indicate that a feature's
40
+ contribution is reliable at the 95% confidence level.
41
+
42
+ ## Subject Scores
43
+
44
+ Subject scores show how strongly each subject expresses a given LV pattern.
45
+ The X scores (XU) project each subject onto the X-side pattern; the Y
46
+ scores (YV') project onto the Y-side pattern.
47
+
48
+ **Box/strip plots** show how scores distribute across groups. If a LV
49
+ captures a group difference, the boxes will separate.
50
+
51
+ **Score scatter plots** (correlational PLS only) show the relationship
52
+ between X and Y scores. If the PLS pattern is strong, subjects should fall
53
+ along a diagonal. Group-specific linear fits reveal whether the X–Y
54
+ relationship differs by group.
55
+
56
+ ## Cross-Validation (Discriminatory PLS)
57
+
58
+ Cross-validation tests whether the group discrimination holds on unseen
59
+ subjects. The fold accuracy histogram shows per-fold classification
60
+ accuracy, while the confusion matrix shows which groups are well-separated
61
+ and which are confused.
62
+
63
+ The permutation test of CV accuracy answers: is the observed accuracy
64
+ significantly better than chance? A p-value below 0.05 indicates that
65
+ the model generalises beyond the training data.
66
+
67
+ **Important:** do not select the number of components based on `plsdo run`
68
+ results and then feed that into cross-validation. This introduces
69
+ circularity. Use all components (the default) or use nested
70
+ cross-validation.