reverse-pred 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 vital-kolab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: reverse_pred
3
+ Version: 0.0.1
4
+ Summary: Library to run Reverse Predictivity
5
+ Project-URL: Homepage, https://github.com/vital-kolab/reverse_pred
6
+ Project-URL: Issues, https://github.com/vital-kolab/reverse_pred/issues
7
+ Author-email: Sabine Muzellec and Kohitij Kar <sabinem@yorku.ca>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Programming Language :: Python :: 3
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+
15
+ # Reverse Predictivity
16
+
17
+ A research codebase accompanying the preprint:
18
+
19
+ **Reverse Predictivity: Going Beyond One-Way Mapping to Compare Artificial Neural Network Models and Brains**, Muzellec & Kar, bioRxiv (posted August 8, 2025) ([biorxiv.org](https://www.biorxiv.org/content/10.1101/2025.08.08.669382v1))
20
+
21
+ This repository supports analyses comparing macaque inferior temporal (IT) cortex responses with artificial neural network (ANN) units—specifically using a *reverse predictivity* metric that assesses how well neural responses predict ANN activations ([biorxiv.org](https://www.biorxiv.org/content/10.1101/2025.08.08.669382v1)).
22
+
23
+ ### Compare brains and models in both directions.
24
+
25
+ This repository implements **reverse predictivity**: a complementary evaluation to forward neural predictivity that asks *how well do neural responses predict ANN activations?* It provides utilities to map macaque IT population responses to model units, quantify bidirectional alignment, and reproduce manuscript figures.
26
+
27
+ ## 🧠 What is reverse predictivity?
28
+ Traditional *forward* neural predictivity evaluates how well a model’s features linearly predict neural responses. **Reverse predictivity** inverts that lens: using neural responses to predict model units. Agreement across both directions strengthens claims that a model and a brain area **share representations**. Practically, this repo includes:
29
+
30
+ - Regression utilities to decode IT neurons / ANN unit activations from ANN units / IT population responses
31
+ - Image‑level metrics and correlation suites to compare human/ANN/neural behaviors
32
+ - End‑to‑end notebooks to reproduce figures
33
+
34
+ ## 🗂️ Repository layout
35
+
36
+ - `demo_forward_predictivity.ipynb` – quick demo of *forward* mapping model units -> neurons
37
+ - `demo_reverse_predictivity.ipynb` – quick demo of *reverse* mapping model units <- neurons
38
+ - `demo_generate_neurons_i1.ipynb` – compute image‑level neural metrics
39
+ - `demo_generate_model_i1.ipynb` – compute image‑level model metrics
40
+ - `figure[1-6].ipynb` – figure reproduction notebooks
41
+ - `model_to_monkey.py` – utilities for model -> neural regression and evaluation
42
+ - `monkey_to_model.py` – utilities for model <- neural regression and evaluation
43
+ - `correlation_metrics.py` – Spearman/Pearson, reliability‑aware correlations, confidence intervals
44
+ - `regression_metrics.py` – regression helpers
45
+ - `prediction_utils.py` – shared helpers for prediction/decoding
46
+ - `decode_utils.py` – train/test splits, cross‑validation, split‑half routines
47
+ - `figure_utils.py` – journal‑style plotting helpers
48
+ - `h5_utils.py` – helpers to read/write HDF5 feature and metadata files
49
+
50
+ 📦 *Large data files (IT features, image sets) are not stored in the repo.* They can be downloaded from: [here](https://osf.io/y3qmk/?view_only=6dfe548c7ba24238932d247e65523053)
51
+
52
+ ## 🛠️ Installation
53
+
54
+ We recommend Python ≥3.10 with a fresh environment (Conda or venv).
55
+
56
+ ```bash
57
+ # Using conda
58
+ conda create -n reverse_pred python=3.10 -y
59
+ conda activate reverse_pred
60
+
61
+ # Install core dependencies
62
+ pip install numpy scipy scikit-learn matplotlib h5py
63
+ ```
64
+
65
+ ## 📥 Data & preparation
66
+
67
+ This project assumes access to:
68
+
69
+ 1. **Macaque IT responses**: population responses for N images.
70
+ - `/neural_data` shape `(n_images, n_neurons, n_reps)`
71
+ 2. **Model features**: precomputed ANN activations for the same images
72
+ - `/model_features` shape `(n_images, n_units)`
73
+ 3. **Humans / Primates behavior**: image‑level accuracies
74
+ - `/behavior` shape `(n_images)`
75
+
76
+ ## 🚀 Quickstart
77
+ - `demo_forward_predictivity.ipynb` – step‑by‑step guide to fitting a model to neuron regression, evaluating correlations.
78
+ - `demo_reverse_predictivity.ipynb` – end‑to‑end demonstration of neuron to model regression, computing EV/correlation metrics.
79
+ - `demo_generate_neurons_i1.ipynb` – generates image‑level accuracies from neural decoders.
80
+ - `demo_generate_model_i1.ipynb` – extracts image‑level model metrics from ANN activations.
81
+
82
+ ## 🔁 Reproducing manuscript figures
83
+ Each `figureX.ipynb` notebook reproduces the corresponding figure from the preprint. Notebooks expect the data assets described above. If paths differ, change the config cell at the top of each notebook.
84
+
85
+ - **Figure 1:** Forward Predictivity
86
+ - **Figure 2:** Reverse vs forward predictivity examples
87
+ - **Figure 3:** Reverse vs forward predictivity across monkeys and models
88
+ - **Figure 4:** Influencing factors
89
+ - **Figure 5:** Analysis of unique units
90
+ - **Figure 6:** Link with behavior
91
+
92
+ ## 📌 Status & citation
93
+ This codebase accompanies the preprint:
94
+
95
+ **Muzellec, S. & Kar, K. (2025). _Reverse Predictivity: Going Beyond One‑Way Mapping to Compare Artificial Neural Network Models and Brains_. bioRxiv.**
96
+
97
+ If you use this repository or ideas from it, please cite the preprint and link to this repo.
98
+
99
+ ```
100
+ @article{muzellec_kar_2025_reversepredictivity,
101
+ title = {Reverse Predictivity: Going Beyond One-Way Mapping to Compare Artificial Neural Network Models and Brains},
102
+ author = {Muzellec, Sabine and Kar, Kohitij},
103
+ year = {2025},
104
+ journal= {bioRxiv}
105
+ }
106
+ ```
107
+
108
+ License: **MIT** (see `LICENSE`).
@@ -0,0 +1,94 @@
1
+ # Reverse Predictivity
2
+
3
+ A research codebase accompanying the preprint:
4
+
5
+ **Reverse Predictivity: Going Beyond One-Way Mapping to Compare Artificial Neural Network Models and Brains**, Muzellec & Kar, bioRxiv (posted August 8, 2025) ([biorxiv.org](https://www.biorxiv.org/content/10.1101/2025.08.08.669382v1))
6
+
7
+ This repository supports analyses comparing macaque inferior temporal (IT) cortex responses with artificial neural network (ANN) units—specifically using a *reverse predictivity* metric that assesses how well neural responses predict ANN activations ([biorxiv.org](https://www.biorxiv.org/content/10.1101/2025.08.08.669382v1)).
8
+
9
+ ### Compare brains and models in both directions.
10
+
11
+ This repository implements **reverse predictivity**: a complementary evaluation to forward neural predictivity that asks *how well do neural responses predict ANN activations?* It provides utilities to map macaque IT population responses to model units, quantify bidirectional alignment, and reproduce manuscript figures.
12
+
13
+ ## 🧠 What is reverse predictivity?
14
+ Traditional *forward* neural predictivity evaluates how well a model’s features linearly predict neural responses. **Reverse predictivity** inverts that lens: using neural responses to predict model units. Agreement across both directions strengthens claims that a model and a brain area **share representations**. Practically, this repo includes:
15
+
16
+ - Regression utilities to decode IT neurons / ANN unit activations from ANN units / IT population responses
17
+ - Image‑level metrics and correlation suites to compare human/ANN/neural behaviors
18
+ - End‑to‑end notebooks to reproduce figures
19
+
20
+ ## 🗂️ Repository layout
21
+
22
+ - `demo_forward_predictivity.ipynb` – quick demo of *forward* mapping model units -> neurons
23
+ - `demo_reverse_predictivity.ipynb` – quick demo of *reverse* mapping model units <- neurons
24
+ - `demo_generate_neurons_i1.ipynb` – compute image‑level neural metrics
25
+ - `demo_generate_model_i1.ipynb` – compute image‑level model metrics
26
+ - `figure[1-6].ipynb` – figure reproduction notebooks
27
+ - `model_to_monkey.py` – utilities for model -> neural regression and evaluation
28
+ - `monkey_to_model.py` – utilities for model <- neural regression and evaluation
29
+ - `correlation_metrics.py` – Spearman/Pearson, reliability‑aware correlations, confidence intervals
30
+ - `regression_metrics.py` – regression helpers
31
+ - `prediction_utils.py` – shared helpers for prediction/decoding
32
+ - `decode_utils.py` – train/test splits, cross‑validation, split‑half routines
33
+ - `figure_utils.py` – journal‑style plotting helpers
34
+ - `h5_utils.py` – helpers to read/write HDF5 feature and metadata files
35
+
36
+ 📦 *Large data files (IT features, image sets) are not stored in the repo.* They can be downloaded from: [here](https://osf.io/y3qmk/?view_only=6dfe548c7ba24238932d247e65523053)
37
+
38
+ ## 🛠️ Installation
39
+
40
+ We recommend Python ≥3.10 with a fresh environment (Conda or venv).
41
+
42
+ ```bash
43
+ # Using conda
44
+ conda create -n reverse_pred python=3.10 -y
45
+ conda activate reverse_pred
46
+
47
+ # Install core dependencies
48
+ pip install numpy scipy scikit-learn matplotlib h5py
49
+ ```
50
+
51
+ ## 📥 Data & preparation
52
+
53
+ This project assumes access to:
54
+
55
+ 1. **Macaque IT responses**: population responses for N images.
56
+ - `/neural_data` shape `(n_images, n_neurons, n_reps)`
57
+ 2. **Model features**: precomputed ANN activations for the same images
58
+ - `/model_features` shape `(n_images, n_units)`
59
+ 3. **Humans / Primates behavior**: image‑level accuracies
60
+ - `/behavior` shape `(n_images)`
61
+
62
+ ## 🚀 Quickstart
63
+ - `demo_forward_predictivity.ipynb` – step‑by‑step guide to fitting a model to neuron regression, evaluating correlations.
64
+ - `demo_reverse_predictivity.ipynb` – end‑to‑end demonstration of neuron to model regression, computing EV/correlation metrics.
65
+ - `demo_generate_neurons_i1.ipynb` – generates image‑level accuracies from neural decoders.
66
+ - `demo_generate_model_i1.ipynb` – extracts image‑level model metrics from ANN activations.
67
+
68
+ ## 🔁 Reproducing manuscript figures
69
+ Each `figureX.ipynb` notebook reproduces the corresponding figure from the preprint. Notebooks expect the data assets described above. If paths differ, change the config cell at the top of each notebook.
70
+
71
+ - **Figure 1:** Forward Predictivity
72
+ - **Figure 2:** Reverse vs forward predictivity examples
73
+ - **Figure 3:** Reverse vs forward predictivity across monkeys and models
74
+ - **Figure 4:** Influencing factors
75
+ - **Figure 5:** Analysis of unique units
76
+ - **Figure 6:** Link with behavior
77
+
78
+ ## 📌 Status & citation
79
+ This codebase accompanies the preprint:
80
+
81
+ **Muzellec, S. & Kar, K. (2025). _Reverse Predictivity: Going Beyond One‑Way Mapping to Compare Artificial Neural Network Models and Brains_. bioRxiv.**
82
+
83
+ If you use this repository or ideas from it, please cite the preprint and link to this repo.
84
+
85
+ ```
86
+ @article{muzellec_kar_2025_reversepredictivity,
87
+ title = {Reverse Predictivity: Going Beyond One-Way Mapping to Compare Artificial Neural Network Models and Brains},
88
+ author = {Muzellec, Sabine and Kar, Kohitij},
89
+ year = {2025},
90
+ journal= {bioRxiv}
91
+ }
92
+ ```
93
+
94
+ License: **MIT** (see `LICENSE`).
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["hatchling >= 1.26"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "reverse_pred"
7
+ version = "0.0.1"
8
+ authors = [
9
+ { name="Sabine Muzellec and Kohitij Kar", email="sabinem@yorku.ca" },
10
+ ]
11
+ description = "Library to run Reverse Predictivity"
12
+ readme = "README.md"
13
+ requires-python = ">=3.9"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "Operating System :: OS Independent",
17
+ ]
18
+ license = "MIT"
19
+ license-files = ["LICEN[CS]E*"]
20
+
21
+ [project.urls]
22
+ Homepage = "https://github.com/vital-kolab/reverse_pred"
23
+ Issues = "https://github.com/vital-kolab/reverse_pred/issues"
File without changes
@@ -0,0 +1,115 @@
1
+ import numpy as np
2
+ from scipy import stats
3
+ import random
4
+
5
def get_split_half_correlation(averaged_data):
    """Per-site split-half reliability of trial-averaged responses.

    Parameters
    ----------
    averaged_data : ndarray
        Array indexed as averaged_data[:, site]; each site slice is passed
        to get_splithalf_corr with ax=1 (split over its second axis).

    Returns
    -------
    tuple of (list, list)
        (explained variance per site, Spearman-Brown-corrected split-half
        correlation per site); EV is simply the squared corrected r.
    """
    corrected_per_site = []
    for site_idx in range(averaged_data.shape[1]):
        site_rates = averaged_data[:, site_idx]
        raw = get_splithalf_corr(site_rates, ax=1)
        corrected_per_site.append(spearmanbrown_correction(raw['split_half_corr']))
    explained_variance = [r ** 2 for r in corrected_per_site]
    return explained_variance, corrected_per_site
15
+
16
def get_splithalf_corr(var, ax=1, type='spearman'):
    """Correlate the means of two random halves of `var`.

    Parameters
    ----------
    var : ndarray
        Data to split (forwarded to get_splithalves).
    ax : int, optional
        Axis along which the random split is taken (default 1).
    type : str, optional
        'spearman' (default) for stats.spearmanr, anything else for
        stats.pearsonr.

    Returns
    -------
    dict
        {'split_half_corr': r, 'p-value': p, 'type': type}.
    """
    _, _, half_mean_a, half_mean_b = get_splithalves(var, ax=ax)
    if type == 'spearman':
        result = stats.spearmanr(half_mean_a, half_mean_b)
    else:
        result = stats.pearsonr(half_mean_a, half_mean_b)
    return {'split_half_corr': result[0],
            'p-value': result[1],
            'type': type}
31
+
32
def get_splithalves(var, ax=1, rng=None):
    """Randomly split `var` in two along `ax`; return halves and their means.

    Parameters
    ----------
    var : ndarray
        Input array to split.
    ax : int, optional
        Axis along which to split (default 1).
    rng : np.random.Generator, optional
        Random generator for reproducibility; np.random.default_rng() when
        omitted.

    Returns
    -------
    split1, split2 : ndarray
        The two halves, axes restored to the input layout.
    split_mean1, split_mean2 : ndarray
        nan-means of each half taken over the split axis.
    """
    rng = np.random.default_rng() if rng is None else rng

    # Bring the split axis to the front so a single axis-0 shuffle permutes it.
    front = np.swapaxes(var, 0, ax)
    permuted = front.copy()
    rng.shuffle(permuted, axis=0)

    half_a, half_b = np.array_split(permuted, 2, axis=0)
    mean_a = np.nanmean(half_a, axis=0)
    mean_b = np.nanmean(half_b, axis=0)

    # The means have one fewer dimension, so their restore axis shifts by one.
    mean_axis = ax - 1 if ax > 0 else 0
    return (np.swapaxes(half_a, 0, ax),
            np.swapaxes(half_b, 0, ax),
            np.swapaxes(mean_a, 0, mean_axis),
            np.swapaxes(mean_b, 0, mean_axis))
72
+
73
def spearmanbrown_correction(var):
    """Spearman-Brown prophecy correction for a split-half correlation.

    Predicts the full-length reliability from a half-length correlation:
    r_full = 2 * r_half / (1 + r_half). Works elementwise on arrays.
    """
    doubled = 2 * var
    return doubled / (1 + var)
76
+
77
+
78
def get_correlation_noise_corrected(var1, var2, nrbs=50, correction_method='spearmanBrown'):
    """
    Noise-corrected correlation between two trial-resolved variables.

    Each bootstrap iteration divides the raw Pearson correlation of the
    trial means by the geometric mean of the two variables' split-half
    correlations (the denominator acts as a reliability ceiling).

    Parameters
    ----------
    var1 : ndarray
        Variable 1 (2d array); 2nd dimension has to be trials (repetitions).
    var2 : ndarray
        Variable 2 (2d array); 2nd dimension has to be trials (repetitions).
    nrbs : int, optional
        Number of bootstrap repeats. The default is 50.
    correction_method : str, optional
        Split correction applied. The default is 'spearmanBrown'.

    Returns
    -------
    corrected_corr : ndarray
        (nrbs, 1) array of corrected pearson correlation values.

    Notes
    -----
    NOTE(review): the split-half denominators use get_splithalf_corr's
    default Spearman correlation while the numerator is Pearson — confirm
    this mix is intended.
    NOTE(review): the non-spearmanBrown branch uses the global `random`
    module without a seed, so results are not reproducible run-to-run.
    """
    corrected_corr = np.empty([nrbs, 1], dtype=float)
    for i in range(nrbs):
        # Fresh random split per bootstrap iteration for both variables.
        sh_corr_var1 = get_splithalf_corr(var1)
        sh_corr_var2 = get_splithalf_corr(var2)
        # Reliability ceiling; NaN if the product is negative (sqrt of < 0).
        den = np.sqrt(sh_corr_var1['split_half_corr'] * sh_corr_var2['split_half_corr'])
        if (correction_method == 'spearmanBrown'):
            num = stats.pearsonr(np.nanmean(var1, axis=1), np.nanmean(var2, axis=1))
        else:
            # Alternative: correlate means of random half-samples of trials.
            var1_split = var1[:, random.sample(list(np.arange(0, np.size(var1, axis=1), 1)),
                int(np.round(np.size(var1, axis=1) / 2)))]
            var2_split = var2[:, random.sample(list(np.arange(0, np.size(var2, axis=1), 1)),
                int(np.round(np.size(var2, axis=1) / 2)))]
            num = stats.pearsonr(np.nanmean(var1_split, axis=1), np.nanmean(var2_split, axis=1))
        corrected_corr[i] = num[0] / den
    return corrected_corr
107
+
108
+
109
def main():
    """Entry point placeholder; this module exposes only library functions.

    The original source had `def main():` with no body immediately followed
    by the __main__ guard, which is a SyntaxError — a valid no-op body is
    required.
    """
    pass


if __name__ == "__main__":
    main()
112
+
113
+
114
+
115
+
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Sat Apr 4 01:17:31 2020
5
+
6
+ @author: kohitij
7
+ """
8
+
9
+ import numpy as np
10
+ #import h5py
11
+ #from sklearn.multiclass import OneVsRestClassifier
12
+ from sklearn.linear_model import LogisticRegression
13
+ from sklearn.multiclass import OneVsRestClassifier
14
+ from sklearn.svm import SVC
15
+ from scipy.stats import zscore, norm
16
+ from sklearn import preprocessing
17
+
18
def decode(features,labels,nrfolds=2,seed=0):
    """Cross-validated multi-class decoding from feature columns.

    Parameters
    ----------
    features : ndarray
        2-D array indexed as features[:, image] — rows are feature
        dimensions, columns are images (nrImages = features.shape[1] and
        the classifier is fit on XTrain.T).
    labels : ndarray
        Per-image class label, length features.shape[1].
    nrfolds : int, optional
        Number of cross-validation folds (default 2).
    seed : int, optional
        Seed forwarded to get_train_test_indices (default 0).

    Returns
    -------
    ndarray
        (nrImages, n_classes) class-probability matrix; each row is filled
        in the fold where that image was in the test split.
    """

    classes=np.unique(labels)
    nrImages = features.shape[1]
    # NOTE(review): `ind` is computed but never used below.
    _,ind = np.unique(classes, return_inverse=True)
    # Standardize along axis 0 (across feature dimensions within each column).
    features = zscore(features,axis=0)
    num_classes = len(classes)
    prob = np.zeros((nrImages,len(classes)))
    prob[:]=np.nan

    for i in range(nrfolds):
        train, test = get_train_test_indices(nrImages,nrfolds=nrfolds, foldnumber=i, seed=seed)
        XTrain = features[:,train]
        XTest = features[:,test]
        YTrain = labels[train]

        # C=5*10e4 == 5e5, i.e. very weak L2 regularization; one-vs-rest scheme.
        clf = LogisticRegression(penalty='l2',C=5*10e4,multi_class='ovr', max_iter=1000, class_weight='balanced').fit(XTrain.T, YTrain)
        pred=clf.predict_proba(XTest.T)
        prob[test,0:num_classes]=pred
    return prob
38
+
39
+
40
def get_percent_correct_from_proba(prob, labels,class_order, eps=1e-3):
    """Pairwise (target vs. distractor) percent correct for every image.

    For image i with true class t, pc[i, d] = p_t / (p_d + p_t): the
    probability mass on the target within each target/distractor pair.
    The target's own column is set to NaN.

    Parameters
    ----------
    prob : ndarray
        (nrImages, n_classes) class-probability matrix (e.g. from decode()).
    labels : ndarray
        True class label per image.
    class_order : array-like
        NOTE(review): immediately overwritten by np.unique(labels) below,
        so the passed value is ignored — confirm whether callers rely on a
        custom column ordering.
    eps : float, optional
        NOTE(review): unused; the commented-out "+eps" suggests it once
        guarded the denominator against zero.

    Returns
    -------
    ndarray
        (nrImages, n_classes) pairwise percent correct, NaN in each image's
        target column.
    """
    nrImages = prob.shape[0]
    class_order=np.unique(labels)
    pc = np.zeros((nrImages,len(class_order)))
    pc[:]=np.nan
    # NOTE(review): `ind` is computed but never used.
    _,ind = np.unique(labels, return_inverse=True)
    for i in range(nrImages):
        # Boolean mask selecting the target-class column for this image.
        loc_target = labels[i]==class_order
        pc[i,:] = np.divide(prob[i,labels[i]==class_order],prob[i,:]+prob[i,loc_target]) #+eps
        pc[i,loc_target]=np.nan
    return pc
51
+
52
def get_fa(pc, labels):
    """False-alarm rates from a pairwise percent-correct matrix.

    Parameters
    ----------
    pc : ndarray
        (n_images, n_classes) percent-correct matrix (NaN at target columns).
    labels : ndarray
        True class label per image; maps each image to its class's mean FA.

    Returns
    -------
    tuple
        (fa, full_fa): per-image false-alarm rate (its class's column mean
        of 1 - pc) and the full (n_images, n_classes) 1 - pc matrix.
    """
    full_fa = 1 - pc
    per_class_fa = np.nanmean(full_fa, axis=0)
    _, class_index = np.unique(labels, return_inverse=True)
    return per_class_fa[class_index], full_fa
58
+
59
def get_dprime(pc, fa):
    """Signal-detection d' from hit and false-alarm rates.

    Parameters
    ----------
    pc : ndarray
        Hit rates in [0, 1].
    fa : ndarray
        False-alarm rates in [0, 1].

    Returns
    -------
    ndarray
        d' = z(hit) - z(fa), with infinities from rates of exactly 0/1
        bounded and the final value clipped to [-5, 5].
    """
    z_hit = norm.ppf(pc)
    z_fa = norm.ppf(fa)
    # Control for infinite z-values: a hit rate of exactly 1 (or FA of 0)
    # maps to +/-inf; substitute the same +/-5 bound used for d' itself.
    z_hit = np.where(np.isposinf(z_hit), 5, z_hit)
    z_fa = np.where(np.isneginf(z_fa), -5, z_fa)
    return np.clip(z_hit - z_fa, -5, 5)
71
+
72
+
73
def get_train_test_indices(totalIndices, nrfolds=10, foldnumber=0, seed=1):
    """Deterministic shuffled train/test split for one cross-validation fold.

    Parameters
    ----------
    totalIndices : int
        Number of items; indices 0..totalIndices-1 are partitioned.
    nrfolds : int, optional
        Number of folds (default 10).
    foldnumber : int, optional
        Which fold serves as the test set (default 0).
    seed : int, optional
        Seed for the legacy global NumPy RNG, kept for reproducibility
        with existing results (default 1).

    Returns
    -------
    train_indices, test_indices : ndarray
        Disjoint index arrays covering range(totalIndices); test_indices
        holds the members of fold `foldnumber`.
    """
    np.random.seed(seed)
    order = np.arange(totalIndices)
    np.random.shuffle(order)
    folds = np.array_split(order, nrfolds)
    in_test = np.isin(order, folds[foldnumber])
    test_indices = order[in_test]
    train_indices = order[np.logical_not(in_test)]
    return train_indices, test_indices
104
+
105
+
106
+
@@ -0,0 +1,72 @@
1
+ # function to look at .h5 file contents
2
+
3
+ import h5py
4
+
5
def h5disp(filename):
    """
    Display the structure and contents of an HDF5 file.

    Recursively prints every group and dataset (name, shape, dtype), and
    inlines the values of small datasets (< 20 elements).

    Parameters:
        filename (str): Path to the HDF5 file.
    """
    def display_item(name, obj, indent=0):
        """Recursively display information about HDF5 groups and datasets."""
        spacing = ' ' * indent
        if isinstance(obj, h5py.Group):
            print(f"{spacing}Group: {name}")
            for key, item in obj.items():
                display_item(key, item, indent + 1)
        elif isinstance(obj, h5py.Dataset):
            print(f"{spacing}Dataset: {name}")
            print(f"{spacing}  Shape: {obj.shape}")
            print(f"{spacing}  Data type: {obj.dtype}")
            if obj.size < 20:  # Display small datasets inline
                print(f"{spacing}  Data: {obj[()]}")

    try:
        with h5py.File(filename, 'r') as h5file:
            # Fixed: the header previously printed the literal "(unknown)"
            # instead of interpolating the actual file path.
            print(f"HDF5 file: {filename}")
            for name, item in h5file.items():
                display_item(name, item)
    except Exception as e:
        # Best-effort display helper: report the failure rather than raise.
        print(f"Error reading HDF5 file: {e}")

# Example usage:
# h5disp('example.h5')
33
+
34
+ # Example usage:
35
+ # h5disp('example.h5')
36
+
37
+
38
def h5read(filename, dataset_path):
    """
    Read data from a specified dataset in an HDF5 file.

    Parameters:
        filename (str): Path to the HDF5 file.
        dataset_path (str): Path to the dataset within the HDF5 file.

    Returns:
        numpy.ndarray: The data from the specified dataset, or None if the
        file/dataset could not be read (the error is printed, not raised).
    """
    try:
        with h5py.File(filename, 'r') as h5file:
            if dataset_path in h5file:
                data = h5file[dataset_path][()]
                return data
            else:
                # Fixed: both messages previously contained the literal
                # "(unknown)" instead of interpolating the file path.
                raise KeyError(f"Dataset '{dataset_path}' not found in file '{filename}'.")
    except Exception as e:
        print(f"Error reading dataset '{dataset_path}' from file '{filename}': {e}")
        return None
59
+
60
+
61
+ # # Import the function
62
+ # from h5_utils import h5read
63
+
64
+ # # Path to the HDF5 file
65
+ # filename = 'example.h5'
66
+
67
+ # # Path to the dataset within the file
68
+ # dataset_path = '/group1/dataset1'
69
+
70
+ # # Read and print the dataset
71
+ # data = h5read(filename, dataset_path)
72
+ # print(f"Dataset data: {data}")
@@ -0,0 +1,33 @@
1
+ import os
2
+ import sys
3
+ import numpy as np
4
+ import h5py
5
+ from h5_utils import h5read
6
+ import prediction_utils as pu
7
+
8
def load_model_features(model_name, n_images, data_dir):
    """Load precomputed ANN activations for `model_name`.

    Reads <data_dir>/<model_name>_features.npy and flattens everything but
    the image axis, returning an (n_images, n_units) array.
    """
    feature_path = os.path.join(data_dir, f"{model_name}_features.npy")
    return np.load(feature_path).reshape(n_images, -1)
11
+
12
def main(model1, model2, out_dir, n_images, data_dir, reps=10):
    """Model-to-model forward predictivity.

    Regresses `model1` features onto `model2` features, computes explained
    variance via prediction_utils, and caches the result at
    <out_dir>/forward_<model2>_ev.npy. Skips all work if that file exists.
    """
    os.makedirs(out_dir, exist_ok=True)
    ev_path = os.path.join(out_dir, f'forward_{model2}_ev.npy')
    if os.path.exists(ev_path):
        return  # result already cached on disk

    predictor_feats = load_model_features(model1, n_images, data_dir)
    predicted_feats = load_model_features(model2, n_images, data_dir)

    # Predict model2's units from model1's units.
    prediction = pu.get_all_preds(predicted_feats, predictor_feats, ncomp=20)
    # Explained variance of those predictions.
    ev = pu.get_all_stats(prediction, predicted_feats, predictor_feats, ncomp=20)
    print(np.nanmean(ev))
    np.save(ev_path, ev)
25
+
26
if __name__ == "__main__":
    # CLI: python <script> <predictor_model> <predicted_model>
    model1 = sys.argv[1]
    model2 = sys.argv[2]
    out_dir = f'/scratch/smuzelle/results_predictions/model2model/{model1}'
    # Fixed: was an f-string with no placeholders.
    data_dir = '/scratch/smuzelle/model_features/'
    n_images = 1320
    main(model1, model2, out_dir, n_images, data_dir)
@@ -0,0 +1,57 @@
1
+ import os
2
+ import sys
3
+ import numpy as np
4
+ import h5py
5
+ from h5_utils import h5read
6
+ import prediction_utils as pu
7
+
8
def load_selected_rates(monkey, data_dir):
    """Load one monkey's active-session rates plus its selected-site subset.

    Parameters
    ----------
    monkey : str
        "m1" or "m2"; anything else raises ValueError.
    data_dir : str
        Root directory containing neural_data/.

    Returns
    -------
    tuple
        (pu.average_data(full rates), selected-site rates array).
    """
    if monkey not in ("m1", "m2"):
        raise ValueError("Monkey not found")
    data = h5read(os.path.join(data_dir, f'neural_data/rates_{monkey}_active.h5'),
                  f'/{monkey}/active')
    selected = np.load(os.path.join(data_dir, f'neural_data/selected_rates_{monkey}.npy'))
    return pu.average_data(data), selected
18
+
19
+
20
def load_features(model_name, n_images, data_dir):
    """Load saved model activations, reshaped to (n_images, n_units).

    Reads <data_dir>/model_features/<model_name>_features.npy and flattens
    all non-image axes.
    """
    feature_file = os.path.join(data_dir, f'model_features/{model_name}_features.npy')
    return np.load(feature_file).reshape(n_images, -1)
23
+
24
def load_model_features(model, n_images, data_dir):
    """Thin alias over load_features, kept for interface symmetry with the
    companion scripts."""
    return load_features(model, n_images, data_dir)
27
+
28
def main(model, monkey, out_dir, n_images, data_dir, reps=20):
    """Forward predictivity: predict selected IT site responses from model
    features and save the across-repeat mean explained variance to
    <out_dir>/forward_<monkey>_ev.npy.

    Parameters
    ----------
    model : str
        Model name; selects <data_dir>/model_features/<model>_features.npy.
    monkey : str
        "m1" or "m2"; selects which neural recordings to load.
    out_dir : str
        Output directory (created if missing).
    n_images : int
        Number of images; used to reshape the feature array.
    data_dir : str
        Root directory containing neural_data/ and model_features/.
    reps : int, optional
        Number of repeats averaged over (default 20).
        NOTE(review): nothing in the loop body changes across iterations
        here, so repeats only differ if pu.get_all_preds / pu.get_all_stats
        are internally stochastic — confirm.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Load model features and data
    # NOTE(review): `rates` (the full averaged data) is unused in this
    # forward direction; only the pre-selected sites are predicted.
    rates, selected_rates = load_selected_rates(monkey, data_dir)
    model_features = load_model_features(model, n_images, data_dir)
    # Average over axis 2 — presumably the repetition axis; TODO confirm.
    responses = np.nanmean(selected_rates, axis=2)
    print(responses.shape)

    ev_path = os.path.join(out_dir, f'forward_{monkey}_ev.npy')

    all_evs = []
    for r in range(reps):
        # Compute predictions from model
        prediction = pu.get_all_preds(responses, model_features, ncomp=20)
        # Compute EV
        ev = pu.get_all_stats(prediction, selected_rates, model_features, ncomp=20)
        all_evs.append(ev)

    all_evs = np.array(all_evs)
    np.save(ev_path, np.nanmean(all_evs, axis=0))
49
+
50
if __name__ == "__main__":
    # CLI: python <script> <model_name> <monkey_id>
    model = sys.argv[1]
    monkey = sys.argv[2]
    out_dir = f'./results_for_figures/model2monkey/{model}'
    # Fixed: was an f-string with no placeholders.
    data_dir = './'
    n_images = 1320
    main(model, monkey, out_dir, n_images, data_dir)
@@ -0,0 +1,57 @@
1
+ import os
2
+ import sys
3
+ import numpy as np
4
+ from h5_utils import h5read
5
+ import prediction_utils as pu
6
+ import h5py
7
+
8
def load_selected_rates(monkey, data_dir):
    """Load a monkey's active-session rates and its selected-site subset.

    Parameters
    ----------
    monkey : str
        Recording identifier, "m1" or "m2".
    data_dir : str
        Root directory containing neural_data/.

    Returns
    -------
    tuple
        (pu.average_data(full rates), selected-site rates array).

    Raises
    ------
    ValueError
        If `monkey` is neither "m1" nor "m2".
    """
    if monkey not in {"m1", "m2"}:
        raise ValueError("Monkey not found")
    rates_file = os.path.join(data_dir, f'neural_data/rates_{monkey}_active.h5')
    selected_file = os.path.join(data_dir, f'neural_data/selected_rates_{monkey}.npy')
    full_rates = h5read(rates_file, f'/{monkey}/active')
    return pu.average_data(full_rates), np.load(selected_file)
18
+
19
+
20
def load_features(model_name, n_images, data_dir):
    """Load a model's saved activations as an (n_images, n_units) array."""
    path = os.path.join(data_dir, f'model_features/{model_name}_features.npy')
    raw = np.load(path)
    return raw.reshape(n_images, -1)
23
+
24
def load_model_features(model, n_images, data_dir):
    """Alias of load_features, retained so this script mirrors the forward
    (model_to_monkey) script's interface."""
    return load_features(model, n_images, data_dir)
27
+
28
def main(model, monkey, out_dir, n_images, data_dir, reps=20):
    """Reverse predictivity: predict model units from neural responses and
    save the across-repeat mean explained variance to
    <out_dir>/reverse_<monkey>_ev.npy.

    Parameters
    ----------
    model : str
        Model name; selects <data_dir>/model_features/<model>_features.npy.
    monkey : str
        "m1" or "m2"; selects which neural recordings to load.
    out_dir : str
        Output directory (created if missing).
    n_images : int
        Number of images; used to reshape the feature array.
    data_dir : str
        Root directory containing neural_data/ and model_features/.
    reps : int, optional
        Number of repeats averaged over (default 20).
        NOTE(review): the loop body is identical across iterations, so
        repeats only differ if pu.get_all_preds / pu.get_all_stats are
        internally stochastic — confirm.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Load model features and data
    # Unlike the forward script, the full averaged rates are the predictor
    # here and the selected-site subset is discarded.
    rates, _ = load_selected_rates(monkey, data_dir)
    model_features = load_model_features(model, n_images, data_dir)
    # Average over axis 2 — presumably the repetition axis; TODO confirm.
    responses = np.nanmean(rates, axis=2)
    print(responses.shape)

    ev_path = os.path.join(out_dir, f'reverse_{monkey}_ev.npy')

    all_evs = []
    for r in range(reps):
        # Compute predictions from model
        prediction = pu.get_all_preds(model_features, responses, ncomp=20)
        # Compute EV
        ev = pu.get_all_stats(prediction, model_features, rates, ncomp=20)
        all_evs.append(ev)

    all_evs = np.array(all_evs)
    np.save(ev_path, np.nanmean(all_evs, axis=0))
49
+
50
if __name__ == "__main__":
    # CLI: python <script> <model_name> <monkey_id>
    model = sys.argv[1]
    monkey = sys.argv[2]
    out_dir = f'./results_for_figures/monkey2model/{model}'
    # Fixed: was an f-string with no placeholders.
    data_dir = './'
    n_images = 1320
    main(model, monkey, out_dir, n_images, data_dir)
@@ -0,0 +1,44 @@
1
+ import os
2
+ import sys
3
+ import numpy as np
4
+ from h5_utils import h5read
5
+ import prediction_utils as pu
6
+ import h5py
7
+
8
def main(start, end, out_dir, data_dir, reps=10, max_n=None):
    """Monkey-to-monkey predictivity from precomputed ./temp/ rate arrays.

    Predicts one population's responses from the other's, `reps` times, and
    saves the across-repeat mean explained variance.

    Parameters
    ----------
    start, end :
        NOTE(review): accepted but never used in this body.
    out_dir : str
        Output directory (created if missing).
    data_dir : str
        NOTE(review): accepted but never used; inputs are hard-coded to
        ./temp/predictor.npy and ./temp/predicted.npy.
    reps : int, optional
        Number of repeats averaged over (default 10).
    max_n : int, optional
        If set, subsample each population to at most max_n sites per repeat.

    Notes
    -----
    NOTE(review): ev_path interpolates `monkey1` and `monkey2`, which are
    module-level globals set only under the __main__ guard — calling this
    function from an import raises NameError.
    NOTE(review): the max_n subsampling reassigns the arrays inside the rep
    loop, so after the first repeat the already-subsampled arrays are reused
    (no further shrinking, but also no fresh resample from the full set).
    """
    os.makedirs(out_dir, exist_ok=True)

    rates_predictor = np.load("./temp/predictor.npy")
    rates_predicted = np.load("./temp/predicted.npy")
    # Average over axis 2 — presumably the repetition axis; TODO confirm.
    responses_predicted = np.nanmean(rates_predicted, axis=2)
    responses_predictor = np.nanmean(rates_predictor, axis=2)

    ev_path = os.path.join(out_dir, f'{monkey1}_to_{monkey2}_ev.npy')

    all_evs = []
    for r in range(reps):
        if max_n is not None and responses_predicted.shape[1] > max_n:
            indices = np.random.choice(responses_predicted.shape[1], max_n, replace=False)
            responses_predicted = responses_predicted[:, indices]
            rates_predicted = rates_predicted[:, indices]
        if max_n is not None and responses_predictor.shape[1] > max_n:
            indices = np.random.choice(responses_predictor.shape[1], max_n, replace=False)
            responses_predictor = responses_predictor[:, indices]
            rates_predictor = rates_predictor[:, indices]
        print(responses_predicted.shape, responses_predictor.shape)
        # Compute predictions from model
        prediction = pu.get_all_preds(responses_predicted, responses_predictor, ncomp=20)
        # Compute EV
        ev = pu.get_all_stats(prediction, rates_predicted, rates_predictor, ncomp=20) #, rhoxx, rhoyy
        all_evs.append(ev)

    all_evs = np.array(all_evs)
    np.save(ev_path, np.nanmean(all_evs, axis=0))
37
+
38
if __name__ == "__main__":
    # CLI: python <script> <predictor_monkey> <predicted_monkey>
    monkey1 = sys.argv[1]
    monkey2 = sys.argv[2]
    out_dir = '/scratch/smuzelle/results_predictions/monkey2model'
    data_dir = './'
    # Fixed: the original called main(start, end, ...) with names that were
    # never defined, raising NameError. main() does not use its first two
    # positional parameters, so the monkey ids are passed through instead.
    main(monkey1, monkey2, out_dir, data_dir)
@@ -0,0 +1,165 @@
1
+ from scipy import stats
2
+ from regression_metrics import get_train_test_indices, ridge_regress
3
+ import numpy as np
4
+ from correlation_metrics import get_splithalves, spearmanbrown_correction
5
+
6
def average_data(f):
    """Average firing rates over the 70-170 ms window (inclusive).

    Parameters
    ----------
    f : ndarray
        Rates with time as the first axis, sampled every 10 ms from 0 to
        250 ms (26 bins), i.e. shape (26, ...).

    Returns
    -------
    ndarray
        `f` nan-averaged over the time bins covering 70-170 ms.
    """
    # 10-ms bins from 0 to 250 ms (equivalent to MATLAB's 0:10:250)
    time_bins = np.arange(0, 260, 10)

    # Window boundaries: first bin at 70 ms, last bin at 170 ms (inclusive)
    lo = int(np.where(time_bins == 70)[0][0])
    hi = int(np.where(time_bins == 170)[0][0]) + 1

    return np.nanmean(f[lo:hi, :, :, :], axis=0)
19
+
20
def get_predictions_multioutput(responses, predictor, ncomp=10, nrfolds=10, seed=0, model=None, monkey=None):
    """Cross-validated multi-output ridge predictions.

    Every target column of `responses` is predicted from `predictor` with
    `nrfolds`-fold cross-validation; each row's prediction comes from the
    fold in which it was held out.

    NOTE(review): `ncomp` is accepted but not used in this function.
    """
    n_images, n_targets = responses.shape
    predictions = np.full((n_images, n_targets), np.nan)

    for fold in range(nrfolds):
        train_idx, test_idx = get_train_test_indices(n_images, nrfolds=nrfolds, foldnumber=fold, seed=seed)
        predictions[test_idx, :] = ridge_regress(
            predictor[train_idx, :], responses[train_idx, :], predictor[test_idx, :],
            model=model, monkey=monkey, fold=fold,
        )

    return predictions
30
+
31
def get_all_preds(neurons_predicted, neurons_predictor, ncomp, model=None, monkey=None):
    """Predict the target population from the source population.

    Handles multi-target prediction: 3-D inputs (images, neurons, repeats)
    are first nan-averaged over the repeat axis; 2-D inputs are used as-is.
    """
    mean_target = (np.nanmean(neurons_predicted, axis=2)
                   if neurons_predicted.ndim == 3 else neurons_predicted)
    mean_source = (np.nanmean(neurons_predictor, axis=2)
                   if neurons_predictor.ndim == 3 else neurons_predictor)
    return get_predictions_multioutput(mean_target, mean_source, ncomp=ncomp, model=model, monkey=monkey)
44
+
45
def get_splithalf_corr(var, ax=1, type='spearman'):
    """Per-neuron split-half correlation.

    Splits `var` along axis `ax` via `get_splithalves`, then correlates the
    two half-means column by column (one value per neuron).

    Returns a dict with 'split_half_corr' (array of r values) and 'type'.
    """
    _, _, half1, half2 = get_splithalves(var, ax=ax)

    # Both half-means are expected as (samples, neurons)
    assert half1.ndim == 2 and half2.ndim == 2, "Split halves must be 2D"

    corr_fn = stats.spearmanr if type == 'spearman' else stats.pearsonr
    rs = [corr_fn(half1[:, n], half2[:, n])[0] for n in range(half1.shape[1])]

    return {
        'split_half_corr': np.array(rs),
        'type': type
    }
64
+
65
def predictivity(x, y, rho_xx, rho_yy):
    """Noise-corrected explained variance (percent) per neuron.

    Pearson-correlates each column of `x` with the matching column of `y`,
    divides by sqrt(rho_xx * rho_yy) to correct for measurement noise, and
    returns the squared result scaled to percent.
    """
    assert x.shape == y.shape, "Input and prediction shapes must match"

    raw = np.array([stats.pearsonr(x[:, col], y[:, col])[0]
                    for col in range(x.shape[1])])
    corrected = raw / np.sqrt(rho_xx * rho_yy)
    return (corrected ** 2) * 100
74
+
75
+
76
def get_neural_neural_splithalfcorr(rate_predicted, rate_predictor, ncomp=10, nrfolds=10, seed=0):
    """Split-half reliabilities for neural-to-neural prediction.

    Returns
    -------
    prediction_shc : ndarray
        Spearman-Brown-corrected split-half correlation of the predictions
        made from each half of the predictor population's repetitions.
    neuron_shc : ndarray
        Spearman-Brown-corrected split-half correlation of each predicted
        neuron's own responses.
    """
    # Reliability of each predicted neuron
    shc_predicted = get_splithalf_corr(rate_predicted, ax=2)

    # Predict the trial-averaged target from each half of the predictor reps
    half1, half2, _, _ = get_splithalves(rate_predictor, ax=2)
    target_mean = np.nanmean(rate_predicted, axis=2)
    pred_a = get_predictions_multioutput(target_mean, np.nanmean(half1, axis=2),
                                         nrfolds=nrfolds, ncomp=ncomp, seed=seed)
    pred_b = get_predictions_multioutput(target_mean, np.nanmean(half2, axis=2),
                                         nrfolds=nrfolds, ncomp=ncomp, seed=seed)

    rs = np.array([stats.pearsonr(pred_a[:, n], pred_b[:, n])[0]
                   for n in range(pred_a.shape[1])])
    prediction_shc = spearmanbrown_correction(rs)

    # Reduce a square (neurons, neurons) matrix to its diagonal if needed
    mat = shc_predicted['split_half_corr']
    diag_vals = np.diag(mat) if mat.ndim == 2 and mat.shape[0] == mat.shape[1] else mat
    neuron_shc = spearmanbrown_correction(diag_vals)

    return prediction_shc, neuron_shc
100
+
101
def get_neural_model_splithalfcorr(model_features, rate, ncomp=10, nrfolds=10, seed=0):
    """Split-half reliability involving deterministic model features.

    Parameters
    ----------
    model_features : ndarray, (n_images, n_model_units)
        Noiseless model responses.
    rate : ndarray, (n_images, n_neurons, n_repeats)
        Noisy neural data, split along the repeat axis.

    Returns the Spearman-Brown-corrected split-half correlation of the
    predictions (model features regressed on each neural half), and 1.0 as
    the reliability of the noise-free model features.
    """
    half1, half2, _, _ = get_splithalves(rate, ax=2)  # split over repetitions

    # Regress the model features on each half of the trial-averaged neural
    # data (model_features are the regression targets here).
    pa = get_predictions_multioutput(model_features, np.nanmean(half1, axis=2), nrfolds=nrfolds, ncomp=ncomp, seed=seed)
    pb = get_predictions_multioutput(model_features, np.nanmean(half2, axis=2), nrfolds=nrfolds, ncomp=ncomp, seed=seed)

    # Per-unit split-half correlation, Spearman-Brown corrected
    rs = np.array([stats.pearsonr(pa[:, n], pb[:, n])[0] for n in range(pa.shape[1])])
    model_shc = spearmanbrown_correction(rs)

    return model_shc, 1.0
117
+
118
def get_model_neural_splithalfcorr(rate, model_features, ncomp=10, nrfolds=10, seed=0):
    """Split-half reliabilities when predicting noisy neural data from a model.

    Parameters
    ----------
    rate : ndarray, (images, neurons, repeats)
    model_features : ndarray, (images, model_units)

    Returns the Spearman-Brown-corrected split-half correlation of the
    model-based predictions, and of the neural responses themselves.
    """
    # Split the repetitions in two
    half1, half2, _, _ = get_splithalves(rate, ax=2)

    # Per-neuron reliability of the raw rates
    shc = get_splithalf_corr(rate, ax=2)

    # Trial-averaged targets, one per half: (images, neurons)
    target_a = np.nanmean(half1, axis=2)
    target_b = np.nanmean(half2, axis=2)

    # Predict each half-target from the (fixed) model features
    pa = get_predictions_multioutput(target_a, model_features, nrfolds=nrfolds, ncomp=ncomp, seed=seed)
    pb = get_predictions_multioutput(target_b, model_features, nrfolds=nrfolds, ncomp=ncomp, seed=seed)

    # Split-half correlation of the model predictions, per neuron
    rs = np.array([stats.pearsonr(pa[:, n], pb[:, n])[0] for n in range(pa.shape[1])])
    model_shc = spearmanbrown_correction(rs)

    neural_shc = spearmanbrown_correction(shc['split_half_corr'])

    return model_shc, neural_shc
145
+
146
def get_all_stats(p, neurons_predicted, neurons_predictor, ncomp):
    """Noise-corrected explained variance of predictions `p`.

    Chooses the split-half correction appropriate to whether each input is
    noisy neural data (3-D: images x neurons x repeats) or deterministic
    features (2-D: images x units), then scores `p` against the trial-averaged
    target with `predictivity`.

    Raises
    ------
    ValueError
        If either input is not 2-D or 3-D. (The original fell through its
        four non-exclusive `if`s and died with UnboundLocalError instead.)
    """
    if neurons_predicted.ndim not in (2, 3) or neurons_predictor.ndim not in (2, 3):
        raise ValueError("neurons_predicted and neurons_predictor must be 2-D or 3-D")

    pred_3d = neurons_predicted.ndim == 3
    src_3d = neurons_predictor.ndim == 3

    # Trial-average the target if it carries a repeat axis
    mean_target = np.nanmean(neurons_predicted, axis=2) if pred_3d else neurons_predicted

    # Exclusive dispatch on input dimensionality (original used four
    # independent `if`s, which also re-tested conditions unnecessarily)
    if pred_3d and src_3d:
        mshc, nshc = get_neural_neural_splithalfcorr(neurons_predicted, neurons_predictor, ncomp=ncomp)
    elif not pred_3d and src_3d:
        mshc, nshc = get_neural_model_splithalfcorr(neurons_predicted, neurons_predictor, ncomp=ncomp)
    elif pred_3d and not src_3d:
        mshc, nshc = get_model_neural_splithalfcorr(neurons_predicted, neurons_predictor, ncomp=ncomp)
    else:
        # Both deterministic: no noise correction needed
        mshc, nshc = 1.0, 1.0

    # predictivity(x, y, rho_xx, rho_yy); p and mean_target are both 2-D here
    ev = predictivity(mean_target, p, nshc, mshc)
    return ev
@@ -0,0 +1,79 @@
1
+ import numpy as np
2
+ from sklearn import linear_model
3
+
4
def ridge_regress(X_train, Y_train, X_test, model=None, monkey=None, fold=None, alpha=0.1):
    """Fit a ridge regression on the training set and predict the test set.

    Parameters
    ----------
    X_train : ndarray, shape (n_train, n_features)
        Training predictors.
    Y_train : ndarray, shape (n_train, n_targets)
        Training targets (multi-output supported by sklearn's Ridge).
    X_test : ndarray, shape (n_test, n_features)
        Held-out predictors.
    model, monkey, fold : optional
        When `model` is given, the fitted coefficients are saved to
        ./results_for_figures/model2monkey/ under a name built from all three.
    alpha : float, optional
        Ridge regularization strength. Default 0.1 preserves the
        historical hard-coded value.

    Returns
    -------
    ndarray, shape (n_test, n_targets)
        Predictions for `X_test`.
    """
    clf = linear_model.Ridge(alpha=alpha)
    clf.fit(X_train, Y_train)
    Y_test_pred = clf.predict(X_test)

    if model is not None:
        # Persist the fitted weights for later figure-making
        np.save(f'./results_for_figures/model2monkey/{model}_to_{monkey}_ridge_weights_{fold}.npy', clf.coef_)

    return Y_test_pred
31
+
32
def get_train_test_indices(totalIndices, nrfolds=10, foldnumber=0, seed=1):
    """Deterministically split `totalIndices` items into train/test folds.

    Shuffles 0..totalIndices-1 with NumPy's global RNG seeded by `seed`,
    partitions the shuffled order into `nrfolds` chunks, and holds out chunk
    `foldnumber` as the test set.

    Returns
    -------
    train_indices, test_indices : ndarray
        Disjoint index arrays whose union covers range(totalIndices).
    """
    np.random.seed(seed)
    order = np.arange(totalIndices)
    np.random.shuffle(order)

    folds = np.array_split(order, nrfolds)
    held_out = folds[foldnumber]

    # Membership mask over the shuffled order: held-out chunk -> test,
    # everything else -> train (both keep the shuffled order).
    in_test = np.isin(order, held_out)
    train_indices = order[~in_test]
    test_indices = order[in_test]
    return train_indices, test_indices
63
+
64
+
65
def main():
    """No-op entry point: this module is meant to be imported, not run."""
    # Bug fix: the original `def main():` had no indented body (the guard
    # below was at top level), which is a SyntaxError; the docstring above
    # now serves as the function body.


if __name__ == "__main__":
    main()
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+