cccpm 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cccpm/__init__.py +1 -0
- cccpm/cpm_analysis.py +272 -0
- cccpm/edge_selection.py +271 -0
- cccpm/fold.py +46 -0
- cccpm/logging.py +37 -0
- cccpm/models.py +148 -0
- cccpm/more_models.py +205 -0
- cccpm/reporting/__init__.py +1 -0
- cccpm/reporting/assets/CCCPM.png +0 -0
- cccpm/reporting/html_report.py +363 -0
- cccpm/reporting/plots/__init__.py +0 -0
- cccpm/reporting/plots/chord_v2.py +821 -0
- cccpm/reporting/plots/cpm_chord_plot.py +149 -0
- cccpm/reporting/plots/plots.py +337 -0
- cccpm/reporting/plots/utils.py +19 -0
- cccpm/reporting/reporting_utils.py +124 -0
- cccpm/results_manager.py +463 -0
- cccpm/scoring.py +40 -0
- cccpm/simulation/__init__.py +0 -0
- cccpm/simulation/simulate_multivariate.py +252 -0
- cccpm/simulation/simulate_sem.py +319 -0
- cccpm/simulation/simulate_simple.py +37 -0
- cccpm/utils.py +386 -0
- cccpm-0.2.1.dist-info/METADATA +105 -0
- cccpm-0.2.1.dist-info/RECORD +26 -0
- cccpm-0.2.1.dist-info/WHEEL +4 -0
cccpm/models.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from sklearn.linear_model import LinearRegression
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class NetworkDict(dict):
    """
    Dictionary pre-populated with one empty dict per CPM network.

    A new instance always holds the keys 'positive', 'negative' and 'both'
    (in that order), each mapped to its own fresh empty dict.
    """

    def __init__(self):
        # Build the mapping in one dict construction rather than handing the
        # still-empty instance to dict.__init__ and updating it afterwards.
        super().__init__(positive={}, negative={}, both={})

    @staticmethod
    def n_networks():
        """Return the number of networks (keys) held by a fresh NetworkDict."""
        return len(NetworkDict())
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ModelDict(dict):
    """
    Dictionary pre-populated with one empty dict per CPM model type.

    A new instance always holds the keys 'connectome', 'covariates', 'full'
    and 'residuals' (in that order), each mapped to its own fresh empty dict.
    """

    def __init__(self):
        # Build the mapping in one dict construction rather than handing the
        # still-empty instance to dict.__init__ and updating it afterwards.
        super().__init__(connectome={}, covariates={}, full={}, residuals={})

    @staticmethod
    def n_models():
        """Return the number of model types (keys) held by a fresh ModelDict."""
        return len(ModelDict())
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class LinearCPMModel:
    """
    Linear Connectome-based Predictive Modeling (CPM) implementation.

    For each of the 'positive' and 'negative' networks, the selected edges of
    every sample are summed into a single network-strength value. Linear
    regressions are then fitted on these strengths ('connectome'), on the
    covariates alone ('covariates'), on the strengths with the covariates
    regressed out ('residuals'), and on strengths plus covariates ('full').
    The 'both' network stacks the positive and negative columns side by side.

    Attributes
    ----------
    models : ModelDict
        Fitted estimators, indexed as ``models[model_type][network]`` with
        model types 'connectome', 'covariates', 'residuals' and 'full'.
    models_residuals : dict
        Per-network linear regressions of network strength on the covariates,
        used to compute the residualized network strengths.
    edges : dict
        A dictionary defining the edges (features) used for each network
        ('positive' and 'negative').

    Parameters
    ----------
    edges : dict
        Dictionary containing indices of edges for 'positive' and 'negative' networks.
    """
    name = "LinearCPMModel"

    # Networks computed directly from an edge set, and the full set including
    # the column-wise combination of the two.
    _BASE_NETWORKS = ('positive', 'negative')
    _ALL_NETWORKS = ('positive', 'negative', 'both')

    def __init__(self, edges):
        """
        Initialize the LinearCPMModel.

        Parameters
        ----------
        edges : dict
            Dictionary containing indices of edges for 'positive' and 'negative' networks.
        """
        self.models = ModelDict()
        self.models_residuals = {}
        self.edges = edges

    def _network_features(self, X, covariates, fit_residual_models=False):
        """
        Compute summed network strengths and their covariate residuals.

        Parameters
        ----------
        X : numpy.ndarray
            Connectome data; columns are selected with ``self.edges[network]``.
        covariates : numpy.ndarray
            Covariates passed to the per-network residual regressions.
        fit_residual_models : bool, default False
            If True, (re)fit ``self.models_residuals`` on the given data before
            computing the residuals; otherwise the stored models are used.

        Returns
        -------
        tuple of dict
            ``(connectome, residuals)``, each holding one column vector of
            shape (n_samples, 1) for 'positive' and 'negative'.
        """
        connectome = {}
        residuals = {}
        for network in self._BASE_NETWORKS:
            strength = np.sum(X[:, self.edges[network]], axis=1).reshape(-1, 1)
            if fit_residual_models:
                self.models_residuals[network] = LinearRegression().fit(covariates, strength)
            connectome[network] = strength
            residuals[network] = strength - self.models_residuals[network].predict(covariates)
        return connectome, residuals

    def fit(self, X, y, covariates):
        """
        Fit the CPM model.

        This method fits multiple linear regression models for the connectome, covariates,
        residuals, and full model using the provided data.

        Parameters
        ----------
        X : numpy.ndarray
            A 2D array of shape (n_samples, n_features) representing the connectome data.
        y : numpy.ndarray
            A 1D array of shape (n_samples,) representing the target variable.
        covariates : numpy.ndarray
            A 2D array of shape (n_samples, n_covariates) representing the covariates.

        Returns
        -------
        LinearCPMModel
            The fitted CPM model instance.
        """
        connectome, residuals = self._network_features(X, covariates, fit_residual_models=True)
        residuals['both'] = np.hstack((residuals['positive'], residuals['negative']))
        connectome['both'] = np.hstack((connectome['positive'], connectome['negative']))

        # The covariates-only regression does not depend on the network, so it
        # is fitted once and the same fitted estimator is stored under every key.
        covariates_model = LinearRegression().fit(covariates, y)

        for network in self._ALL_NETWORKS:
            self.models['connectome'][network] = LinearRegression().fit(connectome[network], y)
            self.models['covariates'][network] = covariates_model
            self.models['residuals'][network] = LinearRegression().fit(residuals[network], y)
            self.models['full'][network] = LinearRegression().fit(
                np.hstack((connectome[network], covariates)), y
            )

        return self

    def predict(self, X, covariates):
        """
        Predict using the fitted CPM model.

        This method generates predictions for the target variable using the
        connectome, covariates, residuals, and full models.

        Parameters
        ----------
        X : numpy.ndarray
            A 2D array of shape (n_samples, n_features) representing the connectome data.
        covariates : numpy.ndarray
            A 2D array of shape (n_samples, n_covariates) representing the covariates.

        Returns
        -------
        ModelDict
            A dictionary containing predictions for each network and model type
            (connectome, covariates, residuals, and full model).
        """
        connectome, residuals = self._network_features(X, covariates)
        residuals['both'] = np.hstack((residuals['positive'], residuals['negative']))
        connectome['both'] = np.hstack((connectome['positive'], connectome['negative']))

        predictions = ModelDict()
        for network in self._ALL_NETWORKS:
            predictions['connectome'][network] = self.models['connectome'][network].predict(connectome[network])
            predictions['covariates'][network] = self.models['covariates'][network].predict(covariates)
            predictions['residuals'][network] = self.models['residuals'][network].predict(residuals[network])
            predictions['full'][network] = self.models['full'][network].predict(
                np.hstack((connectome[network], covariates))
            )

        return predictions

    def get_network_strengths(self, X, covariates):
        """
        Return the summed network strengths and their covariate residuals.

        Uses the residual models stored by ``fit``; they are not refitted here.

        Parameters
        ----------
        X : numpy.ndarray
            A 2D array of shape (n_samples, n_features) representing the connectome data.
        covariates : numpy.ndarray
            A 2D array of shape (n_samples, n_covariates) representing the covariates.

        Returns
        -------
        dict
            ``{"connectome": ..., "residuals": ...}``; each value is a dict with
            one (n_samples, 1) array for 'positive' and 'negative'.
        """
        connectome, residuals = self._network_features(X, covariates)
        return {"connectome": connectome, "residuals": residuals}
|
cccpm/more_models.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Dict, Any
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from sklearn.linear_model import LinearRegression
|
|
6
|
+
from sklearn.tree import DecisionTreeRegressor
|
|
7
|
+
from sklearn.ensemble import RandomForestRegressor
|
|
8
|
+
|
|
9
|
+
from pygam import LinearGAM
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NetworkDict(dict):
    """
    Dictionary pre-populated with one empty dict per CPM network.

    A new instance always holds the keys 'positive', 'negative' and 'both'
    (in that order), each mapped to its own fresh empty dict.
    """

    def __init__(self):
        # Build the mapping in one dict construction rather than handing the
        # still-empty instance to dict.__init__ and updating it afterwards.
        super().__init__(positive={}, negative={}, both={})

    @staticmethod
    def n_networks():
        """Return the number of networks (keys) held by a fresh NetworkDict."""
        return len(NetworkDict())
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ModelDict(dict):
    """
    Dictionary pre-populated with one empty dict per CPM model type.

    A new instance always holds the keys 'connectome', 'covariates', 'full'
    and 'residuals' (in that order), each mapped to its own fresh empty dict.
    """

    def __init__(self):
        # Build the mapping in one dict construction rather than handing the
        # still-empty instance to dict.__init__ and updating it afterwards.
        super().__init__(connectome={}, covariates={}, full={}, residuals={})

    @staticmethod
    def n_models():
        """Return the number of model types (keys) held by a fresh ModelDict."""
        return len(ModelDict())
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class BaseCPMModel(ABC):
    """
    Base Connectome-based Predictive Modeling (CPM) class.

    Subclasses must define:
      - fit_model(self, X, y) -> fitted_estimator

    and may override:
      - predict_model(self, fitted_estimator, X) -> y_pred
        (the default simply calls ``fitted_estimator.predict(X)``).

    The 'connectome' and 'full' models are fitted with the subclass algorithm
    on the selected edge columns themselves. The 'covariates' and 'residuals'
    models, and the residualization of the summed network strengths on the
    covariates, always use ordinary linear regression.
    """
    name = 'BaseCPMModel'

    # Networks computed directly from an edge set, and the full set including
    # the column-wise combination of the two.
    _BASE_NETWORKS = ('positive', 'negative')
    _ALL_NETWORKS = ('positive', 'negative', 'both')

    def __init__(self, edges: Dict[str, np.ndarray]):
        """
        Parameters
        ----------
        edges : dict
            Dictionary containing indices of edges for 'positive' and 'negative' networks.
        """
        self.models = ModelDict()
        self.models_residuals: Dict[str, Any] = {}
        self.edges = edges

    # --- Methods to override in subclasses ---
    @abstractmethod
    def fit_model(self, X: np.ndarray, y: np.ndarray):
        """Return a fitted estimator for the provided X, y."""
        raise NotImplementedError

    def predict_model(self, model, X: np.ndarray) -> np.ndarray:
        """Return ``model.predict(X)``; override for estimators with another API."""
        return model.predict(X)

    # --- Shared feature construction ---
    def _strengths_and_residuals(self, X: np.ndarray, covariates: np.ndarray,
                                 fit_residual_models: bool = False):
        """
        Compute summed network strengths and their covariate residuals.

        Parameters
        ----------
        X : numpy.ndarray
            Connectome data; columns are selected with ``self.edges[network]``.
        covariates : numpy.ndarray
            Covariates passed to the per-network residual regressions.
        fit_residual_models : bool, default False
            If True, (re)fit ``self.models_residuals`` on the given data before
            computing the residuals; otherwise the stored models are used.

        Returns
        -------
        tuple of dict
            ``(strengths, residuals)``, each holding one column vector of
            shape (n_samples, 1) for 'positive' and 'negative'.
        """
        strengths = {}
        residuals = {}
        for network in self._BASE_NETWORKS:
            strength = np.sum(X[:, self.edges[network]], axis=1).reshape(-1, 1)
            if fit_residual_models:
                self.models_residuals[network] = LinearRegression().fit(covariates, strength)
            strengths[network] = strength
            residuals[network] = strength - self.models_residuals[network].predict(covariates)
        return strengths, residuals

    def _edge_features(self, X: np.ndarray):
        """
        Return the raw edge columns of X for 'positive', 'negative' and 'both'.

        A network without any selected edge is represented by a single
        all-zero column so that an estimator can still be fitted on it.
        """
        n_samples = X.shape[0]
        features = {}
        for network in self._BASE_NETWORKS:
            if self.edges[network].shape[0] == 0:
                features[network] = np.zeros((n_samples, 1))
            else:
                features[network] = X[:, self.edges[network]]

        # Concatenate once and reuse the result for the emptiness check.
        combined = np.concatenate(
            [X[:, self.edges['positive']], X[:, self.edges['negative']]], axis=1
        )
        features['both'] = combined if combined.shape[1] != 0 else np.zeros((n_samples, 1))
        return features

    # --- CPM logic (unchanged across algorithms) ---
    def fit(self, X: np.ndarray, y: np.ndarray, covariates: np.ndarray):
        """
        Fit CPM models for connectome, covariates, residuals, and full variants.

        Residualization of the network strengths on the covariates is always
        linear, independent of the subclass algorithm.

        Parameters
        ----------
        X : numpy.ndarray
            Connectome data; columns are selected with ``self.edges``.
        y : numpy.ndarray
            Target variable.
        covariates : numpy.ndarray
            Covariates, with one row per row of ``X``.

        Returns
        -------
        BaseCPMModel
            The fitted instance (``self``).
        """
        # Compute network strengths and residualize vs covariates
        _, residuals = self._strengths_and_residuals(X, covariates, fit_residual_models=True)
        residuals['both'] = np.hstack((residuals['positive'], residuals['negative']))
        connectome = self._edge_features(X)

        # The covariates-only regression does not depend on the network, so it
        # is fitted once and the same fitted estimator is stored under every key.
        covariates_model = LinearRegression().fit(covariates, y)

        # Fit per-network, per-variant models using subclass algorithm
        for network in self._ALL_NETWORKS:
            self.models['connectome'][network] = self.fit_model(connectome[network], y)
            self.models['covariates'][network] = covariates_model
            self.models['residuals'][network] = LinearRegression().fit(residuals[network], y)
            self.models['full'][network] = self.fit_model(
                np.concatenate([connectome[network], covariates], axis=1), y
            )

        return self

    def predict(self, X: np.ndarray, covariates: np.ndarray) -> "ModelDict":
        """
        Predict y for connectome, covariates, residuals, and full models.

        Parameters
        ----------
        X : numpy.ndarray
            Connectome data; columns are selected with ``self.edges``.
        covariates : numpy.ndarray
            Covariates, with one row per row of ``X``.

        Returns
        -------
        ModelDict
            Predictions indexed as ``predictions[model_type][network]``.
        """
        _, residuals = self._strengths_and_residuals(X, covariates)
        residuals['both'] = np.hstack((residuals['positive'], residuals['negative']))
        connectome = self._edge_features(X)

        predictions = ModelDict()
        for network in self._ALL_NETWORKS:
            predictions['connectome'][network] = self.predict_model(
                self.models['connectome'][network], connectome[network]
            )
            predictions['covariates'][network] = self.models['covariates'][network].predict(covariates)
            predictions['residuals'][network] = self.models['residuals'][network].predict(residuals[network])
            predictions['full'][network] = self.predict_model(
                self.models['full'][network],
                np.concatenate([connectome[network], covariates], axis=1),
            )
        return predictions

    def get_network_strengths(self, X: np.ndarray, covariates: np.ndarray):
        """
        Return the summed network strengths and their covariate residuals.

        Uses the residual models stored by ``fit``; they are not refitted here.

        Returns
        -------
        dict
            ``{"connectome": ..., "residuals": ...}``; each value is a dict with
            one (n_samples, 1) array for 'positive' and 'negative'.
        """
        strengths, residuals = self._strengths_and_residuals(X, covariates)
        return {"connectome": strengths, "residuals": residuals}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# --- Concrete CPMs that only override the modeling bits ---
|
|
143
|
+
|
|
144
|
+
class LinearCPMModel(BaseCPMModel):
    """
    CPM using ordinary least squares for the predictive pieces.

    Parameters
    ----------
    edges : dict
        Dictionary containing indices of edges for 'positive' and 'negative' networks.
    **linreg_kwargs
        Keyword arguments forwarded unchanged to ``LinearRegression`` every
        time a model is fitted.
    """
    # Declared on the class, like the other concrete CPM models in this
    # module, so that ``LinearCPMModel.name`` is correct without creating an
    # instance (it used to resolve to the inherited 'BaseCPMModel').
    name = 'LinearCPMModel'

    def __init__(self, edges: Dict[str, np.ndarray], **linreg_kwargs):
        super().__init__(edges)
        self.linreg_kwargs = linreg_kwargs

    def fit_model(self, X: np.ndarray, y: np.ndarray):
        """Return a ``LinearRegression`` fitted on X, y."""
        return LinearRegression(**self.linreg_kwargs).fit(X, y)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class DecisionTreeCPMModel(BaseCPMModel):
    """
    CPM whose predictive pieces are fitted with a DecisionTreeRegressor.

    Parameters
    ----------
    edges : dict
        Dictionary containing indices of edges for 'positive' and 'negative' networks.
    **tree_kwargs
        Keyword arguments for ``DecisionTreeRegressor``; they take precedence
        over the default ``random_state=0``.
    """
    name = 'DecisionTreeCPMModel'

    def __init__(self, edges: Dict[str, np.ndarray], **tree_kwargs):
        super().__init__(edges)
        # A fixed seed is the only default; caller-supplied keywords win.
        self.tree_kwargs = {'random_state': 0, **tree_kwargs}

    def fit_model(self, X: np.ndarray, y: np.ndarray):
        """Return a ``DecisionTreeRegressor`` fitted on X, y."""
        regressor = DecisionTreeRegressor(**self.tree_kwargs)
        return regressor.fit(X, y)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class RandomForestCPMModel(BaseCPMModel):
    """
    CPM whose predictive pieces are fitted with a RandomForestRegressor.

    Parameters
    ----------
    edges : dict
        Dictionary containing indices of edges for 'positive' and 'negative' networks.
    **rf_kwargs
        Keyword arguments for ``RandomForestRegressor``; they take precedence
        over the defaults ``n_estimators=50``, ``random_state=0`` and
        ``n_jobs=-1``.
    """
    name = 'RandomForestCPMModel'

    def __init__(self, edges, **rf_kwargs):
        super().__init__(edges)
        # Defaults first, so that any caller-supplied keyword overrides them.
        self.rf_kwargs = {
            'n_estimators': 50,
            'random_state': 0,
            'n_jobs': -1,
            **rf_kwargs,
        }

    def fit_model(self, X, y):
        """Return a ``RandomForestRegressor`` fitted on X, y."""
        forest = RandomForestRegressor(**self.rf_kwargs)
        return forest.fit(X, y)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class GAMCPMModel(BaseCPMModel):
    """
    CPM whose predictive pieces are fitted with a pygam ``LinearGAM``.

    Parameters
    ----------
    edges : dict
        Dictionary containing indices of edges for 'positive' and 'negative' networks.
    **gam_kwargs
        Keyword arguments forwarded to ``LinearGAM``. No defaults are set here.
    """
    name = 'GAMCPMModel'

    def __init__(self, edges, **gam_kwargs):
        super().__init__(edges)
        # No defaults of our own: keep a private copy of the caller's keywords.
        self.gam_kwargs = dict(gam_kwargs)

    def fit_model(self, X, y):
        """Return a ``LinearGAM`` fitted on X, y."""
        gam = LinearGAM(**self.gam_kwargs)
        return gam.fit(X, y)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .html_report import HTMLReporter
|
|
Binary file
|