@yibeichan/claude-skills 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +98 -0
  3. package/cli.js +272 -0
  4. package/install.py +240 -0
  5. package/package.json +44 -0
  6. package/skills/bidsapp-nidm-standards/SKILL.md +202 -0
  7. package/skills/bidsapp-nidm-standards/references/babs_config.md +20 -0
  8. package/skills/bidsapp-nidm-standards/references/cli_arguments.md +76 -0
  9. package/skills/bidsapp-nidm-standards/references/container_patterns.md +53 -0
  10. package/skills/bidsapp-nidm-standards/references/nidm_integration.md +403 -0
  11. package/skills/bidsapp-nidm-standards/references/repo_structure.md +121 -0
  12. package/skills/bidsapp-nidm-standards/references/testing_patterns.md +82 -0
  13. package/skills/dicom2fmriprep/SKILL.md +377 -0
  14. package/skills/dicom2fmriprep/evals/evals.json +26 -0
  15. package/skills/dicom2fmriprep/references/babs-details.md +407 -0
  16. package/skills/dicom2fmriprep/references/fmriprep-details.md +250 -0
  17. package/skills/dicom2fmriprep/references/heudiconv-details.md +243 -0
  18. package/skills/fmri-ssm/SKILL.md +317 -0
  19. package/skills/fmri-ssm/references/code_templates.md +1570 -0
  20. package/skills/fmri-ssm/references/downstream_analysis.md +680 -0
  21. package/skills/fmri-ssm/references/group_inference.md +608 -0
  22. package/skills/fmri-ssm/references/hrf_modeling.md +447 -0
  23. package/skills/fmri-ssm/references/model_catalog.md +436 -0
  24. package/skills/fmri-ssm/references/paradigm_guide.md +406 -0
  25. package/skills/fmri-ssm/references/preprocessing.md +614 -0
  26. package/skills/fmri-ssm.zip +0 -0
  27. package/skills/neuroimaging-qc/SKILL.md +203 -0
  28. package/skills/neuroimaging-qc/references/eeg_qc.md +400 -0
  29. package/skills/neuroimaging-qc/references/fmri_qc.md +343 -0
  30. package/skills/neuroimaging-qc/references/fnirs_qc.md +430 -0
  31. package/skills/neuroimaging-qc/references/structural_qc.md +454 -0
  32. package/skills/neuroimaging-qc/scripts/parse_fmriprep_confounds.py +153 -0
  33. package/skills/neuroimaging-qc/scripts/parse_mriqc.py +114 -0
  34. package/skills/neuroimaging-qc/scripts/qc_report.py +295 -0
  35. package/skills/scientific-writer/SKILL.md +202 -0
  36. package/skills/scientific-writer/references/citation_styles.md +163 -0
  37. package/skills/scientific-writer/references/field_conventions.md +245 -0
  38. package/skills/scientific-writer/references/figures_tables.md +225 -0
  39. package/skills/scientific-writer/references/reporting_guidelines.md +225 -0
  40. package/skills.json +54 -0
@@ -0,0 +1,680 @@
1
+ # Downstream Analysis: Behavioral Correlates and Reporting
2
+
3
+ ## Table of Contents
4
+ 1. [From SSM Metrics to Neuroscience](#from-metrics)
5
+ 2. [Correlating SSM Metrics with Behavior](#behavioral-correlates)
6
+ 3. [Mixed-Effects Models with SSM Metrics](#mixed-effects)
7
+ 4. [Decoding Behavior from State Sequences](#decoding)
8
+ 5. [Simulation and Recovery Testing](#simulation)
9
+ 6. [Reporting Checklist (Methods + Results)](#reporting)
10
+ 7. [Required Figures](#figures)
11
+
12
+ ---
13
+
14
+ ## 1. From SSM Metrics to Neuroscience {#from-metrics}
15
+
16
+ Fitting an SSM is not the end of the analysis — it produces a set of per-subject metrics
17
+ that must then be related to behavior, cognition, or clinical variables.
18
+
19
+ **The SSM metrics you typically carry forward:**
20
+
21
+ | Metric | Shape | Meaning |
22
+ |--------|-------|---------|
23
+ | Fractional occupancy (FO) | (n_subjects, K) | Proportion of time each subject spends in each state |
24
+ | Mean dwell time | (n_subjects, K) | Average duration of state visits in seconds |
25
+ | Transition probability | (n_subjects, K, K) | A[i,j]: probability of moving from state i to state j |
26
+ | Transition rate | (n_subjects,) | Total transitions per minute — measure of flexibility |
27
+ | Switching entropy | (n_subjects,) | Entropy of the transition matrix — higher = more random switching |
28
+ | State-specific FC | (n_subjects, K, p, p) | Functional connectivity pattern per state per subject |
29
+
30
+ **Critical design decision before analysis:**
31
+ Decide which metrics are your primary outcomes (pre-register if possible). Running all metrics
32
+ and reporting only significant ones inflates false-positive rate severely — each K-state model
33
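+ produces 2K + K² + 2 scalar metrics per subject (FO, dwell time, transition probabilities, transition rate, switching entropy), before counting state-specific FC.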
34
+
35
+ ---
36
+
37
+ ## 2. Correlating SSM Metrics with Behavior {#behavioral-correlates}
38
+
39
+ ### 2a. Simple correlation with behavioral outcome
40
+
41
+ ```python
42
+ """Correlate per-subject SSM metrics with behavioral outcomes.
43
+
44
+ Behavioral outcomes: reaction time, accuracy, questionnaire scores (anxiety,
45
+ depression, IQ), or clinical severity scores.
46
+ """
47
+ import numpy as np
48
+ from scipy import stats
49
+ from statsmodels.stats.multitest import multipletests
50
+
51
+
52
def correlate_metrics_with_behavior(ssm_metrics, behavior, K,
                                    metric='fractional_occupancy',
                                    correction='fdr_bh', alpha=0.05):
    """Spearman correlation between a per-state SSM metric and a behavioral variable.

    One test is run per state, and the K p-values are corrected together.

    Parameters
    ----------
    ssm_metrics : dict
        Output of aggregate_metrics() from group_inference.md.
        Keys are subject IDs; each value is a dict of per-state sequences
        such as 'fractional_occupancy' and 'mean_dwell_time'.
    behavior : dict
        {subject_id: behavioral_value} — e.g., mean RT, anxiety score.
        Only subjects present in both dicts are analyzed.
    K : int
        Number of states
    metric : str
        Key of the per-state metric to test, e.g. 'fractional_occupancy' or
        'mean_dwell_time'. Any key whose value can be indexed by state works.
    correction : str
        Multiple comparison correction: 'fdr_bh', 'bonferroni' (or another
        method name understood by multipletests), or 'none'
    alpha : float
        Significance level applied to the (corrected) p-values.

    Returns
    -------
    results : dict keyed by state index with r, p, p_corrected, significant

    Raises
    ------
    ValueError
        If ssm_metrics and behavior have no subject in common.
    KeyError
        If `metric` is missing for any analyzed subject.
    """
    sub_ids = [s for s in ssm_metrics if s in behavior]
    if not sub_ids:
        raise ValueError("ssm_metrics and behavior share no subject IDs.")

    # Fail early with a readable message instead of an unbound-variable error
    # halfway through the loop.
    missing = [s for s in sub_ids if metric not in ssm_metrics[s]]
    if missing:
        raise KeyError(
            f"metric '{metric}' is missing for subject(s): {missing[:5]}"
        )

    beh_values = np.array([behavior[s] for s in sub_ids])

    raw_r, raw_p = [], []
    for k in range(K):
        metric_values = np.array([ssm_metrics[s][metric][k] for s in sub_ids])
        r, p = stats.spearmanr(metric_values, beh_values)
        raw_r.append(r)
        raw_p.append(p)

    if correction != 'none':
        reject, p_corrected, _, _ = multipletests(raw_p, method=correction,
                                                  alpha=alpha)
    else:
        p_corrected = raw_p
        reject = [p < alpha for p in raw_p]

    results = {}
    for k in range(K):
        results[k] = {
            'r': raw_r[k],
            'p': raw_p[k],
            'p_corrected': p_corrected[k],
            'significant': reject[k],
        }
        sig = '*' if reject[k] else ''
        print(f"State {k}: r={raw_r[k]:.3f}, p={raw_p[k]:.4f}, "
              f"p_corrected={p_corrected[k]:.4f} {sig}")

    return results
113
+
114
+
115
def correlate_transition_with_behavior(ssm_metrics, behavior, K,
                                       correction='fdr_bh', alpha=0.05):
    """Correlate each transition probability A[i,j] with behavior.

    The transition matrix has K*(K-1) off-diagonal elements (each diagonal
    entry is fixed by the rest of its row, because rows sum to 1). Only the
    off-diagonal elements are tested, and their p-values are corrected together.

    Parameters
    ----------
    ssm_metrics : dict
        {subject_id: {'transition_matrix': (K, K) array-like, ...}}
    behavior : dict
        {subject_id: behavioral_value}. Only subjects present in both dicts
        are analyzed.
    K : int
        Number of states
    correction : str
        'fdr_bh', 'bonferroni' (or another method name understood by
        multipletests), or 'none' — same convention as
        correlate_metrics_with_behavior().
    alpha : float
        Significance level applied to the (corrected) p-values.

    Returns
    -------
    results : dict keyed by (i, j) with r, p, p_corrected, significant

    Raises
    ------
    ValueError
        If ssm_metrics and behavior have no subject in common.
    """
    sub_ids = [s for s in ssm_metrics if s in behavior]
    if not sub_ids:
        raise ValueError("ssm_metrics and behavior share no subject IDs.")
    beh_values = np.array([behavior[s] for s in sub_ids])

    # Stack once so nested-list matrices work as well as ndarrays.
    trans = np.array([np.asarray(ssm_metrics[s]['transition_matrix'], dtype=float)
                      for s in sub_ids])

    pairs = [(i, j) for i in range(K) for j in range(K) if i != j]
    raw_r, raw_p = [], []

    for (i, j) in pairs:
        r, p = stats.spearmanr(trans[:, i, j], beh_values)
        raw_r.append(r)
        raw_p.append(p)

    if correction != 'none':
        reject, p_corrected, _, _ = multipletests(raw_p, method=correction,
                                                  alpha=alpha)
    else:
        p_corrected = raw_p
        reject = [p < alpha for p in raw_p]

    results = {}
    for idx, (i, j) in enumerate(pairs):
        results[(i, j)] = {'r': raw_r[idx], 'p': raw_p[idx],
                           'p_corrected': p_corrected[idx],
                           'significant': reject[idx]}

    return results
143
+ ```
144
+
145
+ ### 2b. Controlling for confounds (partial correlation)
146
+
147
+ Always control for head motion and scan length — both correlate with SSM metrics and
148
+ are not of scientific interest.
149
+
150
+ ```python
151
+ from sklearn.linear_model import LinearRegression
152
+
153
+
154
def partial_correlate(ssm_metric_values, behavior_values, confound_matrix):
    """Compute the partial Spearman correlation between metric and behavior,
    after removing variance explained by confounds from both variables.

    All variables (metric, behavior, and every confound column) are
    rank-transformed first. The metric and behavior ranks are then residualized
    against the confound ranks by least squares (with an intercept), and the
    residuals are Pearson-correlated. The p-value comes from a t distribution
    with n - 2 - n_confounds degrees of freedom, so the confounds that were
    regressed out are accounted for.

    Parameters
    ----------
    ssm_metric_values : array, shape (n_subjects,)
    behavior_values : array, shape (n_subjects,)
    confound_matrix : array, shape (n_subjects, n_confounds) or (n_subjects,)
        Typical confounds: mean FD, age, sex (one-hot), scan length.
        A single confound may be passed as a 1-D vector.

    Returns
    -------
    r_partial, p_partial : float
        Both are NaN when either residual is (numerically) zero, i.e. a
        variable is fully explained by the confounds. p_partial alone is NaN
        when there are too few subjects for the test
        (n_subjects <= n_confounds + 2).

    Raises
    ------
    ValueError
        If the inputs do not all have the same number of subjects.
    """
    x = np.asarray(ssm_metric_values, dtype=float).ravel()
    y = np.asarray(behavior_values, dtype=float).ravel()
    Z = np.asarray(confound_matrix, dtype=float)
    if Z.ndim == 1:
        Z = Z.reshape(-1, 1)

    n = x.shape[0]
    if y.shape[0] != n or Z.ndim != 2 or Z.shape[0] != n:
        raise ValueError(
            "ssm_metric_values, behavior_values and confound_matrix must "
            "describe the same number of subjects."
        )
    n_confounds = Z.shape[1]

    # Intercept + ranked confounds; ranking makes the adjustment monotone
    # rather than strictly linear, matching the Spearman statistic.
    design = np.column_stack(
        [np.ones(n)] + [stats.rankdata(Z[:, c]) for c in range(n_confounds)]
    )

    def residualize(values):
        ranks = stats.rankdata(values)
        coef = np.linalg.lstsq(design, ranks, rcond=None)[0]
        return ranks - design @ coef

    metric_resid = residualize(x)
    behav_resid = residualize(y)

    norm = np.sqrt((metric_resid @ metric_resid) * (behav_resid @ behav_resid))
    if norm < 1e-12:
        return float('nan'), float('nan')

    # Clip guards against |r| creeping past 1 through floating-point error.
    r = float(np.clip((metric_resid @ behav_resid) / norm, -1.0, 1.0))

    dof = n - 2 - n_confounds
    if dof <= 0:
        p = float('nan')
    elif abs(r) >= 1.0:
        p = 0.0
    else:
        t_stat = r * np.sqrt(dof / (1.0 - r * r))
        p = float(2.0 * stats.t.sf(abs(t_stat), dof))

    return r, p
178
+
179
+
180
+ # Example: control for mean FD and age
181
+ # confounds = np.column_stack([mean_fd_per_subject, age_per_subject])
182
+ # r, p = partial_correlate(frac_occ[:, k], rt_values, confounds)
183
+ ```
184
+
185
+ ### 2c. Group difference in metrics (patient vs. control)
186
+
187
+ See `group_inference.md §6` for the full `compare_groups()` function.
188
+ Key principle: always report effect size (Cohen's d) alongside p-value.
189
+
190
+ ```python
191
def cohens_d(group1_values, group2_values):
    """Return Cohen's d for two independent samples.

    The standardizer is the pooled standard deviation built from the two
    sample variances (ddof=1), each weighted by its degrees of freedom.
    The sign follows mean(group1) - mean(group2).
    """
    size_a = len(group1_values)
    size_b = len(group2_values)

    weighted_var_a = (size_a - 1) * np.var(group1_values, ddof=1)
    weighted_var_b = (size_b - 1) * np.var(group2_values, ddof=1)
    pooled_sd = np.sqrt((weighted_var_a + weighted_var_b) / (size_a + size_b - 2))

    mean_gap = np.mean(group1_values) - np.mean(group2_values)
    return mean_gap / pooled_sd
199
+ ```
200
+
201
+ ---
202
+
203
+ ## 3. Mixed-Effects Models with SSM Metrics {#mixed-effects}
204
+
205
+ For within-subject designs (multiple conditions per subject) or multi-site data,
206
+ a linear mixed-effects model is more appropriate than a simple correlation.
207
+
208
+ ```python
209
+ """Linear mixed-effects model: SSM metric ~ condition + confounds + (1|subject).
210
+
211
+ Example: N-back task — does fractional occupancy of a 'task-engaged' state
212
+ differ between 0-back and 2-back conditions?
213
+ """
214
+ import pandas as pd
215
+ import statsmodels.formula.api as smf
216
+
217
+
218
def lme_ssm_by_condition(ssm_metrics_per_run, behavioral_df, K):
    """Fit one linear mixed-effects model per state: FO ~ condition + mean FD.

    Each model uses condition (categorical) and mean_fd as fixed effects and
    a random intercept per subject, and is fitted with REML. The fixed-effects
    table of every fit is printed.

    Parameters
    ----------
    ssm_metrics_per_run : list of dicts
        Each dict: {'subject': str, 'condition': str,
                    'fractional_occupancy': array (K,), 'mean_fd': float}
    behavioral_df : not used here — condition info is in ssm_metrics_per_run
    K : int
        Number of states

    Returns
    -------
    lme_results : dict keyed by state index, values are the fitted results
    """
    # Long format: one row per (run, state).
    long_table = pd.DataFrame([
        {
            'subject': run['subject'],
            'condition': run['condition'],
            'mean_fd': run['mean_fd'],
            'frac_occ': run['fractional_occupancy'][state],
            'state': state,
        }
        for run in ssm_metrics_per_run
        for state in range(K)
    ])

    fits = {}
    for state in range(K):
        is_state = long_table['state'] == state
        state_table = long_table[is_state].copy()

        # Condition is the fixed effect of interest, FD a nuisance covariate,
        # and subjects are the grouping factor for the random intercept.
        mixed = smf.mixedlm(
            'frac_occ ~ C(condition) + mean_fd',
            state_table,
            groups=state_table['subject']
        )
        fitted = mixed.fit(reml=True)

        fits[state] = fitted
        print(f"\nState {state}:")
        print(fitted.summary().tables[1])

    return fits
260
+ ```
261
+
262
+ ---
263
+
264
+ ## 4. Decoding Behavior from State Sequences {#decoding}
265
+
266
+ Instead of correlating summary metrics, you can ask: *does the state sequence at time t
267
+ predict what the subject is doing or experiencing?*
268
+
269
+ ### 4a. Predict trial outcome from state at stimulus onset
270
+
271
+ ```python
272
+ """Decode trial-level behavior (hit/miss, fast/slow RT) from state at stimulus onset."""
273
+ from sklearn.linear_model import LogisticRegression
274
+ from sklearn.model_selection import StratifiedKFold
275
+ from sklearn.metrics import roc_auc_score
276
+ import numpy as np
277
+
278
+
279
def decode_trial_outcome(state_seq, events_df, tr, K,
                         outcome_col='response', hrf_delay_s=5.0):
    """Predict trial outcome from brain state at (stimulus onset + HRF delay).

    Each trial is represented by the one-hot encoded state at the TR nearest
    to onset + hrf_delay_s. A logistic regression is then evaluated with
    stratified k-fold cross-validation.

    Parameters
    ----------
    state_seq : array, shape (T,)
        Viterbi state sequence for one run (integer labels in [0, K))
    events_df : DataFrame
        Columns: onset (s), duration (s), trial_type, <outcome_col>
    tr : float
        Repetition time in seconds
    K : int
        Number of states (for one-hot encoding)
    outcome_col : str
        Column in events_df with binary outcome (1=hit, 0=miss)
    hrf_delay_s : float
        Shift stimulus onset by this many seconds to account for HRF delay

    Returns
    -------
    auc : float (cross-validated)
        NaN when decoding is impossible: only one outcome class is present,
        or the rarer class has fewer than 2 trials.

    Raises
    ------
    ValueError
        If the outcome column holds more than two distinct values.

    Notes
    -----
    Up to 5 folds are used; the number is reduced to the size of the rarer
    class so that every test fold contains both classes (ROC AUC needs both).
    Trials whose shifted onset falls outside the run are clipped to the
    first/last TR.
    """
    y = events_df[outcome_col].to_numpy()
    classes, counts = np.unique(y, return_counts=True)
    if len(classes) < 2:
        print("Only one class present — cannot decode.")
        return np.nan
    if len(classes) > 2:
        raise ValueError(
            f"'{outcome_col}' must be binary; found {len(classes)} distinct values."
        )

    n_splits = min(5, int(counts.min()))
    if n_splits < 2:
        print("Fewer than 2 trials in the rarer class — cannot cross-validate.")
        return np.nan

    state_seq = np.asarray(state_seq)
    onsets = events_df['onset'].to_numpy(dtype=float)
    onset_tr = np.round((onsets + hrf_delay_s) / tr).astype(int)
    onset_tr = np.clip(onset_tr, 0, len(state_seq) - 1)

    # One-hot encode the state at each trial; the cast lets float-typed
    # label arrays be used as indices.
    trial_states = state_seq[onset_tr].astype(int)
    X = np.eye(K)[trial_states]

    clf = LogisticRegression(max_iter=500, solver='lbfgs')
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    aucs = []
    for train, test in cv.split(X, y):
        clf.fit(X[train], y[train])
        prob = clf.predict_proba(X[test])[:, 1]
        aucs.append(roc_auc_score(y[test], prob))

    auc = np.mean(aucs)
    print(f"Cross-validated AUC: {auc:.3f} (chance = 0.50)")
    return auc
329
+ ```
330
+
331
+ ### 4b. State sequence as predictor in a GLM
332
+
333
+ If you have continuous behavioral ratings (e.g., moment-to-moment arousal from a naturalistic
334
+ paradigm), you can regress the behavioral signal directly onto the state time-course (state probabilities as predictors):
335
+
336
+ ```python
337
def state_behavioral_regression(state_probs, behavioral_signal, tr):
    """Regress a continuous behavioral signal onto posterior state probabilities.

    The behavioral signal is z-scored and used as the dependent variable;
    the K state-probability time-courses are the predictors of a ridge
    regression. The reported R² is in-sample (no cross-validation).

    Parameters
    ----------
    state_probs : array, shape (T, K)
        Posterior state probabilities from forward-backward smoother
    behavioral_signal : array, shape (T,)
        Continuous behavioral rating (arousal, attention, engagement) at TR resolution
    tr : float
        Repetition time; accepted for interface consistency, not used here.

    Returns
    -------
    betas : array, shape (K,)
        Regression weight for each state
    r_squared : float

    Raises
    ------
    ValueError
        If the shapes do not match, or the behavioral signal has zero or
        non-finite variance (it cannot be z-scored).
    """
    state_probs = np.asarray(state_probs, dtype=float)
    behavioral_signal = np.asarray(behavioral_signal, dtype=float)

    if state_probs.ndim != 2:
        raise ValueError("state_probs must have shape (T, K).")
    if behavioral_signal.shape != (state_probs.shape[0],):
        raise ValueError(
            "behavioral_signal must have shape (T,) matching state_probs."
        )

    scale = behavioral_signal.std()
    if not np.isfinite(scale) or scale == 0:
        raise ValueError(
            "behavioral_signal has zero or non-finite variance; cannot z-score."
        )

    from sklearn.linear_model import Ridge
    from sklearn.metrics import r2_score

    # Normalize behavioral signal
    beh = (behavioral_signal - behavioral_signal.mean()) / scale

    # Ridge regression (regularization important when states are correlated;
    # state probabilities sum to 1 across K, so they are collinear with the
    # intercept)
    reg = Ridge(alpha=1.0)
    reg.fit(state_probs, beh)
    betas = reg.coef_
    r_sq = r2_score(beh, reg.predict(state_probs))

    print(f"R² = {r_sq:.3f}")
    for k, b in enumerate(betas):
        print(f"  State {k}: β = {b:.3f}")

    return betas, r_sq
371
+ ```
372
+
373
+ ---
374
+
375
+ ## 5. Simulation and Recovery Testing {#simulation}
376
+
377
+ **Always test your pipeline on simulated data before applying it to real fMRI.**
378
+ A simulation test answers: *can my pipeline recover the true states when I know the answer?*
379
+ If recovery fails on clean simulated data, it will fail worse on real data.
380
+
381
+ ```python
382
+ """Simulate BOLD data from a known HMM and verify recovery."""
383
+ import numpy as np
384
+ from hmmlearn import hmm
385
+
386
+
387
def simulate_and_recover(K=4, T=600, D=20, tr=2.0, n_runs=4,
                         covariance_type='full', n_restarts=30):
    """Simulate data from a known Gaussian HMM and measure parameter recovery.

    1. Define a ground-truth HMM with known parameters.
    2. Simulate BOLD-like data from it (seeded, so results are reproducible).
    3. Fit an HMM on the simulated data with several restarts.
    4. Measure parameter recovery accuracy.

    Parameters
    ----------
    K : int
        True number of states (>= 2)
    T : int
        TRs per run
    D : int
        Number of brain regions (>= 2)
    tr : float
        Repetition time (used only for dwell-time printout)
    n_runs : int
        Number of simulated runs; run lengths are passed to the fit so that
        run boundaries are respected.
    covariance_type : str
        Covariance type of the *fitted* model. The generating model always
        uses full (isotropic-valued) covariances.
    n_restarts : int
        Number of fits. Restart 0 starts from K-means state means; the others
        use hmmlearn's own initialization seeded with the restart index.

    Returns
    -------
    recovery : dict with
        'mean_recovery' : mean spatial correlation between true and recovered
            state means, after Hungarian alignment
        'per_state_recovery' : array (K,) of matched correlations
        'alignment' : {true_state: recovered_state}
        'true_dwell_trs', 'recovered_dwell_trs' : arrays (K,) of expected
            dwell times in TRs, recovered ones ordered like the true states
        'n_failed_restarts' : number of restarts whose fit raised

    Raises
    ------
    ValueError
        If K < 2, D < 2, or any of T, n_runs, n_restarts is < 1.
    RuntimeError
        If every restart failed to fit.
    """
    if K < 2:
        raise ValueError(
            "K must be >= 2: off-diagonal transition mass is split over K - 1 states."
        )
    if D < 2:
        raise ValueError(
            "D must be >= 2 to compute spatial correlations between state means."
        )
    if min(T, n_runs, n_restarts) < 1:
        raise ValueError("T, n_runs and n_restarts must all be >= 1.")

    from sklearn.cluster import KMeans
    from scipy.optimize import linear_sum_assignment
    from scipy.spatial.distance import cdist

    rng = np.random.RandomState(0)

    # --- Define ground-truth model ---
    true_means = rng.randn(K, D)
    # Well-separated means
    true_means = true_means / np.linalg.norm(true_means, axis=1, keepdims=True) * 3.0

    true_covs = np.array([0.5 * np.eye(D) for _ in range(K)])  # spherical for simplicity

    # Sticky-ish transition matrix
    true_transmat = np.full((K, K), 0.05 / (K - 1))
    np.fill_diagonal(true_transmat, 0.95)

    true_startprob = np.ones(K) / K

    # --- Simulate data ---
    gen_model = hmm.GaussianHMM(n_components=K, covariance_type='full')
    gen_model.startprob_ = true_startprob
    gen_model.transmat_ = true_transmat
    gen_model.means_ = true_means
    gen_model.covars_ = true_covs

    all_data, all_lengths = [], []
    for _ in range(n_runs):
        # Draw from the seeded generator so the simulation is reproducible
        # while successive runs still differ.
        obs, _ = gen_model.sample(T, random_state=rng)
        all_data.append(obs)
        all_lengths.append(T)

    data_concat = np.vstack(all_data)

    # --- Fit recovered model ---
    best_model = None
    best_score = -np.inf
    n_failed = 0
    last_error = None
    for restart in range(n_restarts):
        model = hmm.GaussianHMM(
            n_components=K, covariance_type=covariance_type,
            n_iter=200, random_state=restart,
        )
        if restart == 0:
            # hmmlearn has no `means_init`; to start from given means, drop
            # 'm' from init_params and assign means_ before fitting.
            model.init_params = 'stc'
            model.means_ = KMeans(n_clusters=K, n_init=10,
                                  random_state=0).fit(data_concat).cluster_centers_
        try:
            model.fit(data_concat, lengths=all_lengths)
            s = model.score(data_concat, lengths=all_lengths)
        except Exception as err:  # best-effort: one bad restart must not abort the search
            n_failed += 1
            last_error = err
            continue
        if s > best_score:
            best_score = s
            best_model = model

    if best_model is None:
        raise RuntimeError(
            f"All {n_restarts} restarts failed to fit; last error: {last_error!r}"
        )

    # --- Measure recovery (Hungarian-matched spatial correlation) ---
    cost = cdist(true_means, best_model.means_, metric='correlation')
    row_ind, col_ind = linear_sum_assignment(cost)
    matched_corr = 1 - cost[row_ind, col_ind]

    print("State mean recovery (spatial correlation per state):")
    for i, (true_k, rec_k) in enumerate(zip(row_ind, col_ind)):
        print(f"  True state {true_k} → Recovered state {rec_k}: r = {matched_corr[i]:.3f}")
    print(f"Mean recovery: {matched_corr.mean():.3f} (>0.9 = good, >0.8 = acceptable)")

    # --- Check state duration recovery ---
    # Expected dwell of a state with self-transition a is 1 / (1 - a) TRs;
    # the recovered matrix is permuted into the true-state order first.
    recovered_dwell = 1.0 / (1.0 - np.diag(best_model.transmat_[col_ind][:, col_ind]))
    true_dwell = 1.0 / (1.0 - np.diag(true_transmat))
    print(f"\nDwell time recovery (TRs; seconds at TR={tr}s in parentheses):")
    for k in range(K):
        print(f"  State {k}: true={true_dwell[k]:.1f} ({true_dwell[k] * tr:.1f}s), "
              f"recovered={recovered_dwell[k]:.1f} ({recovered_dwell[k] * tr:.1f}s)")

    return {
        'mean_recovery': matched_corr.mean(),
        'per_state_recovery': matched_corr,
        'alignment': dict(zip(row_ind.tolist(), col_ind.tolist())),
        'true_dwell_trs': true_dwell,
        'recovered_dwell_trs': recovered_dwell,
        'n_failed_restarts': n_failed,
    }
488
+ ```
489
+
490
+ **Minimum recovery thresholds before trusting real-data results:**
491
+ - Mean spatial correlation > 0.85 → state patterns are recoverable
492
+ - Transition matrix within 10% of true values → dynamics are recoverable
493
+ - If recovery is poor at your intended K, reduce K or increase data quantity
494
+
495
+ ---
496
+
497
+ ## 6. Reporting Checklist (Methods + Results) {#reporting}
498
+
499
+ ### Methods section — minimum required information
500
+
501
+ ```
502
+ □ Model family and software
503
+ e.g., "We fitted a K-state Gaussian HMM with full covariance to parcellated
504
+ BOLD timeseries using hmmlearn v0.3.0 / dynamax v0.1.4 / ssm v0.0.1."
505
+
506
+ □ K selection procedure
507
+ e.g., "K was selected by BIC across K=2–15, with 30 random restarts per K.
508
+ Final model used K=8, initialized with K-means."
509
+ OR: "K was selected by leave-one-run-out cross-validated log-likelihood."
510
+
511
+ □ Initialization and restarts
512
+ e.g., "50 random restarts with K-means initialization of state means;
513
+ the model with the highest log-likelihood was retained."
514
+
515
+ □ Run boundary handling
516
+ e.g., "Runs were concatenated and the lengths parameter was passed to
517
+ the HMM to reset the forward algorithm at run boundaries."
518
+
519
+ □ HRF strategy
520
+ e.g., "SSM was fitted directly to preprocessed BOLD (approach 1 — BOLD-direct).
521
+ State timing should be interpreted at the BOLD timescale (delayed ~5s from neural events)."
522
+ OR: "Task regressors were convolved with the canonical SPM HRF before use as IO-HMM inputs."
523
+
524
+ □ Confound regression
525
+ e.g., "24-parameter motion model (6 motion params + derivatives + quadratics)
526
+ plus top 5 aCompCor components were regressed from BOLD prior to SSM fitting."
527
+
528
+ □ Motion scrubbing
529
+ e.g., "TRs with framewise displacement > 0.5mm and their two following TRs
530
+ were censored; runs with >25% censored TRs were excluded."
531
+
532
+ □ Parcellation / dimensionality
533
+ e.g., "Schaefer-200 cortical parcellation + Tian-16 subcortical (216 ROIs total)."
534
+
535
+ □ State alignment (if multi-subject)
536
+ e.g., "State labels were aligned across subjects using the Hungarian algorithm
537
+ on state mean activation patterns (correlation distance, 1 − Pearson r)."
538
+
539
+ □ Statistical testing approach
540
+ e.g., "Group differences in fractional occupancy were tested with Mann-Whitney U,
541
+ FDR-corrected across K states (Benjamini-Hochberg)."
542
+ ```
543
+
544
+ ### Results section — minimum required reporting
545
+
546
+ ```
547
+ □ K selection: report BIC/CV curve across K values (or cite stability analysis)
548
+ □ State stability: report mean matched spatial correlation across random splits
549
+ (ideally > 0.85; report value explicitly)
550
+ □ Motion check: report mean FD per group; confirm that no state metric correlates with mean FD at |r| > 0.3
551
+ □ Per-state metrics: fractional occupancy (mean ± SD across subjects), mean dwell time
552
+ □ Transition matrix: report or visualize full K×K matrix
553
+ □ State spatial patterns: brain maps for each state (mean activation or FC pattern)
554
+ □ Effect size: always report Cohen's d or Spearman r alongside p-values
555
+ □ Multiple comparison correction: report method and corrected p-values
556
+ ```
557
+
558
+ ---
559
+
560
+ ## 7. Required Figures {#figures}
561
+
562
+ A complete SSM paper typically includes these figures. Code uses utilities from
563
+ `code_templates.md §9` for visualization.
564
+
565
+ ```python
566
+ """Figure generation checklist — call these after fitting and decoding."""
567
+ import numpy as np
568
+ import matplotlib.pyplot as plt
569
+
570
+
571
def make_all_ssm_figures(model, state_seq, state_probs, dwell_times,
                         means, tr, run_boundaries, roi_labels=None,
                         group_metrics=None, behavior=None):
    """Generate the standard figure set for an SSM paper.

    Figures produced:
      1. State time-course (example subject)
      2. Transition matrix heatmap
      3. Dwell time distributions
      4. Fractional occupancy (group-level bar plot, mean ± SEM) —
         only if group_metrics is provided
      5. Behavioral correlation scatter, one panel per state —
         only if behavior (and group_metrics) is provided

    State spatial maps are not drawn here.

    Parameters
    ----------
    model : fitted HMM exposing n_components and transmat_
    state_seq : array, shape (T,)
        Integer state label per TR for the example subject
    state_probs, means, roi_labels :
        Accepted for interface compatibility; not used by this function.
    dwell_times : mapping/sequence indexed by state
        dwell_times[k] holds the visit durations of state k in TRs
    tr : float
        Repetition time in seconds
    run_boundaries : sequence of int
        Run start indices in TRs; a dashed line is drawn at every entry
        except the first.
    group_metrics : dict, optional
        {subject_id: {'fractional_occupancy': array (K,), ...}}
    behavior : dict, optional
        {subject_id: behavioral_value}; requires group_metrics.

    Returns
    -------
    figures : dict
        Always contains 'timecourse', 'transmat', 'dwell'. Additionally
        'fractional_occupancy' and 'behavior' when those figures were drawn.

    Raises
    ------
    ValueError
        If behavior is given without group_metrics.
    """
    if behavior is not None and group_metrics is None:
        raise ValueError(
            "behavior was provided without group_metrics; the behavioral "
            "scatter needs per-subject fractional occupancy."
        )

    K = model.n_components
    figures = {}

    # --- Figure 1: State time-course ---
    fig1, ax = plt.subplots(figsize=(14, 2))
    state_seq = np.asarray(state_seq).astype(int)
    T = len(state_seq)
    cmap = plt.cm.Set2
    colors = [cmap(k / K) for k in range(K)]
    if T > 0:
        # One span per run of identical states instead of one per TR: same
        # picture, far fewer patches, and the final TR is included.
        change_points = np.flatnonzero(np.diff(state_seq)) + 1
        starts = np.concatenate(([0], change_points))
        stops = np.concatenate((change_points, [T]))
        for start, stop in zip(starts, stops):
            ax.axvspan(start * tr, stop * tr,
                       color=colors[state_seq[start]], alpha=0.8)
    for b in run_boundaries[1:]:
        ax.axvline(b * tr, color='black', linewidth=1.5, linestyle='--', alpha=0.6)
    ax.set_xlabel('Time (s)')
    ax.set_title('State time-course (example subject)')
    ax.set_yticks([])
    fig1.tight_layout()
    figures['timecourse'] = fig1

    # --- Figure 2: Transition matrix ---
    fig2, ax2 = plt.subplots(figsize=(5, 4))
    im = ax2.imshow(model.transmat_, cmap='Blues', vmin=0, vmax=1)
    for i in range(K):
        for j in range(K):
            # White text on dark cells, black on light ones.
            ax2.text(j, i, f'{model.transmat_[i, j]:.2f}', ha='center', va='center',
                     color='white' if model.transmat_[i, j] > 0.5 else 'black', fontsize=9)
    ax2.set_xticks(range(K))
    ax2.set_yticks(range(K))
    ax2.set_xticklabels([f'S{k+1}' for k in range(K)])
    ax2.set_yticklabels([f'S{k+1}' for k in range(K)])
    ax2.set_xlabel('To state')
    ax2.set_ylabel('From state')
    plt.colorbar(im, ax=ax2)
    fig2.tight_layout()
    figures['transmat'] = fig2

    # --- Figure 3: Dwell time distributions ---
    fig3, ax3 = plt.subplots(figsize=(8, 4))
    for k in range(K):
        dwells_sec = np.array(dwell_times[k]) * tr
        if len(dwells_sec) > 0:
            ax3.hist(dwells_sec, bins=20, alpha=0.5,
                     label=f'S{k+1} (μ={dwells_sec.mean():.1f}s)', density=True)
    ax3.set_xlabel('Dwell time (s)')
    ax3.set_ylabel('Density')
    ax3.legend(fontsize=8)
    fig3.tight_layout()
    figures['dwell'] = fig3

    # --- Figure 4: Group fractional occupancy ---
    if group_metrics is not None:
        sub_ids = list(group_metrics.keys())
        fo_matrix = np.array([group_metrics[s]['fractional_occupancy'] for s in sub_ids])
        fig4, ax4 = plt.subplots(figsize=(6, 4))
        means_fo = fo_matrix.mean(axis=0)
        # SEM uses the sample SD (ddof=1); with a single subject there is no
        # spread to show, so the error bars collapse to zero.
        if len(sub_ids) > 1:
            sems_fo = fo_matrix.std(axis=0, ddof=1) / np.sqrt(len(sub_ids))
        else:
            sems_fo = np.zeros(K)
        ax4.bar(range(K), means_fo, yerr=sems_fo, color=[colors[k] for k in range(K)],
                capsize=4)
        ax4.set_xticks(range(K))
        ax4.set_xticklabels([f'S{k+1}' for k in range(K)])
        ax4.set_ylabel('Fractional occupancy')
        ax4.set_title(f'Group mean FO (n={len(sub_ids)})')
        fig4.tight_layout()
        figures['fractional_occupancy'] = fig4

    # --- Figure 5: Behavioral correlation scatter (optional) ---
    if behavior is not None:
        from scipy.stats import spearmanr

        sub_ids_beh = [s for s in group_metrics if s in behavior]
        fo = np.array([group_metrics[s]['fractional_occupancy'] for s in sub_ids_beh])
        beh = np.array([behavior[s] for s in sub_ids_beh])

        fig5, axes5 = plt.subplots(1, K, figsize=(4 * K, 4))
        axes5 = np.atleast_1d(axes5)  # plt.subplots returns a bare Axes when K == 1
        for k in range(K):
            ax = axes5[k]
            ax.scatter(fo[:, k], beh, alpha=0.6, color=colors[k])
            # Trend line — needs at least two distinct x values to be defined.
            if len(sub_ids_beh) >= 2 and np.ptp(fo[:, k]) > 0:
                m, b = np.polyfit(fo[:, k], beh, 1)
                xline = np.linspace(fo[:, k].min(), fo[:, k].max(), 50)
                ax.plot(xline, m * xline + b, 'k--', linewidth=1)
            r, p = spearmanr(fo[:, k], beh)
            ax.set_title(f'S{k+1}: r={r:.2f}, p={p:.3f}')
            ax.set_xlabel(f'FO state {k+1}')
            ax.set_ylabel('Behavior')
        fig5.tight_layout()
        figures['behavior'] = fig5

    return figures
666
+ ```
667
+
668
+ **Figure checklist for a complete paper:**
669
+
670
+ | Figure | Required | Notes |
671
+ |--------|----------|-------|
672
+ | State time-course (example subject) | Yes | Show run boundaries |
673
+ | State spatial maps (brain maps) | Yes | Use nilearn `plot_stat_map` or surface plots |
674
+ | Transition matrix | Yes | K×K heatmap with values |
675
+ | Dwell time distributions | Recommended | Histogram per state |
676
+ | Group fractional occupancy | Yes (if group study) | Bar plot, mean ± SEM |
677
+ | BIC / CV curve for K selection | Yes | Show all K values tested |
678
+ | Behavioral correlation scatter | Yes (if behavior) | One panel per state |
679
+ | Motion check (FD vs. state) | Recommended | Show r values |
680
+ | State stability plot | Recommended | Across-split reproducibility |