@yibeichan/claude-skills 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +98 -0
- package/cli.js +272 -0
- package/install.py +240 -0
- package/package.json +44 -0
- package/skills/bidsapp-nidm-standards/SKILL.md +202 -0
- package/skills/bidsapp-nidm-standards/references/babs_config.md +20 -0
- package/skills/bidsapp-nidm-standards/references/cli_arguments.md +76 -0
- package/skills/bidsapp-nidm-standards/references/container_patterns.md +53 -0
- package/skills/bidsapp-nidm-standards/references/nidm_integration.md +403 -0
- package/skills/bidsapp-nidm-standards/references/repo_structure.md +121 -0
- package/skills/bidsapp-nidm-standards/references/testing_patterns.md +82 -0
- package/skills/dicom2fmriprep/SKILL.md +377 -0
- package/skills/dicom2fmriprep/evals/evals.json +26 -0
- package/skills/dicom2fmriprep/references/babs-details.md +407 -0
- package/skills/dicom2fmriprep/references/fmriprep-details.md +250 -0
- package/skills/dicom2fmriprep/references/heudiconv-details.md +243 -0
- package/skills/fmri-ssm/SKILL.md +317 -0
- package/skills/fmri-ssm/references/code_templates.md +1570 -0
- package/skills/fmri-ssm/references/downstream_analysis.md +680 -0
- package/skills/fmri-ssm/references/group_inference.md +608 -0
- package/skills/fmri-ssm/references/hrf_modeling.md +447 -0
- package/skills/fmri-ssm/references/model_catalog.md +436 -0
- package/skills/fmri-ssm/references/paradigm_guide.md +406 -0
- package/skills/fmri-ssm/references/preprocessing.md +614 -0
- package/skills/fmri-ssm.zip +0 -0
- package/skills/neuroimaging-qc/SKILL.md +203 -0
- package/skills/neuroimaging-qc/references/eeg_qc.md +400 -0
- package/skills/neuroimaging-qc/references/fmri_qc.md +343 -0
- package/skills/neuroimaging-qc/references/fnirs_qc.md +430 -0
- package/skills/neuroimaging-qc/references/structural_qc.md +454 -0
- package/skills/neuroimaging-qc/scripts/parse_fmriprep_confounds.py +153 -0
- package/skills/neuroimaging-qc/scripts/parse_mriqc.py +114 -0
- package/skills/neuroimaging-qc/scripts/qc_report.py +295 -0
- package/skills/scientific-writer/SKILL.md +202 -0
- package/skills/scientific-writer/references/citation_styles.md +163 -0
- package/skills/scientific-writer/references/field_conventions.md +245 -0
- package/skills/scientific-writer/references/figures_tables.md +225 -0
- package/skills/scientific-writer/references/reporting_guidelines.md +225 -0
- package/skills.json +54 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Parse MRIQC group outputs and flag subjects for exclusion.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python parse_mriqc.py group_bold.tsv --fd-thresh 0.3 --fd-perc-thresh 30 -o qc.csv
|
|
7
|
+
python parse_mriqc.py group_T1w.tsv --type anat -o qc.csv
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import numpy as np
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load_mriqc_group(tsv_path: str) -> pd.DataFrame:
    """Read an MRIQC group-level TSV file into a DataFrame.

    Parameters
    ----------
    tsv_path : str
        Location of the tab-separated file; handed directly to
        ``pandas.read_csv``.

    Returns
    -------
    pd.DataFrame
        The table as parsed by ``pandas.read_csv`` with a tab separator.
    """
    group_table = pd.read_csv(tsv_path, sep='\t')
    return group_table
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def flag_bold_subjects(df, fd_mean_thresh=0.3, fd_perc_thresh=30, tsnr_zscore=-2):
    """Mark BOLD rows that exceed motion limits or have unusually low tSNR.

    Parameters
    ----------
    df : pd.DataFrame
        Table that must contain the columns ``bids_name``, ``fd_mean``,
        ``fd_perc`` and ``tsnr``.
    fd_mean_thresh : float
        Rows whose ``fd_mean`` is strictly greater than this are flagged.
    fd_perc_thresh : float
        Rows whose ``fd_perc`` is strictly greater than this are flagged.
    tsnr_zscore : float
        Rows whose tSNR z-score (computed against the rows of ``df``) is
        strictly below this value are flagged.

    Returns
    -------
    pd.DataFrame
        ``bids_name``, each metric followed by its ``flag_*`` column, and a
        final ``exclude`` column that is True when any flag is True.
    """
    flagged = df[['bids_name']].copy()

    # Both motion metrics follow the same "larger than the limit is bad" rule.
    motion_limits = (('fd_mean', fd_mean_thresh), ('fd_perc', fd_perc_thresh))
    for column, limit in motion_limits:
        values = df[column]
        flagged[column] = values
        flagged[f'flag_{column}'] = values.gt(limit)

    # tSNR has no absolute cutoff here: it is standardised within this table.
    tsnr = df['tsnr']
    flagged['tsnr'] = tsnr
    standardised_tsnr = (tsnr - tsnr.mean()) / tsnr.std()
    flagged['flag_low_tsnr'] = standardised_tsnr.lt(tsnr_zscore)

    # A row is excluded as soon as any single criterion fires.
    flagged['exclude'] = flagged.filter(regex=r'^flag_').any(axis=1)
    return flagged
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def flag_anat_subjects(df, zscore_thresh=3):
    """Mark anatomical rows whose quality metrics are z-score outliers.

    Parameters
    ----------
    df : pd.DataFrame
        Table that must contain ``bids_name``; any of ``qi_1``, ``cjv``,
        ``efc``, ``cnr``, ``snr_gm`` and ``snr_wm`` that are present are
        evaluated, the rest are silently skipped.
    zscore_thresh : float
        Magnitude of the z-score (computed against the rows of ``df``)
        beyond which a value counts as an outlier.

    Returns
    -------
    pd.DataFrame
        ``bids_name``, each evaluated metric followed by its ``flag_*``
        column, and a final ``exclude`` column that is True when any flag
        is True.
    """
    # sign = +1: unusually HIGH values are flagged; -1: unusually LOW values.
    directed_metrics = (
        ('qi_1', 1), ('cjv', 1), ('efc', 1),
        ('cnr', -1), ('snr_gm', -1), ('snr_wm', -1),
    )

    flagged = df[['bids_name']].copy()
    for name, sign in directed_metrics:
        if name not in df.columns:
            continue
        values = df[name]
        flagged[name] = values
        standardised = (values - values.mean()) / values.std()
        if sign > 0:
            flagged[f'flag_{name}'] = standardised > zscore_thresh
        else:
            flagged[f'flag_{name}'] = standardised < -zscore_thresh

    # A row is excluded as soon as any single metric is an outlier.
    flagged['exclude'] = flagged.filter(regex=r'^flag_').any(axis=1)
    return flagged
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def generate_summary(results, modality):
    """Build a plain-text summary of a flagged QC table.

    Parameters
    ----------
    results : pd.DataFrame
        Table with an ``exclude`` column and zero or more ``flag_*`` columns.
    modality : str
        Label shown (upper-cased) in the heading.

    Returns
    -------
    str
        Heading, total/excluded/included counts, and one line per ``flag_*``
        column giving the sum of that column.
    """
    n_total = len(results)
    n_exclude = int(results['exclude'].sum())
    # An empty table would otherwise divide by zero and report "nan%"
    # (or raise, depending on the dtype of the sum).
    pct_exclude = 100 * n_exclude / n_total if n_total else 0.0

    lines = [
        "",
        f"MRIQC QC Summary ({modality.upper()})",
        '=' * 40,
        f"Total subjects: {n_total}",
        f"Excluded: {n_exclude} ({pct_exclude:.1f}%)",
        f"Included: {n_total - n_exclude}",
        "",
        "Exclusion breakdown:",
    ]
    lines.extend(
        f" {col}: {results[col].sum()}"
        for col in results.columns
        if col.startswith('flag_')
    )
    return "\n".join(lines) + "\n"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def main():
    """Command-line entry point: load a group TSV, flag rows, report.

    Prints the text summary to stdout and, when ``-o/--output`` is given,
    writes the flagged table as CSV (without the index). For BOLD input only
    the two framewise-displacement thresholds are taken from the command
    line; the tSNR z-score cutoff stays at the default of
    ``flag_bold_subjects``.
    """
    cli = argparse.ArgumentParser(description='Parse MRIQC outputs for QC')
    cli.add_argument('input', help='MRIQC group TSV file')
    cli.add_argument('--type', choices=['bold', 'anat'], default='bold')
    # The three numeric thresholds share the same option shape.
    for option, fallback in (('--fd-thresh', 0.3),
                             ('--fd-perc-thresh', 30),
                             ('--zscore-thresh', 3)):
        cli.add_argument(option, type=float, default=fallback)
    cli.add_argument('-o', '--output', help='Output CSV path')
    opts = cli.parse_args()

    table = load_mriqc_group(opts.input)

    # argparse restricts --type to 'bold' or 'anat', so two branches suffice.
    if opts.type == 'anat':
        flagged = flag_anat_subjects(table, opts.zscore_thresh)
    else:
        flagged = flag_bold_subjects(table, opts.fd_thresh, opts.fd_perc_thresh)

    print(generate_summary(flagged, opts.type))

    if not opts.output:
        return
    flagged.to_csv(opts.output, index=False)
    print(f"\nResults saved to {opts.output}")


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Generate QC reports with visualizations for neuroimaging data.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python qc_report.py mriqc_bold.tsv --type bold -o qc_report
|
|
7
|
+
python qc_report.py qc_summary.csv --type custom --metrics fd_mean,tsnr -o report
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import numpy as np
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
# Try to import matplotlib, but don't fail if not available
|
|
16
|
+
try:
|
|
17
|
+
import matplotlib.pyplot as plt
|
|
18
|
+
HAS_MPL = True
|
|
19
|
+
except ImportError:
|
|
20
|
+
HAS_MPL = False
|
|
21
|
+
print("Warning: matplotlib not available, skipping plots")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def plot_qc_distributions(df: pd.DataFrame, metrics: list, thresholds: dict = None,
                          output_path: Path = None):
    """
    Plot one histogram per QC metric, with mean and optional threshold lines.

    Parameters
    ----------
    df : pd.DataFrame
        QC data
    metrics : list
        Metrics to plot; names that are not columns of ``df`` are ignored
    thresholds : dict
        Optional {metric: (value, direction)} where direction is 'above' or
        'below'. Only ``value`` is drawn; ``direction`` is accepted but not
        used by the plot.
    output_path : Path
        Optional output path for saving figure

    Returns
    -------
    The matplotlib figure, or None when matplotlib is unavailable or none of
    the requested metrics is a column of ``df``.
    """
    if not HAS_MPL:
        return None

    # Lay out panels only for metrics that exist, so a missing metric does not
    # leave a blank panel and an empty list cannot cause a division by zero.
    present = [m for m in metrics if m in df.columns]
    if not present:
        return None

    n_cols = min(2, len(present))
    n_rows = (len(present) + n_cols - 1) // n_cols  # ceiling division

    # squeeze=False always returns a 2-D array of axes, whatever the grid size.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(6*n_cols, 4*n_rows),
                             squeeze=False)
    axes = axes.flatten()

    for ax, metric in zip(axes, present):
        data = df[metric].dropna()

        ax.hist(data, bins=30, edgecolor='black', alpha=0.7)
        ax.set_xlabel(metric)
        ax.set_ylabel('Count')
        ax.set_title(f'{metric} Distribution (n={len(data)})')

        # Add threshold line if provided
        if thresholds and metric in thresholds:
            thresh_val, _direction = thresholds[metric]
            ax.axvline(thresh_val, color='red', linestyle='--', linewidth=2,
                       label=f'Threshold: {thresh_val}')

        # Add mean line (skipped when every value was NaN)
        if not data.empty:
            ax.axvline(data.mean(), color='blue', linestyle='-', alpha=0.5,
                       label=f'Mean: {data.mean():.3f}')

        # Draw the legend last so it includes every labelled line above.
        if ax.get_legend_handles_labels()[0]:
            ax.legend()

    # Hide unused axes (odd number of metrics in a two-column grid)
    for spare in axes[len(present):]:
        spare.set_visible(False)

    fig.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=150, bbox_inches='tight')
        print(f"Saved plot to {output_path}")

    return fig
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def plot_qc_scatter(df: pd.DataFrame, x_metric: str, y_metric: str,
                    color_by: str = None, output_path: Path = None):
    """Scatter one QC metric against another, annotated with their correlation.

    Points are coloured by ``color_by`` (with a colour bar) when that name is
    given and is a column of ``df``. The correlation of the two metrics is
    written in the top-left corner when more than two complete pairs exist.
    Returns the figure, or None when matplotlib is unavailable; the figure is
    also saved when ``output_path`` is given.
    """
    if not HAS_MPL:
        return None

    fig, ax = plt.subplots(figsize=(8, 6))

    use_colour = bool(color_by) and color_by in df.columns
    if not use_colour:
        ax.scatter(df[x_metric], df[y_metric], alpha=0.6)
    else:
        points = ax.scatter(df[x_metric], df[y_metric], c=df[color_by],
                            cmap='coolwarm', alpha=0.6)
        plt.colorbar(points, label=color_by)

    ax.set_xlabel(x_metric)
    ax.set_ylabel(y_metric)
    ax.set_title(f'{x_metric} vs {y_metric}')

    # Correlation is computed only on rows where both metrics are present.
    complete_pairs = df[[x_metric, y_metric]].dropna()
    if len(complete_pairs) > 2:
        r = complete_pairs[x_metric].corr(complete_pairs[y_metric])
        ax.text(0.05, 0.95, f'r = {r:.3f}', transform=ax.transAxes,
                fontsize=12, verticalalignment='top')

    plt.tight_layout()

    if output_path:
        plt.savefig(output_path, dpi=150, bbox_inches='tight')

    return fig
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def generate_text_report(df: pd.DataFrame, metrics: list, exclusion_col: str = 'exclude') -> str:
    """Generate a plain-text summary report.

    Parameters
    ----------
    df : pd.DataFrame
        QC data, one row per subject/run.
    metrics : list
        Column names to summarise; names that are not columns of ``df`` are
        skipped.
    exclusion_col : str
        Column whose sum is reported as the number of excluded rows. The
        exclusion lines are omitted when the column is absent.

    Returns
    -------
    str
        Report lines joined with newlines: header, row counts, then N, mean,
        std, median, min, max and quartiles for each metric (NaNs dropped).
    """
    banner = "=" * 60
    report = [banner, "NEUROIMAGING QC REPORT", banner, ""]

    # Sample summary
    n_total = len(df)
    report.append(f"Total subjects/runs: {n_total}")

    if exclusion_col in df.columns:
        n_exclude = int(df[exclusion_col].sum())
        # Guard the percentage: an empty table would divide by zero.
        pct_exclude = 100 * n_exclude / n_total if n_total else 0.0
        report.append(f"Excluded: {n_exclude} ({pct_exclude:.1f}%)")
        report.append(f"Included: {n_total - n_exclude}")

    section_rule = "-" * 40
    report.extend(["", section_rule, "METRIC STATISTICS", section_rule])

    for metric in metrics:
        if metric not in df.columns:
            continue

        data = df[metric].dropna()
        report.extend([
            f"\n{metric}:",
            f" N: {len(data)}",
            f" Mean: {data.mean():.4f}",
            f" Std: {data.std():.4f}",
            f" Median: {data.median():.4f}",
            f" Min: {data.min():.4f}",
            f" Max: {data.max():.4f}",
            f" IQR: [{data.quantile(0.25):.4f}, {data.quantile(0.75):.4f}]",
        ])

    return "\n".join(report)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def generate_html_report(df: pd.DataFrame, metrics: list, title: str = "QC Report",
                         output_path: Path = None) -> str:
    """Generate an HTML report with embedded statistics.

    Parameters
    ----------
    df : pd.DataFrame
        QC data, one row per subject/run.
    metrics : list
        Column names to report; names that are not columns of ``df`` are
        skipped.
    title : str
        Used for both the page title and the top heading.
    output_path : Path
        Optional file to write the HTML to (UTF-8).

    Returns
    -------
    str
        The complete HTML document: a per-metric summary table (mean, std,
        min, max with NaNs dropped) and a per-row table of ``bids_name``, the
        metrics and, when present, ``exclude``. Rows with a true ``exclude``
        value get the ``excluded`` CSS class.
    """
    # Local import: avoids depending on a new file-level import.
    from html import escape

    def render_cell(value) -> str:
        """Format one table cell: floats to 4 decimals, the rest HTML-escaped."""
        if isinstance(value, float):
            return f"{value:.4f}"
        return escape(str(value))

    # Title, column names and cell values come from the caller / input file,
    # so they are escaped before being placed in markup.
    safe_title = escape(str(title))
    present = [m for m in metrics if m in df.columns]

    # Collect fragments and join once instead of growing a string per row.
    parts = [f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title}</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 40px; }}
h1 {{ color: #333; }}
table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
th {{ background-color: #4CAF50; color: white; }}
tr:nth-child(even) {{ background-color: #f2f2f2; }}
.excluded {{ background-color: #ffcccc; }}
.metric-summary {{ background-color: #f9f9f9; padding: 15px; margin: 10px 0; }}
</style>
</head>
<body>
<h1>{safe_title}</h1>
<p>Generated from {len(df)} subjects/runs</p>
"""]

    # Summary statistics
    parts.append("<h2>Metric Summary</h2>")
    parts.append("<table><tr><th>Metric</th><th>Mean</th><th>Std</th><th>Min</th><th>Max</th></tr>")

    for metric in present:
        data = df[metric].dropna()
        parts.append(f"""
<tr>
<td>{escape(str(metric))}</td>
<td>{data.mean():.4f}</td>
<td>{data.std():.4f}</td>
<td>{data.min():.4f}</td>
<td>{data.max():.4f}</td>
</tr>
""")

    parts.append("</table>")

    # Subject table
    parts.append("<h2>Subject Details</h2>")
    parts.append("<table><tr>")

    display_cols = ['bids_name'] + present
    if 'exclude' in df.columns:
        display_cols.append('exclude')

    parts.extend(f"<th>{escape(str(col))}</th>" for col in display_cols)
    parts.append("</tr>")

    for _, row in df.iterrows():
        flag = row.get('exclude', False)
        # NaN is truthy in Python; a missing flag must not mark the row excluded.
        is_excluded = (not pd.isna(flag)) and bool(flag)
        row_class = 'excluded' if is_excluded else ''
        parts.append(f"<tr class='{row_class}'>")
        parts.extend(f"<td>{render_cell(row.get(col, ''))}</td>" for col in display_cols)
        parts.append("</tr>")

    parts.append("</table></body></html>")
    html = "".join(parts)

    if output_path:
        # Explicit encoding matches the <meta charset> declared above.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)
        print(f"Saved HTML report to {output_path}")

    return html
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def main():
    """Command-line entry point: build text, HTML and plot QC reports.

    Reads the input table (tab-separated when the file name ends in ``.tsv``,
    comma-separated otherwise), chooses the metrics, prints the text report
    and writes it to ``<prefix>.txt``. With ``--html`` an HTML report is
    written to ``<prefix>.html``. Unless ``--no-plots`` is given or matplotlib
    is unavailable, distribution plots (and an fd_mean-vs-tsnr scatter when
    both columns exist) are saved next to them. ``<prefix>`` is ``-o/--output``
    or ``qc_report``.
    """
    parser = argparse.ArgumentParser(description='Generate QC reports')
    parser.add_argument('input', help='Input CSV/TSV file')
    parser.add_argument('--type', choices=['bold', 'anat', 'custom'], default='custom')
    parser.add_argument('--metrics', help='Comma-separated metrics to analyze')
    parser.add_argument('-o', '--output', help='Output prefix for reports')
    parser.add_argument('--html', action='store_true', help='Generate HTML report')
    parser.add_argument('--no-plots', action='store_true', help='Skip plot generation')

    args = parser.parse_args()

    # Load data
    sep = '\t' if args.input.endswith('.tsv') else ','
    df = pd.read_csv(args.input, sep=sep)

    # Determine metrics
    if args.metrics:
        # Strip whitespace so "fd_mean, tsnr" is not silently reduced to
        # "fd_mean"; empty entries from stray commas are dropped.
        metrics = [name.strip() for name in args.metrics.split(',') if name.strip()]
        unknown = [m for m in metrics if m not in df.columns]
        if unknown:
            print(f"Warning: metrics not found in input, skipping: {', '.join(unknown)}")
    elif args.type == 'bold':
        metrics = ['fd_mean', 'fd_perc', 'tsnr', 'dvars_std']
    elif args.type == 'anat':
        metrics = ['qi_1', 'cnr', 'cjv', 'snr_gm', 'efc']
    else:
        # Auto-detect numeric columns (first six)
        metrics = [c for c in df.columns if df[c].dtype in ['float64', 'int64']][:6]

    # Filter to existing metrics
    metrics = [m for m in metrics if m in df.columns]

    # Generate text report
    report = generate_text_report(df, metrics)
    print(report)

    output_prefix = args.output or 'qc_report'

    # Save text report
    with open(f"{output_prefix}.txt", 'w', encoding='utf-8') as f:
        f.write(report)
    print(f"\nSaved text report to {output_prefix}.txt")

    # Generate HTML if requested
    if args.html:
        generate_html_report(df, metrics, output_path=Path(f"{output_prefix}.html"))

    # Generate plots
    if not args.no_plots and HAS_MPL:
        # With no metrics there is nothing to lay out; skipping avoids a
        # division by zero when the subplot grid is sized.
        if metrics:
            plot_qc_distributions(df, metrics, output_path=Path(f"{output_prefix}_distributions.png"))

        # Create fd_mean vs tsnr scatter if both exist
        if 'fd_mean' in df.columns and 'tsnr' in df.columns:
            plot_qc_scatter(df, 'fd_mean', 'tsnr',
                            color_by='exclude' if 'exclude' in df.columns else None,
                            output_path=Path(f"{output_prefix}_scatter.png"))


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-writing
|
|
3
|
+
description: Write rigorous scientific manuscripts, research papers, grant proposals, and literature reviews. Use when drafting or revising any part of a scientific document including abstracts, introductions, methods, results, and discussions. Applies IMRAD structure, citation styles (APA/AMA/Vancouver/IEEE), reporting guidelines (CONSORT/STROBE/PRISMA), and publication standards. Triggers on requests to write research papers, journal articles, scientific reports, academic manuscripts, grant applications, or improve scientific prose.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Scientific Writing
|
|
7
|
+
|
|
8
|
+
Write publication-ready scientific manuscripts using established academic conventions and evidence-based practices.
|
|
9
|
+
|
|
10
|
+
## Core Principles
|
|
11
|
+
|
|
12
|
+
### Evidence-Based Claims
|
|
13
|
+
|
|
14
|
+
Every substantive claim requires support from peer-reviewed literature. Use appropriate hedging: "suggests" for correlational evidence, "demonstrates" for causal evidence. Never assert without citation or data support.
|
|
15
|
+
|
|
16
|
+
### Clarity Over Flourish
|
|
17
|
+
|
|
18
|
+
Scientific writing prioritizes precision. Use domain-specific terminology correctly, define terms on first use, prefer active voice in methods, and avoid vague qualifiers ("very significant" → "statistically significant, p < 0.001").
|
|
19
|
+
|
|
20
|
+
### Reproducibility Standard
|
|
21
|
+
|
|
22
|
+
Methods must enable replication: include all parameters, software versions, statistical tests, sample sizes, inclusion/exclusion criteria, effect sizes alongside p-values, and relevant reporting guideline compliance.
|
|
23
|
+
|
|
24
|
+
## Two-Stage Writing Process
|
|
25
|
+
|
|
26
|
+
**Stage 1: Outline with Evidence**
|
|
27
|
+
Create structured outlines with bullet points marking main arguments, key citations, data points, and logical flow. This is scaffolding—not the final manuscript.
|
|
28
|
+
|
|
29
|
+
**Stage 2: Convert to Prose**
|
|
30
|
+
Transform outlines into complete paragraphs with proper transitions, integrated citations, and logical sentence flow. Final manuscripts contain NO bullet points except in specific Methods subsections (inclusion criteria, materials lists).
|
|
31
|
+
|
|
32
|
+
## Manuscript Structure (IMRAD)
|
|
33
|
+
|
|
34
|
+
### Abstract
|
|
35
|
+
Write as flowing paragraph(s), NOT labeled sections. Cover: (1) context/problem, (2) methods summary, (3) key findings with statistics, (4) significance. Typically 150-300 words. Only use structured format if journal explicitly requires it.
|
|
36
|
+
|
|
37
|
+
Example structure:
|
|
38
|
+
```
|
|
39
|
+
[Context sentence establishing importance]. [Gap or problem addressed].
|
|
40
|
+
[Study objective]. [Methods: design, N, key measures]. [Primary result with
|
|
41
|
+
statistics]. [Secondary findings]. [Conclusion and implications].
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Introduction
|
|
45
|
+
Follow funnel structure:
|
|
46
|
+
1. Broad context (1-2 paragraphs): field importance
|
|
47
|
+
2. Literature review (2-4 paragraphs): what is known, organized thematically
|
|
48
|
+
3. Gap identification (1 paragraph): what remains unknown
|
|
49
|
+
4. Present study (1 paragraph): objectives, hypotheses, approach
|
|
50
|
+
|
|
51
|
+
### Methods
|
|
52
|
+
Standard subsections:
|
|
53
|
+
- **Participants/Subjects**: demographics, recruitment, ethics approval
|
|
54
|
+
- **Materials/Stimuli**: detailed descriptions, validated instrument citations
|
|
55
|
+
- **Procedure**: chronological protocol with timing
|
|
56
|
+
- **Data Analysis**: statistical approach, software, multiple comparison corrections
|
|
57
|
+
|
|
58
|
+
Include power analysis. Report all conditions. Enable replication.
|
|
59
|
+
|
|
60
|
+
### Results
|
|
61
|
+
Structure: (1) preliminary analyses, (2) primary hypotheses, (3) secondary/exploratory (labeled).
|
|
62
|
+
|
|
63
|
+
Report consistently:
|
|
64
|
+
- Descriptive: M ± SD or Mdn (IQR)
|
|
65
|
+
- Inferential with effect sizes: "t(48) = 2.31, p = .025, d = 0.67"
|
|
66
|
+
- Confidence intervals when possible
|
|
67
|
+
|
|
68
|
+
No interpretation here—save for Discussion.
|
|
69
|
+
|
|
70
|
+
### Discussion
|
|
71
|
+
Structure: (1) summary without statistics, (2) interpretation in literature context, (3) limitations, (4) implications, (5) future directions, (6) conclusion.
|
|
72
|
+
|
|
73
|
+
Address alternative explanations. Avoid overclaiming.
|
|
74
|
+
|
|
75
|
+
## Citation Practices
|
|
76
|
+
|
|
77
|
+
### Source Hierarchy
|
|
78
|
+
1. Systematic reviews/meta-analyses (strongest synthesis)
|
|
79
|
+
2. Randomized controlled trials (causal inference)
|
|
80
|
+
3. Prospective cohort studies (temporal precedence)
|
|
81
|
+
4. Cross-sectional studies (associations)
|
|
82
|
+
5. Case studies/expert opinion (hypothesis-generating)
|
|
83
|
+
|
|
84
|
+
### Best Practices
|
|
85
|
+
- Cite primary sources, not secondary summaries
|
|
86
|
+
- Balance recent work (last 5 years) with foundational papers
|
|
87
|
+
- Verify claims against original sources
|
|
88
|
+
- Flag contradictory evidence rather than omitting it
|
|
89
|
+
- Keep self-citations below 20% of the reference list
|
|
90
|
+
|
|
91
|
+
For detailed citation formatting by style, see [references/citation_styles.md](references/citation_styles.md).
|
|
92
|
+
|
|
93
|
+
## Reporting Guidelines
|
|
94
|
+
|
|
95
|
+
Match guideline to study type:
|
|
96
|
+
- **CONSORT**: Randomized controlled trials
|
|
97
|
+
- **STROBE**: Observational studies (cohort, case-control, cross-sectional)
|
|
98
|
+
- **PRISMA**: Systematic reviews and meta-analyses
|
|
99
|
+
- **STARD**: Diagnostic accuracy studies
|
|
100
|
+
- **TRIPOD**: Prediction models
|
|
101
|
+
- **ARRIVE**: Animal research
|
|
102
|
+
- **CARE**: Case reports
|
|
103
|
+
|
|
104
|
+
For checklists and details, see [references/reporting_guidelines.md](references/reporting_guidelines.md).
|
|
105
|
+
|
|
106
|
+
## Writing Style
|
|
107
|
+
|
|
108
|
+
### Verb Tense
|
|
109
|
+
| Section | Tense |
|
|
110
|
+
|---------|-------|
|
|
111
|
+
| Introduction (established facts) | Present |
|
|
112
|
+
| Introduction (prior studies) | Past |
|
|
113
|
+
| Methods | Past |
|
|
114
|
+
| Results | Past |
|
|
115
|
+
| Discussion (your findings) | Past |
|
|
116
|
+
| Discussion (interpretations) | Present |
|
|
117
|
+
| Conclusions | Present |
|
|
118
|
+
|
|
119
|
+
### Common Errors
|
|
120
|
+
| Error | Correction |
|
|
121
|
+
|-------|------------|
|
|
122
|
+
| "Data shows..." | "Data show..." (plural) |
|
|
123
|
+
| "Proves that..." | "Suggests/demonstrates that..." |
|
|
124
|
+
| "Significant" (colloquial) | "Substantial" or "statistically significant" |
|
|
125
|
+
| "In order to" | "To" |
|
|
126
|
+
| "Due to the fact that" | "Because" |
|
|
127
|
+
|
|
128
|
+
### Paragraph Structure
|
|
129
|
+
- Open with topic sentence stating main point
|
|
130
|
+
- Provide evidence and elaboration
|
|
131
|
+
- Connect logically to next paragraph
|
|
132
|
+
- Typical length: 4-8 sentences
|
|
133
|
+
|
|
134
|
+
## Field-Specific Conventions
|
|
135
|
+
|
|
136
|
+
### Biomedical/Clinical
|
|
137
|
+
- Use precise anatomical/clinical terminology
|
|
138
|
+
- Follow standardized nomenclature (ICD, DSM, SNOMED-CT)
|
|
139
|
+
- Generic drug names first, brands in parentheses
|
|
140
|
+
- "Patients" for clinical, "participants" for community research
|
|
141
|
+
|
|
142
|
+
### Molecular Biology
|
|
143
|
+
- Gene symbols italicized (*TP53*), proteins regular (p53)
|
|
144
|
+
- Species-specific: human uppercase (*BRCA1*), mouse sentence case (*Brca1*)
|
|
145
|
+
- Full species name first, then abbreviation (*Escherichia coli* → *E. coli*)
|
|
146
|
+
|
|
147
|
+
### Chemistry
|
|
148
|
+
- IUPAC nomenclature for compounds
|
|
149
|
+
- Concentrations with units (mM, μM, nM)
|
|
150
|
+
- Standard notation (SMILES, InChI)
|
|
151
|
+
|
|
152
|
+
For comprehensive field conventions, see [references/field_conventions.md](references/field_conventions.md).
|
|
153
|
+
|
|
154
|
+
## Figures and Tables
|
|
155
|
+
|
|
156
|
+
### When to Use Which
|
|
157
|
+
- **Tables**: Precise numerical data, exact values needed
|
|
158
|
+
- **Figures**: Trends, patterns, relationships, visual comparisons
|
|
159
|
+
|
|
160
|
+
### Requirements
|
|
161
|
+
- Self-explanatory with complete captions
|
|
162
|
+
- Consistent formatting and terminology
|
|
163
|
+
- All axes/columns labeled with units
|
|
164
|
+
- Sample sizes (n) and statistical annotations
|
|
165
|
+
- No duplication between text, tables, and figures
|
|
166
|
+
|
|
167
|
+
For detailed guidance, see [references/figures_tables.md](references/figures_tables.md).
|
|
168
|
+
|
|
169
|
+
## Workflow Summary
|
|
170
|
+
|
|
171
|
+
**Stage 1: Planning**
|
|
172
|
+
1. Identify target journal, review author guidelines
|
|
173
|
+
2. Determine applicable reporting guideline
|
|
174
|
+
3. Outline manuscript structure
|
|
175
|
+
4. Plan figures/tables as data story backbone
|
|
176
|
+
|
|
177
|
+
**Stage 2: Drafting**
|
|
178
|
+
1. Start with figures/tables
|
|
179
|
+
2. Draft Methods next (usually the easiest section to write)
|
|
180
|
+
3. Write Results (describe figures/tables)
|
|
181
|
+
4. Compose Discussion (interpret findings)
|
|
182
|
+
5. Write Introduction (set up research question)
|
|
183
|
+
6. Craft Abstract last (summarize complete work)
|
|
184
|
+
|
|
185
|
+
**Stage 3: Revision**
|
|
186
|
+
1. Check logical flow throughout
|
|
187
|
+
2. Verify terminology consistency
|
|
188
|
+
3. Confirm reporting guideline adherence
|
|
189
|
+
4. Verify all citations accurate
|
|
190
|
+
5. Proofread for clarity
|
|
191
|
+
|
|
192
|
+
## Pre-Submission Checklist
|
|
193
|
+
|
|
194
|
+
- [ ] Abstract accurately reflects content (written as prose, not labeled sections)
|
|
195
|
+
- [ ] All claims have citations or data support
|
|
196
|
+
- [ ] Methods enable replication
|
|
197
|
+
- [ ] Statistics include effect sizes and CIs
|
|
198
|
+
- [ ] Figures have informative captions
|
|
199
|
+
- [ ] References follow target journal style
|
|
200
|
+
- [ ] Reporting guideline checklist completed
|
|
201
|
+
- [ ] Word count within limits
|
|
202
|
+
- [ ] Author contributions and conflicts declared
|