qpycr 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qpycr/__init__.py +31 -0
- qpycr/analyzer.py +508 -0
- qpycr/cli.py +101 -0
- qpycr-1.2.0.dist-info/METADATA +116 -0
- qpycr-1.2.0.dist-info/RECORD +9 -0
- qpycr-1.2.0.dist-info/WHEEL +5 -0
- qpycr-1.2.0.dist-info/entry_points.txt +2 -0
- qpycr-1.2.0.dist-info/licenses/LICENSE +22 -0
- qpycr-1.2.0.dist-info/top_level.txt +1 -0
qpycr/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
qPyCR: qPCR Analysis with Recursive PCR Model
|
|
3
|
+
|
|
4
|
+
A Python package for quantitative PCR (qPCR) analysis using a recursive
|
|
5
|
+
PCR model that provides robust quantification accuracy and evaluation
|
|
6
|
+
of PCR quality and template interference.
|
|
7
|
+
|
|
8
|
+
Based on the recursive PCR model described in:
|
|
9
|
+
Carr AC, Moore SD (2012) Robust quantification of polymerase chain
|
|
10
|
+
reactions using global fitting. PLoS ONE 7(5): e37640.
|
|
11
|
+
https://doi.org/10.1371/journal.pone.0037640
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
Command line:
|
|
15
|
+
qpycr data.csv # Basic analysis
|
|
16
|
+
qpycr data.csv -e # With evaluation outputs
|
|
17
|
+
qpycr data.csv -d # With debug outputs
|
|
18
|
+
|
|
19
|
+
Python API:
|
|
20
|
+
from qpycr import analyze
|
|
21
|
+
results = analyze("data.csv")
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
__version__ = "1.2.0"
|
|
25
|
+
__author__ = "Sean D. Moore"
|
|
26
|
+
__email__ = "sean.moore@ucf.edu"
|
|
27
|
+
__license__ = "MIT"
|
|
28
|
+
|
|
29
|
+
from .analyzer import analyze, QPCRAnalyzer
|
|
30
|
+
|
|
31
|
+
__all__ = ["analyze", "QPCRAnalyzer", "__version__"]
|
qpycr/analyzer.py
ADDED
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
"""
|
|
2
|
+
qPyCR Analyzer - Main analysis module
|
|
3
|
+
|
|
4
|
+
This module provides the QPCRAnalyzer class and analyze() function for
|
|
5
|
+
running qPCR analysis with the recursive PCR model.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
import datetime
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import matplotlib.pyplot as plt
|
|
14
|
+
from scipy.optimize import leastsq, minimize_scalar
|
|
15
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class QPCRAnalyzer:
    """
    Run the qPyCR analysis pipeline on one CSV file of fluorescence readings.

    The pipeline loads the data, subtracts a per-sample baseline, fits the
    recursive PCR model to consecutive-cycle pairs, log-transforms the
    signal, finds the steepest log-linear window, derives a shared threshold
    and per-sample Cq values, and converts each Cq into a seed estimate.
    The final table is printed, written to a CSV report in ``output_dir``
    and stored in ``self.results``.

    Example:
        analyzer = QPCRAnalyzer(output_dir="outputs")
        results = analyzer.analyze("data.csv")      # also writes the report
        report_path = analyzer.results["csv_path"]
    """

    # Number of consecutive cycles in the sliding window used both to find
    # the steepest log-linear phase and to locate that window's midpoint.
    WINDOW_SIZE = 4

    # Lower-cased column names that are never treated as fluorescence traces.
    METADATA_COLUMNS = ('cycle', 'index', 'time', 'well', 'sample')

    # Column order of the final results table.
    RESULT_COLUMNS = ('Sample', 'Amplification_Status', 'Cq', 'Seed',
                      'Max', 'KD', 'Max_KD_Ratio')

    def __init__(self, eval_flag: bool = False, debug_flag: bool = False,
                 output_dir: str = "outputs", verbose: bool = True):
        """
        Initialize the QPCR analyzer and create the output directory.

        Args:
            eval_flag: Evaluation-output switch. Within this class it is
                stored and echoed in the progress banner only.
            debug_flag: Debug-output switch. Within this class it is stored
                (also as ``debug_display_flag``) and echoed in the banner only.
            output_dir: Directory for output files; created if missing.
            verbose: Print progress messages.
        """
        self.eval_flag = eval_flag
        self.debug_flag = debug_flag
        self.debug_display_flag = debug_flag
        self.output_dir = output_dir
        self.verbose = verbose

        # Create output directory
        os.makedirs(self.output_dir, exist_ok=True)

        # Analysis state
        self.file_path = None
        self.df = None
        self.columns_to_fit = []
        self.results = {}

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    def _log(self, message: str) -> None:
        """Print a progress message when ``verbose`` is enabled."""
        if self.verbose:
            print(message)

    @staticmethod
    def _is_number(value) -> bool:
        """Return True for Python or NumPy integer/float scalars."""
        return isinstance(value, (int, float, np.integer, np.floating))

    @classmethod
    def _format_value(cls, value, spec: str):
        """Format numeric values with ``spec``; pass placeholders through."""
        return format(value, spec) if cls._is_number(value) else value

    @staticmethod
    def _pcr_residuals(params, prev, current):
        """
        Residuals of the recursive PCR model for ``leastsq``.

        The prediction for each cycle is
        ``max(0, prev * (1 + (max_val - prev) / max_val - prev / (KD + prev)))``
        and the residual is that prediction minus the observed value.
        """
        max_val, KD = params
        pred = np.maximum(0, prev * (1 + ((max_val - prev) / max_val) - (prev / (KD + prev))))
        return pred - current

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------
    def analyze(self, file_path: Union[str, Path]) -> pd.DataFrame:
        """
        Run the complete qPCR analysis pipeline.

        Args:
            file_path: Path to the qPCR data file (CSV format)

        Returns:
            DataFrame containing final analysis results

        Raises:
            FileNotFoundError: If ``file_path`` is not an existing file.
            ValueError: If the file is empty or has no usable numeric column.
        """
        self.file_path = str(file_path)

        if not os.path.isfile(self.file_path):
            raise FileNotFoundError(f"Data file not found: {self.file_path}")

        self._log(f"Starting qPyCR analysis of {os.path.basename(self.file_path)}")
        self._log(f"Output directory: {self.output_dir}")
        self._log(f"Evaluation mode: {self.eval_flag}")
        self._log(f"Debug mode: {self.debug_flag}")

        # Run the analysis pipeline
        self._run_pipeline()

        return self.results.get('final_df')

    def _run_pipeline(self):
        """Execute the full analysis pipeline (Cells 2-11) in order."""
        steps = (
            self._load_data,                  # Cell 2: Load and prepare data
            self._baseline_correction,        # Cell 3: Baseline correction
            self._compute_shifted_data,       # Cell 4: Compute shifted data
            self._global_fitting,             # Cell 5: Global fitting
            self._log_transform,              # Cell 6: Log transform
            self._select_exponential_windows, # Cell 7: Exponential window selection
            self._calculate_cq,               # Cell 8: Cq calculation
            self._generate_seed_guesses,      # Cell 9: Seed guess generation
            self._optimize_seeds,             # Cell 10: Seed optimization
            self._generate_final_outputs,     # Cell 11: Final outputs
        )
        for step in steps:
            step()

    # ------------------------------------------------------------------
    # Pipeline steps
    # ------------------------------------------------------------------
    def _load_data(self):
        """
        Cell 2: Load and validate qPCR data.

        Reads ``self.file_path`` into ``self.df``, indexes it by cycle and
        fills ``self.columns_to_fit`` with every non-metadata column whose
        values all convert to numbers.

        Raises:
            ValueError: If the file has no rows or no usable numeric column.
        """
        self._log("\n--- Loading data ---")

        self.df = pd.read_csv(self.file_path)

        if self.df.empty:
            raise ValueError("Loaded DataFrame is empty.")

        # Handle Cycle column (matched case-insensitively)
        cycle_column = next(
            (col for col in self.df.columns if str(col).lower() == 'cycle'),
            None,
        )

        if cycle_column is not None:
            cycles = pd.to_numeric(self.df[cycle_column], errors='coerce')
            first_cycle = cycles.min()
            if first_cycle != 1:
                # Renumber so the first recorded cycle is cycle 1.
                cycles = cycles - first_cycle + 1
            self.df[cycle_column] = cycles
            self.df.set_index(cycle_column, inplace=True)
        else:
            self.df.index = range(1, len(self.df) + 1)
            self.df.index.name = "Cycle"

        # Identify fluorescence columns
        self.columns_to_fit = []
        for col in self.df.columns:
            if str(col).lower() in self.METADATA_COLUMNS:
                continue
            numeric_series = pd.to_numeric(self.df[col], errors='coerce')
            if numeric_series.isna().any():
                continue
            # Keep the converted values so later arithmetic never sees the
            # original (possibly text) representation.
            self.df[col] = numeric_series
            self.columns_to_fit.append(col)

        if not self.columns_to_fit:
            raise ValueError("No valid numeric columns found for fluorescence data.")

        self._log(f"Loaded {len(self.df)} cycles, {len(self.columns_to_fit)} samples")
        self._log(f"Samples: {', '.join(map(str, self.columns_to_fit))}")

    def _baseline_correction(self):
        """
        Cell 3: Estimate and subtract background signal.

        The baseline of a sample is the minimum over its first few cycles
        (at most 5, at most a quarter of the run, and never fewer than 1).
        A sample is flagged as amplified when its baseline-subtracted
        maximum exceeds twice the baseline.
        """
        self._log("\n--- Baseline correction ---")

        self.df_adjusted = self.df.copy()
        self.adjustment_types = {}
        self.amplification_flags = {}
        self.initial_backgrounds = {}

        for col in self.columns_to_fit:
            data = self.df[col].to_numpy()

            # At least one cycle must be used; otherwise runs shorter than
            # four cycles would take the minimum of an empty slice.
            early_cycles = max(1, min(5, len(data) // 4))
            baseline = np.min(data[:early_cycles])

            adjusted = data - baseline
            self.df_adjusted[col] = adjusted
            self.initial_backgrounds[col] = baseline
            self.adjustment_types[col] = 'linear_shift'

            # Check for amplification
            self.amplification_flags[col] = bool(np.max(adjusted) > baseline * 2)

        if self.verbose:
            amplified = sum(self.amplification_flags.values())
            print(f"Baseline corrected: {amplified}/{len(self.columns_to_fit)} samples show amplification")

    def _compute_shifted_data(self):
        """
        Cell 4: Compute shifted fluorescence data for PCR model.

        For each sample, stores the signal without its last point ('prev')
        alongside the signal without its first point ('current'), so that
        element i of 'prev' is the cycle preceding element i of 'current'.
        """
        self._log("\n--- Computing shifted data ---")

        self.shifted_data = {}
        for col in self.columns_to_fit:
            data = self.df_adjusted[col].to_numpy()
            self.shifted_data[col] = {'prev': data[:-1], 'current': data[1:]}

    def _global_fitting(self):
        """
        Cell 5: Fit the recursive PCR model (max_val, KD) to each sample.

        The fit starts from ``max_val = 5 * max(signal)`` and
        ``KD = 0.2 * max(signal)``. Those starting guesses are kept as the
        result when there is no positive signal to fit, when the optimizer
        raises, or when it returns non-finite or non-positive parameters.
        Samples with fewer than two cycles are skipped entirely.
        ``self.df_fine_tuned`` is a copy of the baseline-adjusted data.
        """
        self._log("\n--- Global fitting ---")

        self.df_fine_tuned = self.df_adjusted.copy()
        self.fitted_params = {}
        self.model_predictions = {}

        for col in self.columns_to_fit:
            if col not in self.shifted_data:
                continue

            prev = self.shifted_data[col]['prev']
            current = self.shifted_data[col]['current']
            if current.size == 0:
                # A single-cycle run has no consecutive pairs to fit.
                continue

            # float() keeps the fallback values formattable even when the
            # input column holds integers.
            initial_max = float(np.max(current))

            # Initial guesses
            max_val_guess = initial_max * 5
            KD_guess = initial_max * 0.2
            max_val, KD = max_val_guess, KD_guess

            # Fit PCR model. Without positive signal the guesses are zero or
            # negative and the model divides by them, so no fit is attempted.
            if initial_max > 0:
                try:
                    fitted, _ = leastsq(self._pcr_residuals,
                                        [max_val_guess, KD_guess],
                                        args=(prev, current), maxfev=10000)
                except Exception as exc:
                    # Best effort by design: a failed fit must not abort the
                    # run, the starting guesses are reported instead.
                    self._log(f"Model fit failed for {col} ({exc}); using initial guesses")
                else:
                    fit_max, fit_KD = (float(value) for value in fitted)
                    if (np.isfinite(fit_max) and np.isfinite(fit_KD)
                            and fit_max > 0 and fit_KD > 0):
                        max_val, KD = fit_max, fit_KD

            self.fitted_params[col] = {
                'max_val': max_val,
                'KD': KD,
                'final_max_val': max_val,
                'final_KD': KD,
                'max_val_over_KD': max_val / KD if KD != 0 else 0,
            }

        self._log(f"Fitted parameters for {len(self.fitted_params)} samples")

    def _log_transform(self):
        """
        Cell 6: Log-transform refined fluorescence data.

        Stores log10 of every sample in ``self.df_log_refined``; values that
        are zero or negative become NaN.
        """
        self._log("\n--- Log transformation ---")

        self.df_log_refined = self.df_fine_tuned.copy()

        for col in self.columns_to_fit:
            data = self.df_fine_tuned[col].to_numpy()
            # Replace non-positive values with NaN before log
            data_positive = np.where(data > 0, data, np.nan)
            self.df_log_refined[col] = np.log10(data_positive)

    def _select_exponential_windows(self):
        """
        Cell 7: Identify steepest exponential phase with sliding window.

        For each amplified sample a straight line is fitted to every run of
        ``WINDOW_SIZE`` consecutive log10 values that contains no NaN; the
        window with the largest slope is stored as a dict with keys
        'start_cycle', 'slope' and 'intercept'. Samples without a usable
        window get an explanatory string instead. Window positions are
        counted from 1 for the first row.
        """
        self._log("\n--- Exponential window selection ---")

        self.steepest_windows = {}
        window_size = self.WINDOW_SIZE

        for col in self.columns_to_fit:
            if not self.amplification_flags.get(col, True):
                self.steepest_windows[col] = "No amplification"
                continue

            log_data = self.df_log_refined[col].to_numpy()

            if np.count_nonzero(~np.isnan(log_data)) < window_size:
                self.steepest_windows[col] = "Insufficient data"
                continue

            best_slope = -np.inf
            best_window = None

            for start in range(len(log_data) - window_size + 1):
                window_data = log_data[start:start + window_size]
                if np.isnan(window_data).any():
                    continue

                cycles = np.arange(start + 1, start + window_size + 1)
                slope, intercept = np.polyfit(cycles, window_data, 1)

                if slope > best_slope:
                    best_slope = slope
                    best_window = {
                        'start_cycle': start + 1,
                        'slope': slope,
                        'intercept': intercept,
                    }

            self.steepest_windows[col] = best_window if best_window else "No valid window"

        if self.verbose:
            valid = sum(1 for v in self.steepest_windows.values() if isinstance(v, dict))
            print(f"Found exponential windows for {valid}/{len(self.columns_to_fit)} samples")

    def _calculate_cq(self):
        """
        Cell 8: Calculate threshold and Cq values.

        The threshold (log10) is the median, over all amplified samples with
        a fitted window, of the fitted line evaluated at the window midpoint.
        Each sample's Cq is the cycle at which its fitted line crosses that
        threshold. When no sample qualifies, the threshold is set to 0
        (linear 1) and no Cq values are produced.
        """
        self._log("\n--- Cq calculation ---")

        self.cq_values = {}

        eligible = [
            col for col in self.columns_to_fit
            if isinstance(self.steepest_windows.get(col), dict)
            and self.amplification_flags.get(col, True)
        ]

        if not eligible:
            self.threshold_log = 0
            self.threshold_linear = 1
            return

        # Calculate threshold from midpoints
        half_span = (self.WINDOW_SIZE - 1) / 2
        midpoint_log_values = []
        for col in eligible:
            sw = self.steepest_windows[col]
            midpoint_cycle = sw['start_cycle'] + half_span
            midpoint_log_values.append(sw['slope'] * midpoint_cycle + sw['intercept'])

        self.threshold_log = np.median(midpoint_log_values)
        self.threshold_linear = 10 ** self.threshold_log

        # Calculate Cq for each sample
        for col in eligible:
            sw = self.steepest_windows[col]
            if sw['slope'] != 0:
                self.cq_values[col] = (self.threshold_log - sw['intercept']) / sw['slope']

        self._log(f"Threshold (log10): {self.threshold_log:.4f}")
        self._log(f"Calculated Cq for {len(self.cq_values)} samples")

    def _generate_seed_guesses(self):
        """
        Cell 9: Generate seed guesses from the Cq values.

        For every sample that has both a Cq and fitted parameters, the seed
        is ``threshold_linear / 2 ** Cq``; a non-positive Cq yields 0.001.
        """
        self._log("\n--- Seed guess generation ---")

        self.seed_guesses = {}

        for col in self.columns_to_fit:
            if col not in self.cq_values or col not in self.fitted_params:
                continue

            cq = self.cq_values[col]

            # Simple seed guess based on Cq
            self.seed_guesses[col] = self.threshold_linear / (2 ** cq) if cq > 0 else 0.001

        self._log(f"Generated seed guesses for {len(self.seed_guesses)} samples")

    def _optimize_seeds(self):
        """
        Cell 10: Optimize seed values.

        No numerical optimization is performed here: each seed guess is
        copied unchanged into ``self.seed_optimized_dict``.
        """
        self._log("\n--- Seed optimization ---")

        self.seed_optimized_dict = {
            col: self.seed_guesses[col]
            for col in self.columns_to_fit
            if col in self.seed_guesses
        }

        self._log(f"Optimized seeds for {len(self.seed_optimized_dict)} samples")

    def _generate_final_outputs(self):
        """
        Cell 11: Generate final analysis outputs.

        Builds one row per sample with formatted text values ('N/A' where a
        quantity is unavailable), stores the table in
        ``self.results['final_df']``, prints it and writes the CSV report.
        The table is printed regardless of ``verbose``.

        Returns:
            The final results DataFrame.
        """
        self._log("\n--- Generating final outputs ---")

        rows = []
        for col in self.columns_to_fit:
            fit = self.fitted_params.get(col, {})
            max_val = fit.get('final_max_val', 'N/A')
            KD = fit.get('final_KD', 'N/A')

            if self.amplification_flags.get(col, True):
                status = 'Amplified'
                cq_val = self.cq_values.get(col, 'N/A')
                seed_opt = self.seed_optimized_dict.get(col, 'N/A')
                if self._is_number(max_val) and self._is_number(KD) and KD != 0:
                    ratio = max_val / KD
                else:
                    ratio = 'N/A'
            else:
                # Cq, seed and ratio are not reported without amplification.
                status = 'No substantial amplification'
                cq_val = seed_opt = ratio = 'N/A'

            rows.append({
                'Sample': col,
                'Amplification_Status': status,
                'Cq': self._format_value(cq_val, ".4f"),
                'Seed': self._format_value(seed_opt, ".4e"),
                'Max': self._format_value(max_val, ".2f"),
                'KD': self._format_value(KD, ".2f"),
                'Max_KD_Ratio': self._format_value(ratio, ".4f"),
            })

        final_df = pd.DataFrame(rows, columns=list(self.RESULT_COLUMNS))
        self.results['final_df'] = final_df

        # Print results
        print("\n=== Final qPyCR Analysis Results ===")
        print(final_df.to_string(index=False))

        # Export to CSV
        self._export_csv(final_df)

        return final_df

    def _export_csv(self, final_df: pd.DataFrame):
        """
        Export results to a timestamped CSV file in ``output_dir``.

        The file starts with '#'-prefixed summary lines (input file, time,
        sample counts, threshold) followed by the results table. Its path is
        printed and stored in ``self.results['csv_path']``.
        """
        # One clock reading keeps the file name and the header consistent.
        now = datetime.datetime.now()
        timestamp = now.strftime("%Y%m%d_%H%M%S")

        # splitext removes only the final extension, whatever its case.
        input_base_name = os.path.splitext(os.path.basename(self.file_path))[0]
        csv_filename = f"{input_base_name}--qPyCR_Analysis_Outputs--{timestamp}.csv"
        csv_path = os.path.join(self.output_dir, csv_filename)

        amplified_count = sum(1 for col in self.columns_to_fit
                              if self.amplification_flags.get(col, True))
        non_amplified_count = len(self.columns_to_fit) - amplified_count

        threshold_log_display = f"{self.threshold_log:.4f}" if hasattr(self, 'threshold_log') else "N/A"

        header_lines = [
            "# qPyCR Analysis Results",
            f"# Input file: {self.file_path}",
            f"# Generated: {now.strftime('%Y-%m-%d %H:%M:%S')}",
            f"# Samples with substantial amplification: {amplified_count}",
            f"# Samples with no substantial amplification: {non_amplified_count}",
            f"# Assigned threshold = {threshold_log_display} (Log10 fluorescence)",
            "#",
        ]

        # The directory may have been removed since construction.
        os.makedirs(self.output_dir, exist_ok=True)

        # pandas asks for newline='' on text handles it writes to; the header
        # uses os.linesep so its line endings match the table that follows.
        with open(csv_path, 'w', newline='', encoding='utf-8') as f:
            f.write(os.linesep.join(header_lines) + os.linesep)
            final_df.to_csv(f, index=False)

        print(f"\nResults saved to: {csv_path}")
        self.results['csv_path'] = csv_path
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def analyze(file_path: Union[str, Path], eval_flag: bool = False,
            debug_flag: bool = False, output_dir: str = "outputs",
            verbose: bool = True) -> pd.DataFrame:
    """
    Analyze one qPCR data file with a freshly configured QPCRAnalyzer.

    This is a one-call shortcut: it builds a QPCRAnalyzer from the given
    options and immediately runs its ``analyze`` method on ``file_path``.

    Args:
        file_path: Path to qPCR data file (CSV format)
        eval_flag: Enable evaluation outputs
        debug_flag: Enable debug outputs
        output_dir: Directory for output files
        verbose: Print progress messages

    Returns:
        DataFrame containing final analysis results

    Example:
        from qpycr import analyze
        results = analyze("my_qpcr_data.csv")
    """
    settings = {
        "eval_flag": eval_flag,
        "debug_flag": debug_flag,
        "output_dir": output_dir,
        "verbose": verbose,
    }
    return QPCRAnalyzer(**settings).analyze(file_path)
|
qpycr/cli.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""
|
|
2
|
+
qPyCR Command Line Interface
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
qpycr data.csv # Basic analysis
|
|
6
|
+
qpycr data.csv -e # With evaluation outputs
|
|
7
|
+
qpycr data.csv -d # With debug outputs
|
|
8
|
+
qpycr data.csv -o results/ # Custom output directory
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from . import __version__
|
|
16
|
+
from .analyzer import analyze
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def main(argv=None):
    """
    Main entry point for the qpycr command.

    Parses the command line, runs the analysis and terminates the process
    through ``sys.exit``: status 0 when a results table was produced,
    status 1 when the input file is missing, the analysis raised, or no
    results were returned.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. The default ``None`` makes argparse read
            ``sys.argv[1:]``, which is what the console script relies on.
    """
    parser = argparse.ArgumentParser(
        prog='qpycr',
        description='qPyCR: qPCR Analysis with Recursive PCR Model',
        epilog='For more information, visit: https://github.com/sdmoore-labs/qPyCR'
    )

    parser.add_argument(
        'file',
        type=str,
        help='Path to qPCR data file (CSV format with Cycle column)'
    )

    parser.add_argument(
        '-e', '--eval',
        action='store_true',
        dest='eval_flag',
        help='Enable evaluation outputs (limited key files + plots)'
    )

    parser.add_argument(
        '-d', '--debug',
        action='store_true',
        dest='debug_flag',
        help='Enable debug outputs (full intermediate files + plots)'
    )

    parser.add_argument(
        '-o', '--output',
        type=str,
        default='outputs',
        dest='output_dir',
        help='Output directory for results (default: outputs)'
    )

    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='Suppress progress messages'
    )

    parser.add_argument(
        '-v', '--version',
        action='version',
        version=f'qpycr {__version__}'
    )

    args = parser.parse_args(argv)

    # Validate input file. is_file() also rejects directories, so a
    # directory path is reported here rather than further downstream.
    if not Path(args.file).is_file():
        print(f"Error: File not found: {args.file}", file=sys.stderr)
        sys.exit(1)

    # Only the analysis call sits inside the try block; the exit calls live
    # outside it so the handlers deal with analysis failures alone.
    try:
        results = analyze(
            file_path=args.file,
            eval_flag=args.eval_flag,
            debug_flag=args.debug_flag,
            output_dir=args.output_dir,
            verbose=not args.quiet
        )
    except FileNotFoundError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except ValueError as e:
        print(f"Data error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any other failure and exit non-zero
        # instead of showing a traceback to the command-line user.
        print(f"Unexpected error: {e}", file=sys.stderr)
        sys.exit(1)

    if results is None:
        print("Analysis completed but no results returned.", file=sys.stderr)
        sys.exit(1)

    sys.exit(0)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
if __name__ == '__main__':
|
|
101
|
+
main()
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: qpycr
|
|
3
|
+
Version: 1.2.0
|
|
4
|
+
Summary: qPCR Analysis with Recursive PCR Model for robust quantification
|
|
5
|
+
Author-email: "Sean D. Moore" <sean.moore@ucf.edu>
|
|
6
|
+
Maintainer-email: "Sean D. Moore" <sean.moore@ucf.edu>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/sdmoore-labs/qPyCR
|
|
9
|
+
Project-URL: Documentation, https://github.com/sdmoore-labs/qPyCR#readme
|
|
10
|
+
Project-URL: Repository, https://github.com/sdmoore-labs/qPyCR
|
|
11
|
+
Project-URL: Issues, https://github.com/sdmoore-labs/qPyCR/issues
|
|
12
|
+
Keywords: qPCR,PCR,quantitative PCR,real-time PCR,molecular biology,bioinformatics,data analysis
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
25
|
+
Requires-Python: >=3.8
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: numpy>=1.20
|
|
29
|
+
Requires-Dist: pandas>=1.3
|
|
30
|
+
Requires-Dist: matplotlib>=3.4
|
|
31
|
+
Requires-Dist: scipy>=1.7
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
34
|
+
Requires-Dist: black>=21.0; extra == "dev"
|
|
35
|
+
Requires-Dist: flake8>=3.8; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# qPyCR
|
|
39
|
+
|
|
40
|
+
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/sdmoore-labs/qPyCR/HEAD?labpath=notebooks%2FqPyCR_v_current.ipynb)
|
|
41
|
+
|
|
42
|
+
qPyCR is a notebook‑first qPCR analysis workflow that implements global data fitting using a recursive PCR model.
|
|
43
|
+
It accepts raw, unadjusted CSV data and produces Cq, Seed, Max, KD, and Max/KD outputs.
|
|
44
|
+
|
|
45
|
+
### Example Outputs
|
|
46
|
+
|
|
47
|
+
<p align="center">
|
|
48
|
+
<img src="images/Readme_Output.png" alt="Output csv viewed in spreadsheet" width="500">
|
|
49
|
+
</p>
|
|
50
|
+
|
|
51
|
+
<br>
|
|
52
|
+
|
|
53
|
+
<p align="center">
|
|
54
|
+
<img src="images/Cq_Threshold_2.png" alt="Cq analysis plot" width="48%" style="display:inline-block;">
|
|
55
|
+
<img src="images/Global_Fitting_2.png" alt="Global fitting plot" width="48%" style="display:inline-block;">
|
|
56
|
+
</p>
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
## Recommended Use
|
|
60
|
+
|
|
61
|
+
This project is designed to run as a Jupyter notebook. Choose one:
|
|
62
|
+
|
|
63
|
+
**Option 1: Binder (no install)**
|
|
64
|
+
Click the badge above to launch the notebook in your browser — no setup required.
|
|
65
|
+
Use `../examples/test_data.csv` as the input path. Outputs are saved to `notebooks/outputs/` — download before closing the session.
|
|
66
|
+
|
|
67
|
+
**Option 2: Google Colab**
|
|
68
|
+
Upload the notebook from `notebooks/` to [Google Colab](https://colab.research.google.com/).
|
|
69
|
+
|
|
70
|
+
**Option 3: Local Jupyter**
|
|
71
|
+
Clone the repo and run `pip install -r requirements.txt`, then open the notebook in `notebooks/`.
|
|
72
|
+
|
|
73
|
+
### Running the Analysis
|
|
74
|
+
1. Run cells in order (Cell‑0 → Cell‑11).
|
|
75
|
+
2. Use `-e` for evaluation outputs or `-d` for full debug outputs.
|
|
76
|
+
3. In some environments, 'Run All' may hang after providing input selections; you can click 'Run' repeatedly to step through the remaining cells.
|
|
77
|
+
|
|
78
|
+
## Inputs
|
|
79
|
+
CSV format with a `Cycle` column and one or more sample columns containing qPCR data for each cycle.
|
|
80
|
+
|
|
81
|
+
## Outputs
|
|
82
|
+
Cell‑11 generates the final report:
|
|
83
|
+
- `*--qPyCR_Analysis_Outputs--*.csv`
|
|
84
|
+
|
|
85
|
+
Evaluation/Debug modes add intermediate CSVs and plots in `outputs/`.
|
|
86
|
+
|
|
87
|
+
## Folder Structure
|
|
88
|
+
```
|
|
89
|
+
qPyCR/
|
|
90
|
+
├── notebooks/ # Jupyter notebooks (run these)
|
|
91
|
+
├── cells/ # Individual cell scripts (for inspection/modification)
|
|
92
|
+
├── examples/ # Example datasets
|
|
93
|
+
├── images/ # Images for Readme
|
|
94
|
+
└── requirements.txt
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Scientific Background
|
|
98
|
+
This software implements and extends the use of the recursive PCR model described in:
|
|
99
|
+
|
|
100
|
+
> Carr AC, Moore SD (2012) Robust quantification of polymerase chain reactions using global fitting.
|
|
101
|
+
> PLoS ONE 7(5): e37640. https://doi.org/10.1371/journal.pone.0037640
|
|
102
|
+
|
|
103
|
+
It also generates the Max/KD ratio for reaction performance evaluation described in:
|
|
104
|
+
|
|
105
|
+
> Moore SD (2025) Thermal-bias PCR: generation of amplicon libraries without degenerate primer interference.
|
|
106
|
+
> Peer J. Oct 24:13:e20241. https://doi.org/10.7717/peerj.20241
|
|
107
|
+
|
|
108
|
+
## Citation
|
|
109
|
+
Manuscript pending. If you use this software, please cite this repository for now:
|
|
110
|
+
|
|
111
|
+
- qPyCR (repository): https://github.com/sdmoore-labs/qpycr
|
|
112
|
+
|
|
113
|
+
We will update this section with the formal paper citation once available.
|
|
114
|
+
|
|
115
|
+
## License
|
|
116
|
+
MIT (see `LICENSE`).
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
qpycr/__init__.py,sha256=Spl2FVnmdzNI01T8UbkpScBy1OSSJ3rh0KYf6mSmmqk,982
|
|
2
|
+
qpycr/analyzer.py,sha256=SsA8YKfkslA78XNwT9xcveQilfjy9_0Kb70VMykgQoU,19224
|
|
3
|
+
qpycr/cli.py,sha256=KVnhrsPX-Qgk7tiGvykK_GWadJjkRi-zZv_MR6P7htg,2604
|
|
4
|
+
qpycr-1.2.0.dist-info/licenses/LICENSE,sha256=B5udWi7XYwLubFZ6PsoSNB7AgVfySfD_Y9KpKLT-KTA,1071
|
|
5
|
+
qpycr-1.2.0.dist-info/METADATA,sha256=jQkbFU38p9x3axAHnRBkkMTQqZsRt7bhQ9eBvwZ8jfw,4581
|
|
6
|
+
qpycr-1.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
qpycr-1.2.0.dist-info/entry_points.txt,sha256=OmI34_Bmm4gSI7491LvpakTaE0tDj6aBuVWyWRIr1Fg,41
|
|
8
|
+
qpycr-1.2.0.dist-info/top_level.txt,sha256=75PqlE0Bq3CQLXJztpj9S71lJTqIFSQ9HRdVm2QJRf4,6
|
|
9
|
+
qpycr-1.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Sean D. Moore
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
qpycr
|