primer-target-planner 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- primer_target_planner-0.1.0/PKG-INFO +220 -0
- primer_target_planner-0.1.0/README.md +210 -0
- primer_target_planner-0.1.0/primer_target_planner/__init__.py +30 -0
- primer_target_planner-0.1.0/primer_target_planner/models.py +97 -0
- primer_target_planner-0.1.0/primer_target_planner/planner.py +359 -0
- primer_target_planner-0.1.0/primer_target_planner.egg-info/PKG-INFO +220 -0
- primer_target_planner-0.1.0/primer_target_planner.egg-info/SOURCES.txt +11 -0
- primer_target_planner-0.1.0/primer_target_planner.egg-info/dependency_links.txt +1 -0
- primer_target_planner-0.1.0/primer_target_planner.egg-info/requires.txt +3 -0
- primer_target_planner-0.1.0/primer_target_planner.egg-info/top_level.txt +1 -0
- primer_target_planner-0.1.0/pyproject.toml +18 -0
- primer_target_planner-0.1.0/setup.cfg +4 -0
- primer_target_planner-0.1.0/tests/test_planner.py +539 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: primer-target-planner
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Interval-based PCR target window planner
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
10
|
+
|
|
11
|
+
# primer-target-planner
|
|
12
|
+
|
|
13
|
+
Interval-based PCR target window planner.
|
|
14
|
+
|
|
15
|
+
Given a set of required genomic intervals (e.g. CDS exons) and PCR product size
|
|
16
|
+
constraints, generate the minimal set of target windows that fully cover every
|
|
17
|
+
required interval.
|
|
18
|
+
|
|
19
|
+
This is a **pure algorithm library** — no primer design, no external
|
|
20
|
+
bioinformatics services.
|
|
21
|
+
|
|
22
|
+
> **This library uses 0-based half-open intervals: `[start, end)`, `length = end - start`.**
|
|
23
|
+
>
|
|
24
|
+
> All coordinates in `RequiredInterval`, `PlanningBounds`, and `TargetWindow`
|
|
25
|
+
> follow this convention. For example, `RequiredInterval("exon1", 1000, 1200)`
|
|
26
|
+
> covers positions 1000–1199 inclusive (200 bp).
|
|
27
|
+
|
|
28
|
+
## Install
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install -e ".[dev]"
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick start
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from primer_target_planner import (
|
|
38
|
+
plan_targets,
|
|
39
|
+
PlannerConfig,
|
|
40
|
+
PlanningBounds,
|
|
41
|
+
RequiredInterval,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
intervals = [
|
|
45
|
+
RequiredInterval("exon1", 1000, 1200), # 200 bp
|
|
46
|
+
RequiredInterval("exon2", 1500, 1800), # 300 bp
|
|
47
|
+
RequiredInterval("exon3", 2200, 2500), # 300 bp
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
config = PlannerConfig(
|
|
51
|
+
product_min=600,
|
|
52
|
+
product_max=1000,
|
|
53
|
+
strand="+",
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
targets = plan_targets(intervals, config)
|
|
57
|
+
for t in targets:
|
|
58
|
+
print(
|
|
59
|
+
f"[{t.start}, {t.end}) len={t.length} mode={t.planning_mode} "
|
|
60
|
+
f"covers={t.covered_ids} reason={t.reason}"
|
|
61
|
+
)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Negative-strand example
|
|
65
|
+
|
|
66
|
+
On the **negative strand** the planner processes intervals from high genomic
|
|
67
|
+
coordinates (transcript 5') to low coordinates (transcript 3'). Input and
|
|
68
|
+
output coordinates are always genomic `start < end` — the strand only affects
|
|
69
|
+
**planning order**, not coordinate direction.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from primer_target_planner import (
|
|
73
|
+
plan_targets,
|
|
74
|
+
PlannerConfig,
|
|
75
|
+
RequiredInterval,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Four exons on the minus strand.
|
|
79
|
+
# Transcript order (5'→3'): exonD → exonC → exonB → exonA
|
|
80
|
+
# (high genomic coords → low genomic coords)
|
|
81
|
+
intervals = [
|
|
82
|
+
RequiredInterval("exonA", 300, 400),
|
|
83
|
+
RequiredInterval("exonB", 700, 800),
|
|
84
|
+
RequiredInterval("exonC", 1100, 1200),
|
|
85
|
+
RequiredInterval("exonD", 1500, 1600),
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
config = PlannerConfig(product_min=500, product_max=900, strand="-")
|
|
89
|
+
targets = plan_targets(intervals, config)
|
|
90
|
+
|
|
91
|
+
for t in targets:
|
|
92
|
+
# start < end always — genomic coordinates, not transcript direction
|
|
93
|
+
print(
|
|
94
|
+
f"[{t.start}, {t.end}) len={t.length} mode={t.planning_mode} "
|
|
95
|
+
f"covers={t.covered_ids}"
|
|
96
|
+
)
|
|
97
|
+
# Possible output:
|
|
98
|
+
# [701, 1600) len=899 mode=product_max covers=['exonD', 'exonC']
|
|
99
|
+
# [300, 800) len=500 mode=product_max covers=['exonB', 'exonA']
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## API
|
|
103
|
+
|
|
104
|
+
### `RequiredInterval`
|
|
105
|
+
|
|
106
|
+
| Field | Type | Description |
|
|
107
|
+
|------------|-------------------|------------------------------------------|
|
|
108
|
+
| `id` | `str` | Identifier (e.g. exon name) |
|
|
109
|
+
| `start` | `int` | Genomic start (0-based, inclusive) |
|
|
110
|
+
| `end` | `int` | Genomic end (exclusive) |
|
|
111
|
+
| `metadata` | `dict \| None` | Optional user metadata |
|
|
112
|
+
|
|
113
|
+
All coordinates are 0-based half-open `[start, end)`.
|
|
114
|
+
`length = end - start`.
|
|
115
|
+
|
|
116
|
+
### `PlannerConfig`
|
|
117
|
+
|
|
118
|
+
| Field | Type | Default | Description |
|
|
119
|
+
|----------------|----------------|---------|---------------------------------------|
|
|
120
|
+
| `product_min` | `int` | — | Minimum PCR product length (bp) |
|
|
121
|
+
| `product_max` | `int` | — | Maximum PCR product length (bp) |
|
|
122
|
+
| `strand` | `"+" \| "-"` | — | Transcript strand |
|
|
123
|
+
| `tile_overlap` | `int` | `200` | Overlap between tiles for long spans |
|
|
124
|
+
| `allow_overlap`| `bool` | `True` | Allow adjacent targets to overlap |
|
|
125
|
+
|
|
126
|
+
### `PlanningBounds`
|
|
127
|
+
|
|
128
|
+
| Field | Type | Description |
|
|
129
|
+
|---------|-------|--------------------------------------------------|
|
|
130
|
+
| `start` | `int` | Gene / transcript genomic start (inclusive) |
|
|
131
|
+
| `end` | `int` | Gene / transcript genomic end (exclusive) |
|
|
132
|
+
|
|
133
|
+
0-based half-open `[start, end)`. `length = end - start`.
|
|
134
|
+
|
|
135
|
+
### `TargetWindow`
|
|
136
|
+
|
|
137
|
+
| Field | Type | Description |
|
|
138
|
+
|----------------|-------------|-------------------------------------------|
|
|
139
|
+
| `start` | `int` | Genomic start (inclusive) |
|
|
140
|
+
| `end` | `int` | Genomic end (exclusive) |
|
|
141
|
+
| `length` | `int` | `end - start` |
|
|
142
|
+
| `covered_ids` | `list[str]` | IDs of fully covered intervals |
|
|
143
|
+
| `anchor_id` | `str` | The interval that anchored this target |
|
|
144
|
+
| `anchor_side` | `"5prime" \| "3prime"` | Anchor side |
|
|
145
|
+
| `planning_mode`| `str` | `product_min`, `product_max`, `single`, `terminal_reverse`, `tiled` |
|
|
146
|
+
| `reason` | `str` | Human-readable explanation |
|
|
147
|
+
|
|
148
|
+
### `plan_targets(intervals, config, bounds=None) -> list[TargetWindow]`
|
|
149
|
+
|
|
150
|
+
Main entry point.
|
|
151
|
+
|
|
152
|
+
- `intervals`: required intervals (any order; sorted internally).
|
|
153
|
+
- `config`: product-size and strand configuration.
|
|
154
|
+
- `bounds`: optional gene extent; enables terminal-reverse logic.
|
|
155
|
+
|
|
156
|
+
Returns target windows in transcript 5'→3' order.
|
|
157
|
+
|
|
158
|
+
## Algorithm
|
|
159
|
+
|
|
160
|
+
### Min-first / max-rescue planner
|
|
161
|
+
|
|
162
|
+
Processing proceeds from the transcript 5' end:
|
|
163
|
+
|
|
164
|
+
1. **Try `product_min`** — if a window of `product_min` bp can fully cover the
|
|
165
|
+
next consecutive required interval, merge it. Continue merging while
|
|
166
|
+
`product_min` still covers the next interval.
|
|
167
|
+
|
|
168
|
+
2. **Try `product_max`** — if `product_min` cannot cover the next interval but
|
|
169
|
+
`product_max` can, use `product_max` and merge all intervals it covers.
|
|
170
|
+
|
|
171
|
+
3. **Independent target** — if neither size covers the next interval, the
|
|
172
|
+
current anchor becomes its own target and the next interval starts a new
|
|
173
|
+
anchor.
|
|
174
|
+
|
|
175
|
+
4. **Terminal reverse** — if a forward window from the current anchor would
|
|
176
|
+
extend past the gene 3' boundary, instead anchor at the gene 3' end and
|
|
177
|
+
extend toward 5'. Tries `product_min` first; upgrades to `product_max` if
|
|
178
|
+
the previous interval can also be covered.
|
|
179
|
+
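Window: `[bounds.end - product_size, bounds.end)` on the "+" strand; on the "-" strand, where the gene 3' end is `bounds.start`, it is `[bounds.start, bounds.start + product_size)`.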
|
|
180
|
+
|
|
181
|
+
5. **Tiling** — when a single required interval exceeds `product_max`, it is
|
|
182
|
+
automatically tiled into overlapping windows of `product_max` bp with
|
|
183
|
+
`tile_overlap` bp overlap.
|
|
184
|
+
|
|
185
|
+
### Coverage rule
|
|
186
|
+
|
|
187
|
+
A required interval is considered **fully covered** only when:
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
target.start <= interval.start AND target.end >= interval.end
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
**Partial coverage does not count.** A target that overlaps an interval but
|
|
194
|
+
does not span its full extent does not mark that interval as covered.
|
|
195
|
+
|
|
196
|
+
### Bounds behaviour
|
|
197
|
+
|
|
198
|
+
| `bounds` provided? | Behaviour |
|
|
199
|
+
|---|---|
|
|
200
|
+
| **No** (`None`) | Gene extent is inferred from the intervals themselves. Windows may extend freely beyond the inferred span. Terminal reverse is **not** triggered (there is no external 3' boundary to respect). |
|
|
201
|
+
| **Yes** | The planner keeps all windows within `[bounds.start, bounds.end)`. When a forward window would extend past the 3' boundary, **terminal reverse** anchors at `bounds.end` and extends toward 5'. |
|
|
202
|
+
|
|
203
|
+
Providing bounds is recommended when you know the gene / transcript extent — it
|
|
204
|
+
prevents targets from stretching beyond the biological region and enables the
|
|
205
|
+
terminal-reverse optimisation at the 3' end.
|
|
206
|
+
|
|
207
|
+
### Strand handling
|
|
208
|
+
|
|
209
|
+
- **"+" strand**: 5' is at low genomic coordinates; intervals are processed in
|
|
210
|
+
ascending genomic order.
|
|
211
|
+
- **"-" strand**: 5' is at high genomic coordinates; intervals are processed in
|
|
212
|
+
descending genomic order.
|
|
213
|
+
- **All output coordinates are genomic `start < end`.**
|
|
214
|
+
The strand only affects planning order, never coordinate direction.
|
|
215
|
+
|
|
216
|
+
## Running tests
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
python -m pytest -q
|
|
220
|
+
```
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# primer-target-planner
|
|
2
|
+
|
|
3
|
+
Interval-based PCR target window planner.
|
|
4
|
+
|
|
5
|
+
Given a set of required genomic intervals (e.g. CDS exons) and PCR product size
|
|
6
|
+
constraints, generate the minimal set of target windows that fully cover every
|
|
7
|
+
required interval.
|
|
8
|
+
|
|
9
|
+
This is a **pure algorithm library** — no primer design, no external
|
|
10
|
+
bioinformatics services.
|
|
11
|
+
|
|
12
|
+
> **This library uses 0-based half-open intervals: `[start, end)`, `length = end - start`.**
|
|
13
|
+
>
|
|
14
|
+
> All coordinates in `RequiredInterval`, `PlanningBounds`, and `TargetWindow`
|
|
15
|
+
> follow this convention. For example, `RequiredInterval("exon1", 1000, 1200)`
|
|
16
|
+
> covers positions 1000–1199 inclusive (200 bp).
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install -e ".[dev]"
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Quick start
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from primer_target_planner import (
|
|
28
|
+
plan_targets,
|
|
29
|
+
PlannerConfig,
|
|
30
|
+
PlanningBounds,
|
|
31
|
+
RequiredInterval,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
intervals = [
|
|
35
|
+
RequiredInterval("exon1", 1000, 1200), # 200 bp
|
|
36
|
+
RequiredInterval("exon2", 1500, 1800), # 300 bp
|
|
37
|
+
RequiredInterval("exon3", 2200, 2500), # 300 bp
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
config = PlannerConfig(
|
|
41
|
+
product_min=600,
|
|
42
|
+
product_max=1000,
|
|
43
|
+
strand="+",
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
targets = plan_targets(intervals, config)
|
|
47
|
+
for t in targets:
|
|
48
|
+
print(
|
|
49
|
+
f"[{t.start}, {t.end}) len={t.length} mode={t.planning_mode} "
|
|
50
|
+
f"covers={t.covered_ids} reason={t.reason}"
|
|
51
|
+
)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Negative-strand example
|
|
55
|
+
|
|
56
|
+
On the **negative strand** the planner processes intervals from high genomic
|
|
57
|
+
coordinates (transcript 5') to low coordinates (transcript 3'). Input and
|
|
58
|
+
output coordinates are always genomic `start < end` — the strand only affects
|
|
59
|
+
**planning order**, not coordinate direction.
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from primer_target_planner import (
|
|
63
|
+
plan_targets,
|
|
64
|
+
PlannerConfig,
|
|
65
|
+
RequiredInterval,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Four exons on the minus strand.
|
|
69
|
+
# Transcript order (5'→3'): exonD → exonC → exonB → exonA
|
|
70
|
+
# (high genomic coords → low genomic coords)
|
|
71
|
+
intervals = [
|
|
72
|
+
RequiredInterval("exonA", 300, 400),
|
|
73
|
+
RequiredInterval("exonB", 700, 800),
|
|
74
|
+
RequiredInterval("exonC", 1100, 1200),
|
|
75
|
+
RequiredInterval("exonD", 1500, 1600),
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
config = PlannerConfig(product_min=500, product_max=900, strand="-")
|
|
79
|
+
targets = plan_targets(intervals, config)
|
|
80
|
+
|
|
81
|
+
for t in targets:
|
|
82
|
+
# start < end always — genomic coordinates, not transcript direction
|
|
83
|
+
print(
|
|
84
|
+
f"[{t.start}, {t.end}) len={t.length} mode={t.planning_mode} "
|
|
85
|
+
f"covers={t.covered_ids}"
|
|
86
|
+
)
|
|
87
|
+
# Possible output:
|
|
88
|
+
# [701, 1600) len=899 mode=product_max covers=['exonD', 'exonC']
|
|
89
|
+
# [300, 800) len=500 mode=product_max covers=['exonB', 'exonA']
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## API
|
|
93
|
+
|
|
94
|
+
### `RequiredInterval`
|
|
95
|
+
|
|
96
|
+
| Field | Type | Description |
|
|
97
|
+
|------------|-------------------|------------------------------------------|
|
|
98
|
+
| `id` | `str` | Identifier (e.g. exon name) |
|
|
99
|
+
| `start` | `int` | Genomic start (0-based, inclusive) |
|
|
100
|
+
| `end` | `int` | Genomic end (exclusive) |
|
|
101
|
+
| `metadata` | `dict \| None` | Optional user metadata |
|
|
102
|
+
|
|
103
|
+
All coordinates are 0-based half-open `[start, end)`.
|
|
104
|
+
`length = end - start`.
|
|
105
|
+
|
|
106
|
+
### `PlannerConfig`
|
|
107
|
+
|
|
108
|
+
| Field | Type | Default | Description |
|
|
109
|
+
|----------------|----------------|---------|---------------------------------------|
|
|
110
|
+
| `product_min` | `int` | — | Minimum PCR product length (bp) |
|
|
111
|
+
| `product_max` | `int` | — | Maximum PCR product length (bp) |
|
|
112
|
+
| `strand` | `"+" \| "-"` | — | Transcript strand |
|
|
113
|
+
| `tile_overlap` | `int` | `200` | Overlap between tiles for long spans |
|
|
114
|
+
| `allow_overlap`| `bool` | `True` | Allow adjacent targets to overlap |
|
|
115
|
+
|
|
116
|
+
### `PlanningBounds`
|
|
117
|
+
|
|
118
|
+
| Field | Type | Description |
|
|
119
|
+
|---------|-------|--------------------------------------------------|
|
|
120
|
+
| `start` | `int` | Gene / transcript genomic start (inclusive) |
|
|
121
|
+
| `end` | `int` | Gene / transcript genomic end (exclusive) |
|
|
122
|
+
|
|
123
|
+
0-based half-open `[start, end)`. `length = end - start`.
|
|
124
|
+
|
|
125
|
+
### `TargetWindow`
|
|
126
|
+
|
|
127
|
+
| Field | Type | Description |
|
|
128
|
+
|----------------|-------------|-------------------------------------------|
|
|
129
|
+
| `start` | `int` | Genomic start (inclusive) |
|
|
130
|
+
| `end` | `int` | Genomic end (exclusive) |
|
|
131
|
+
| `length` | `int` | `end - start` |
|
|
132
|
+
| `covered_ids` | `list[str]` | IDs of fully covered intervals |
|
|
133
|
+
| `anchor_id` | `str` | The interval that anchored this target |
|
|
134
|
+
| `anchor_side` | `"5prime" \| "3prime"` | Anchor side |
|
|
135
|
+
| `planning_mode`| `str` | `product_min`, `product_max`, `single`, `terminal_reverse`, `tiled` |
|
|
136
|
+
| `reason` | `str` | Human-readable explanation |
|
|
137
|
+
|
|
138
|
+
### `plan_targets(intervals, config, bounds=None) -> list[TargetWindow]`
|
|
139
|
+
|
|
140
|
+
Main entry point.
|
|
141
|
+
|
|
142
|
+
- `intervals`: required intervals (any order; sorted internally).
|
|
143
|
+
- `config`: product-size and strand configuration.
|
|
144
|
+
- `bounds`: optional gene extent; enables terminal-reverse logic.
|
|
145
|
+
|
|
146
|
+
Returns target windows in transcript 5'→3' order.
|
|
147
|
+
|
|
148
|
+
## Algorithm
|
|
149
|
+
|
|
150
|
+
### Min-first / max-rescue planner
|
|
151
|
+
|
|
152
|
+
Processing proceeds from the transcript 5' end:
|
|
153
|
+
|
|
154
|
+
1. **Try `product_min`** — if a window of `product_min` bp can fully cover the
|
|
155
|
+
next consecutive required interval, merge it. Continue merging while
|
|
156
|
+
`product_min` still covers the next interval.
|
|
157
|
+
|
|
158
|
+
2. **Try `product_max`** — if `product_min` cannot cover the next interval but
|
|
159
|
+
`product_max` can, use `product_max` and merge all intervals it covers.
|
|
160
|
+
|
|
161
|
+
3. **Independent target** — if neither size covers the next interval, the
|
|
162
|
+
current anchor becomes its own target and the next interval starts a new
|
|
163
|
+
anchor.
|
|
164
|
+
|
|
165
|
+
4. **Terminal reverse** — if a forward window from the current anchor would
|
|
166
|
+
extend past the gene 3' boundary, instead anchor at the gene 3' end and
|
|
167
|
+
extend toward 5'. Tries `product_min` first; upgrades to `product_max` if
|
|
168
|
+
the previous interval can also be covered.
|
|
169
|
+
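Window: `[bounds.end - product_size, bounds.end)` on the "+" strand; on the "-" strand, where the gene 3' end is `bounds.start`, it is `[bounds.start, bounds.start + product_size)`.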
|
|
170
|
+
|
|
171
|
+
5. **Tiling** — when a single required interval exceeds `product_max`, it is
|
|
172
|
+
automatically tiled into overlapping windows of `product_max` bp with
|
|
173
|
+
`tile_overlap` bp overlap.
|
|
174
|
+
|
|
175
|
+
### Coverage rule
|
|
176
|
+
|
|
177
|
+
A required interval is considered **fully covered** only when:
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
target.start <= interval.start AND target.end >= interval.end
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
**Partial coverage does not count.** A target that overlaps an interval but
|
|
184
|
+
does not span its full extent does not mark that interval as covered.
|
|
185
|
+
|
|
186
|
+
### Bounds behaviour
|
|
187
|
+
|
|
188
|
+
| `bounds` provided? | Behaviour |
|
|
189
|
+
|---|---|
|
|
190
|
+
| **No** (`None`) | Gene extent is inferred from the intervals themselves. Windows may extend freely beyond the inferred span. Terminal reverse is **not** triggered (there is no external 3' boundary to respect). |
|
|
191
|
+
| **Yes** | The planner keeps all windows within `[bounds.start, bounds.end)`. When a forward window would extend past the 3' boundary, **terminal reverse** anchors at `bounds.end` and extends toward 5'. |
|
|
192
|
+
|
|
193
|
+
Providing bounds is recommended when you know the gene / transcript extent — it
|
|
194
|
+
prevents targets from stretching beyond the biological region and enables the
|
|
195
|
+
terminal-reverse optimisation at the 3' end.
|
|
196
|
+
|
|
197
|
+
### Strand handling
|
|
198
|
+
|
|
199
|
+
- **"+" strand**: 5' is at low genomic coordinates; intervals are processed in
|
|
200
|
+
ascending genomic order.
|
|
201
|
+
- **"-" strand**: 5' is at high genomic coordinates; intervals are processed in
|
|
202
|
+
descending genomic order.
|
|
203
|
+
- **All output coordinates are genomic `start < end`.**
|
|
204
|
+
The strand only affects planning order, never coordinate direction.
|
|
205
|
+
|
|
206
|
+
## Running tests
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
python -m pytest -q
|
|
210
|
+
```
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""primer-target-planner: interval-based PCR target window planner.
|
|
2
|
+
|
|
3
|
+
All coordinates are 0-based half-open [start, end):
|
|
4
|
+
- start inclusive, end exclusive
|
|
5
|
+
- length = end - start
|
|
6
|
+
|
|
7
|
+
Core API::
|
|
8
|
+
|
|
9
|
+
from primer_target_planner import plan_targets, PlannerConfig, RequiredInterval
|
|
10
|
+
|
|
11
|
+
intervals = [RequiredInterval("A", 100, 300), RequiredInterval("B", 400, 600)]
|
|
12
|
+
config = PlannerConfig(product_min=500, product_max=1000, strand="+")
|
|
13
|
+
targets = plan_targets(intervals, config)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from .models import (
|
|
17
|
+
PlannerConfig,
|
|
18
|
+
PlanningBounds,
|
|
19
|
+
RequiredInterval,
|
|
20
|
+
TargetWindow,
|
|
21
|
+
)
|
|
22
|
+
from .planner import plan_targets
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"PlannerConfig",
|
|
26
|
+
"PlanningBounds",
|
|
27
|
+
"RequiredInterval",
|
|
28
|
+
"TargetWindow",
|
|
29
|
+
"plan_targets",
|
|
30
|
+
]
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Data models for the primer target planner.
|
|
2
|
+
|
|
3
|
+
All coordinates are **0-based half-open** [start, end):
|
|
4
|
+
- start is inclusive
|
|
5
|
+
- end is exclusive
|
|
6
|
+
- length = end - start
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Literal
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
|
|
16
|
+
class RequiredInterval:
|
|
17
|
+
"""A genomic interval that must be fully covered by at least one target window.
|
|
18
|
+
|
|
19
|
+
Coordinates are 0-based half-open [start, end):
|
|
20
|
+
- ``start`` is inclusive (first covered position)
|
|
21
|
+
- ``end`` is exclusive (first position NOT covered)
|
|
22
|
+
- length = end - start
|
|
23
|
+
|
|
24
|
+
Example: ``RequiredInterval("exon1", 1000, 1200)`` covers positions
|
|
25
|
+
1000–1199 inclusive, length = 200 bp.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
id: str
|
|
29
|
+
start: int
|
|
30
|
+
end: int
|
|
31
|
+
metadata: dict | None = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class PlannerConfig:
    """Settings that control how target windows are planned.

    Attributes:
        product_min: Minimum PCR product length (bp).
        product_max: Maximum PCR product length (bp).
        strand: Transcript strand, either "+" or "-".
        tile_overlap: Overlap (bp) between adjacent tiles when a single
            interval exceeds ``product_max``. Defaults to 200.
        allow_overlap: Whether adjacent targets may overlap. Defaults to True.

    Raises:
        ValueError: If ``strand`` is neither "+" nor "-".
    """

    product_min: int
    product_max: int
    strand: Literal["+", "-"]
    tile_overlap: int = 200
    allow_overlap: bool = True

    def __post_init__(self) -> None:
        """Reject any strand value other than "+" or "-"."""
        accepted_strands = ("+", "-")
        if self.strand in accepted_strands:
            return
        raise ValueError(
            f"strand must be '+' or '-', got {self.strand!r}"
        )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
class PlanningBounds:
    """Genomic extent of the gene / transcript for which targets are planned.

    The extent is a 0-based half-open interval [start, end): ``start`` is
    inclusive, ``end`` is exclusive, and length = end - start.

    Which field is the transcript 5' end depends on the strand:
    - strand="+": ``start`` is the 5' end and ``end`` is the 3' end.
    - strand="-": ``end`` is the 5' end and ``start`` is the 3' end.

    Example: ``PlanningBounds(0, 5000)`` describes a 5 000 bp gene.
    """

    start: int
    end: int
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class TargetWindow:
    """One planned PCR target window.

    The window is a 0-based half-open interval [start, end): ``start`` is the
    first covered position, ``end`` is the first position NOT covered, and
    ``length = end - start``.

    Example: ``TargetWindow(start=4000, end=5000, length=1000, ...)``
    covers positions 4000–4999 inclusive.

    Attributes:
        start: Genomic start (inclusive).
        end: Genomic end (exclusive).
        length: Window length; supplied by the caller, not derived here.
        covered_ids: IDs of the fully covered intervals (fresh list per
            instance by default).
        anchor_id: ID of the interval that anchored this target.
        anchor_side: Anchor side, "5prime" or "3prime".
        planning_mode: One of "product_min", "product_max", "single",
            "terminal_reverse" or "tiled".
        reason: Human-readable explanation.
    """

    start: int
    end: int
    length: int
    covered_ids: list[str] = field(default_factory=list)
    anchor_id: str = ""
    anchor_side: Literal["5prime", "3prime"] = "5prime"
    planning_mode: Literal[
        "product_min",
        "product_max",
        "single",
        "terminal_reverse",
        "tiled",
    ] = "single"
    reason: str = ""
|