@nanalogue/node 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +218 -46
- package/package.json +7 -7
package/README.md
CHANGED
|
@@ -28,6 +28,7 @@ in a BAM file in the mod BAM format (using MM/ML tags as specified in the
|
|
|
28
28
|
- [seqTable](#seqtable)
|
|
29
29
|
- [simulateModBam](#simulatemodbam)
|
|
30
30
|
- [TypeScript Support](#typescript-support)
|
|
31
|
+
- [Pagination](#pagination)
|
|
31
32
|
- [Filtering Options](#filtering-options)
|
|
32
33
|
- [Further Documentation](#further-documentation)
|
|
33
34
|
- [Versioning](#versioning)
|
|
@@ -52,83 +53,167 @@ All functions return Promises and support extensive filtering options.
|
|
|
52
53
|
|
|
53
54
|
Quickly extract BAM file metadata without processing all records.
|
|
54
55
|
|
|
56
|
+
<!-- TEST CODE: START peek -->
|
|
55
57
|
```typescript
|
|
56
58
|
import { peek } from '@nanalogue/node';
|
|
57
59
|
|
|
58
60
|
const result = await peek({ bamPath: 'tests/data/examples/example_1.bam' });
|
|
59
|
-
console.log(result);
|
|
60
|
-
// Output:
|
|
61
|
-
// {
|
|
62
|
-
// contigs: { dummyI: 22, dummyII: 48, dummyIII: 76 },
|
|
63
|
-
// modifications: [ [ 'G', '-', '7200' ], [ 'T', '+', 'T' ] ]
|
|
64
|
-
// }
|
|
61
|
+
console.log(JSON.stringify(result));
|
|
65
62
|
```
|
|
63
|
+
<!-- TEST CODE: END peek -->
|
|
64
|
+
|
|
65
|
+
The output is a JSON object with two keys: `contigs` (contig names to lengths)
|
|
66
|
+
and `modifications` (modification entries as `[base, strand, code]` where
|
|
67
|
+
`+` indicates the basecalled strand and `-` indicates its complement).
|
|
68
|
+
|
|
69
|
+
<!-- TEST OUTPUT: START peek -->
|
|
70
|
+
```json
|
|
71
|
+
{"contigs":{"dummyI":22,"dummyII":48,"dummyIII":76},"modifications":[["G","-","7200"],["T","+","T"]]}
|
|
72
|
+
```
|
|
73
|
+
<!-- TEST OUTPUT: END peek -->
|
|
66
74
|
|
|
67
75
|
### readInfo
|
|
68
76
|
|
|
69
77
|
Get information about reads in the BAM file.
|
|
70
78
|
|
|
79
|
+
<!-- TEST CODE: START readInfo -->
|
|
71
80
|
```typescript
|
|
72
81
|
import { readInfo } from '@nanalogue/node';
|
|
73
82
|
|
|
74
83
|
const reads = await readInfo({ bamPath: 'tests/data/examples/example_1.bam' });
|
|
75
|
-
console.log(reads[0]);
|
|
76
|
-
// Output (first read):
|
|
77
|
-
// {
|
|
78
|
-
// read_id: '5d10eb9a-aae1-4db8-8ec6-7ebb34d32575',
|
|
79
|
-
// sequence_length: 8,
|
|
80
|
-
// contig: 'dummyI',
|
|
81
|
-
// reference_start: 9,
|
|
82
|
-
// reference_end: 17,
|
|
83
|
-
// alignment_length: 8,
|
|
84
|
-
// alignment_type: 'primary_forward',
|
|
85
|
-
// mod_count: 'T+T:0;(probabilities >= 0.5020, PHRED base qual >= 0)'
|
|
86
|
-
// }
|
|
84
|
+
console.log(JSON.stringify(reads[0], null, 2));
|
|
87
85
|
```
|
|
86
|
+
<!-- TEST CODE: END readInfo -->
|
|
87
|
+
|
|
88
|
+
The output is a JSON object for the first read:
|
|
89
|
+
|
|
90
|
+
<!-- TEST OUTPUT: START readInfo -->
|
|
91
|
+
```json
|
|
92
|
+
{
|
|
93
|
+
"read_id": "5d10eb9a-aae1-4db8-8ec6-7ebb34d32575",
|
|
94
|
+
"sequence_length": 8,
|
|
95
|
+
"contig": "dummyI",
|
|
96
|
+
"reference_start": 9,
|
|
97
|
+
"reference_end": 17,
|
|
98
|
+
"alignment_length": 8,
|
|
99
|
+
"alignment_type": "primary_forward",
|
|
100
|
+
"mod_count": "T+T:0;(probabilities >= 0.5020, PHRED base qual >= 0)"
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
<!-- TEST OUTPUT: END readInfo -->
|
|
88
104
|
|
|
89
105
|
### bamMods
|
|
90
106
|
|
|
91
107
|
Extract detailed modification data for each read.
|
|
92
108
|
|
|
109
|
+
<!-- TEST CODE: START bamMods -->
|
|
93
110
|
```typescript
|
|
94
111
|
import { bamMods } from '@nanalogue/node';
|
|
95
112
|
|
|
96
113
|
const mods = await bamMods({ bamPath: 'tests/data/examples/example_1.bam' });
|
|
97
|
-
console.log(mods[0]);
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
114
|
+
console.log(JSON.stringify(mods[0], null, 2));
|
|
115
|
+
```
|
|
116
|
+
<!-- TEST CODE: END bamMods -->
|
|
117
|
+
|
|
118
|
+
The output is a JSON object for the first read. The `data` arrays contain
|
|
119
|
+
`[seq_pos, ref_pos, mod_quality]` tuples:
|
|
120
|
+
|
|
121
|
+
<!-- TEST OUTPUT: START bamMods -->
|
|
122
|
+
```json
|
|
123
|
+
{
|
|
124
|
+
"alignment_type": "primary_forward",
|
|
125
|
+
"alignment": {
|
|
126
|
+
"start": 9,
|
|
127
|
+
"end": 17,
|
|
128
|
+
"contig": "dummyI",
|
|
129
|
+
"contig_id": 0
|
|
130
|
+
},
|
|
131
|
+
"mod_table": [
|
|
132
|
+
{
|
|
133
|
+
"base": "T",
|
|
134
|
+
"is_strand_plus": true,
|
|
135
|
+
"mod_code": "T",
|
|
136
|
+
"data": [
|
|
137
|
+
[
|
|
138
|
+
0,
|
|
139
|
+
9,
|
|
140
|
+
4
|
|
141
|
+
],
|
|
142
|
+
[
|
|
143
|
+
3,
|
|
144
|
+
12,
|
|
145
|
+
7
|
|
146
|
+
],
|
|
147
|
+
[
|
|
148
|
+
4,
|
|
149
|
+
13,
|
|
150
|
+
9
|
|
151
|
+
],
|
|
152
|
+
[
|
|
153
|
+
7,
|
|
154
|
+
16,
|
|
155
|
+
6
|
|
156
|
+
]
|
|
157
|
+
]
|
|
158
|
+
}
|
|
159
|
+
],
|
|
160
|
+
"read_id": "5d10eb9a-aae1-4db8-8ec6-7ebb34d32575",
|
|
161
|
+
"seq_len": 8
|
|
162
|
+
}
|
|
111
163
|
```
|
|
164
|
+
<!-- TEST OUTPUT: END bamMods -->
|
|
112
165
|
|
|
113
166
|
### windowReads
|
|
114
167
|
|
|
115
168
|
Compute windowed modification densities across reads.
|
|
116
169
|
|
|
170
|
+
<!-- TEST CODE: START windowReads -->
|
|
117
171
|
```typescript
|
|
118
172
|
import { windowReads } from '@nanalogue/node';
|
|
119
173
|
|
|
120
|
-
const
|
|
174
|
+
const json = await windowReads({
|
|
121
175
|
bamPath: 'tests/data/examples/example_1.bam',
|
|
122
176
|
win: 2,
|
|
123
177
|
step: 1
|
|
124
178
|
});
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
// #contig ref_win_start ref_win_end read_id win_val strand base mod_strand mod_type win_start win_end basecall_qual
|
|
128
|
-
// dummyI 9 13 5d10eb9a-aae1-4db8-8ec6-7ebb34d32575 0 + T + T 0 4 255
|
|
129
|
-
// dummyI 12 14 5d10eb9a-aae1-4db8-8ec6-7ebb34d32575 0 + T + T 3 5 255
|
|
130
|
-
// (basecall_qual is 255 as base quality scores are unavailable in this example file)
|
|
179
|
+
const entries = JSON.parse(json);
|
|
180
|
+
console.log(JSON.stringify(entries[0], null, 2));
|
|
131
181
|
```
|
|
182
|
+
<!-- TEST CODE: END windowReads -->
|
|
183
|
+
|
|
184
|
+
`windowReads` returns a JSON string encoding an array of per-read entries; the example prints the first entry. Each entry contains alignment
|
|
185
|
+
info and a `mod_table` with windowed data tuples
|
|
186
|
+
`[win_start, win_end, win_val, mean_base_qual, ref_win_start, ref_win_end]`.
|
|
187
|
+
(`mean_base_qual` is 255 because base quality scores are unavailable in this example file.)
|
|
188
|
+
NOTE: If the `alignment_type` is "unmapped", then the `alignment` field is not present.
|
|
189
|
+
|
|
190
|
+
<!-- TEST OUTPUT: START windowReads -->
|
|
191
|
+
```json
|
|
192
|
+
{
|
|
193
|
+
"alignment_type": "primary_forward",
|
|
194
|
+
"alignment": {
|
|
195
|
+
"start": 9,
|
|
196
|
+
"end": 17,
|
|
197
|
+
"contig": "dummyI",
|
|
198
|
+
"contig_id": 0
|
|
199
|
+
},
|
|
200
|
+
"mod_table": [
|
|
201
|
+
{
|
|
202
|
+
"base": "T",
|
|
203
|
+
"is_strand_plus": true,
|
|
204
|
+
"mod_code": "T",
|
|
205
|
+
"data": [
|
|
206
|
+
[0, 4, 0.0, 255, 9, 13],
|
|
207
|
+
[3, 5, 0.0, 255, 12, 14],
|
|
208
|
+
[4, 8, 0.0, 255, 13, 17]
|
|
209
|
+
]
|
|
210
|
+
}
|
|
211
|
+
],
|
|
212
|
+
"read_id": "5d10eb9a-aae1-4db8-8ec6-7ebb34d32575",
|
|
213
|
+
"seq_len": 8
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
<!-- TEST OUTPUT: END windowReads -->
|
|
132
217
|
|
|
133
218
|
Supports `winOp: 'grad_density'` for gradient mode.
|
|
134
219
|
|
|
@@ -136,25 +221,36 @@ Supports `winOp: 'grad_density'` for gradient mode.
|
|
|
136
221
|
|
|
137
222
|
Extract sequences and qualities for a genomic region.
|
|
138
223
|
|
|
224
|
+
<!-- TEST CODE: START seqTable -->
|
|
139
225
|
```typescript
|
|
140
226
|
import { seqTable } from '@nanalogue/node';
|
|
141
227
|
|
|
142
228
|
const tsv = await seqTable({
|
|
143
229
|
bamPath: 'tests/data/examples/example_pynanalogue_1.bam',
|
|
144
|
-
region: 'contig_00000:0-10'
|
|
230
|
+
region: 'contig_00000:0-10'
|
|
145
231
|
});
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
// 1... ACGTACGTAC 30.30.30.30.30.30.30.30.30.30
|
|
150
|
-
// 0... AZGTAZGTAZ 20.20.20.20.20.20.20.20.20.20
|
|
151
|
-
// Sequence uses: . for deletion, lowercase for insertion, Z for modification
|
|
232
|
+
const lines = tsv.trimEnd().split('\n');
|
|
233
|
+
const sorted = [lines[0], ...lines.slice(1).sort()].join('\n');
|
|
234
|
+
console.log(sorted);
|
|
152
235
|
```
|
|
236
|
+
<!-- TEST CODE: END seqTable -->
|
|
237
|
+
|
|
238
|
+
The output is a TSV with three columns: `read_id`, `sequence`, and `qualities`.
|
|
239
|
+
Sequence uses: `.` for deletion, lowercase for insertion, `Z` for modification.
|
|
240
|
+
|
|
241
|
+
<!-- TEST OUTPUT: START seqTable -->
|
|
242
|
+
```text
|
|
243
|
+
read_id sequence qualities
|
|
244
|
+
0.dc09ae0d-6b6e-4cb2-b092-078f251a778e AZGTAZGTAZ 20.20.20.20.20.20.20.20.20.20
|
|
245
|
+
1.cb098e1d-26d6-4e14-b979-b089e492c068 ACGTACGTAC 30.30.30.30.30.30.30.30.30.30
|
|
246
|
+
```
|
|
247
|
+
<!-- TEST OUTPUT: END seqTable -->
|
|
153
248
|
|
|
154
249
|
### simulateModBam
|
|
155
250
|
|
|
156
251
|
Generate synthetic BAM files with defined modification patterns (useful for testing).
|
|
157
252
|
|
|
253
|
+
<!-- TEST CODE: NOOUTPUT simulateModBam -->
|
|
158
254
|
```typescript
|
|
159
255
|
import { simulateModBam } from '@nanalogue/node';
|
|
160
256
|
|
|
@@ -179,6 +275,7 @@ await simulateModBam({
|
|
|
179
275
|
fastaPath: 'output.fasta'
|
|
180
276
|
});
|
|
181
277
|
```
|
|
278
|
+
<!-- TEST CODE: END simulateModBam -->
|
|
182
279
|
|
|
183
280
|
## TypeScript Support
|
|
184
281
|
|
|
@@ -190,6 +287,61 @@ to enforce constraints at compile time (e.g., `fullRegion` can only be set when
|
|
|
190
287
|
import type { ReadOptions, BamModRecord, ReadInfoRecord } from '@nanalogue/node';
|
|
191
288
|
```
|
|
192
289
|
|
|
290
|
+
## Pagination
|
|
291
|
+
|
|
292
|
+
All query functions (`readInfo`, `bamMods`, `windowReads`, `seqTable`) support pagination
|
|
293
|
+
via `limit` and `offset` parameters. Pagination is applied after filtering, using lazy
|
|
294
|
+
`.skip(offset).take(limit)` on the BAM record iterator, so only the requested records
|
|
295
|
+
are processed.
|
|
296
|
+
|
|
297
|
+
<!-- TEST CODE: NOOUTPUT pagination_readInfo -->
|
|
298
|
+
```typescript
|
|
299
|
+
import { readInfo } from '@nanalogue/node';
|
|
300
|
+
|
|
301
|
+
// Get the first 10 reads
|
|
302
|
+
const page1 = await readInfo({
|
|
303
|
+
bamPath: 'tests/data/examples/example_1.bam',
|
|
304
|
+
limit: 10,
|
|
305
|
+
offset: 0
|
|
306
|
+
});
|
|
307
|
+
|
|
308
|
+
// Get the next 10 reads
|
|
309
|
+
const page2 = await readInfo({
|
|
310
|
+
bamPath: 'tests/data/examples/example_1.bam',
|
|
311
|
+
limit: 10,
|
|
312
|
+
offset: 10
|
|
313
|
+
});
|
|
314
|
+
```
|
|
315
|
+
<!-- TEST CODE: END pagination_readInfo -->
|
|
316
|
+
|
|
317
|
+
When combining pagination with `sampleFraction`, use `sampleSeed` to ensure
|
|
318
|
+
deterministic sampling across pages. Without a seed, each call may sample
|
|
319
|
+
different reads, making pagination unstable.
|
|
320
|
+
|
|
321
|
+
<!-- TEST CODE: NOOUTPUT pagination_bamMods -->
|
|
322
|
+
```typescript
|
|
323
|
+
import { bamMods } from '@nanalogue/node';
|
|
324
|
+
|
|
325
|
+
// Deterministic 50% subsample, paginated
|
|
326
|
+
const page1 = await bamMods({
|
|
327
|
+
bamPath: 'tests/data/examples/example_1.bam',
|
|
328
|
+
sampleFraction: 0.5,
|
|
329
|
+
sampleSeed: 42,
|
|
330
|
+
limit: 10,
|
|
331
|
+
offset: 0
|
|
332
|
+
});
|
|
333
|
+
|
|
334
|
+
// Same seed ensures the same reads are sampled for every page
|
|
335
|
+
const page2 = await bamMods({
|
|
336
|
+
bamPath: 'tests/data/examples/example_1.bam',
|
|
337
|
+
sampleFraction: 0.5,
|
|
338
|
+
sampleSeed: 42,
|
|
339
|
+
limit: 10,
|
|
340
|
+
offset: 10
|
|
341
|
+
});
|
|
342
|
+
```
|
|
343
|
+
<!-- TEST CODE: END pagination_bamMods -->
|
|
344
|
+
|
|
193
345
|
## Filtering Options
|
|
194
346
|
|
|
195
347
|
All read functions support extensive filtering:
|
|
@@ -206,6 +358,7 @@ All read functions support extensive filtering:
|
|
|
206
358
|
| `mapqFilter` | Minimum mapping quality |
|
|
207
359
|
| `excludeMapqUnavail` | Exclude reads without mapping quality |
|
|
208
360
|
| `sampleFraction` | Subsample reads (0.0 to 1.0) |
|
|
361
|
+
| `sampleSeed` | Seed for deterministic sampling (for reproducible subsampling) |
|
|
209
362
|
| `threads` | Number of threads for BAM reading |
|
|
210
363
|
| `tag` | Filter by modification type |
|
|
211
364
|
| `modStrand` | Filter by modification strand ("bc" or "bc_comp") |
|
|
@@ -214,6 +367,8 @@ All read functions support extensive filtering:
|
|
|
214
367
|
| `trimReadEndsMod` | Trim modification info from read ends |
|
|
215
368
|
| `baseQualFilterMod` | Base quality filter for modifications |
|
|
216
369
|
| `modRegion` | Genomic region for modification filtering |
|
|
370
|
+
| `limit` | Maximum number of records to return (must be > 0) |
|
|
371
|
+
| `offset` | Number of records to skip before returning results (default: 0) |
|
|
217
372
|
|
|
218
373
|
## Further Documentation
|
|
219
374
|
|
|
@@ -237,6 +392,23 @@ While in 0.x.y versions:
|
|
|
237
392
|
|
|
238
393
|
After 1.0.0, we will guarantee backwards compatibility in minor/patch releases.
|
|
239
394
|
|
|
395
|
+
## README Example Testing
|
|
396
|
+
|
|
397
|
+
Code examples in this README are automatically tested by `tests/readme-examples.test.ts`.
|
|
398
|
+
HTML comment markers identify which code blocks to test and what output to expect.
|
|
399
|
+
|
|
400
|
+
The following marker types are used (shown without angle brackets to avoid parser interference;
|
|
401
|
+
in practice, wrap each marker in standard HTML comment delimiters, i.e. `<` + `!--` ... `--` + `>`):
|
|
402
|
+
|
|
403
|
+
- `!-- TEST CODE: START my_example --` / `!-- TEST CODE: END my_example --` wraps a testable code block.
|
|
404
|
+
- `!-- TEST OUTPUT: START my_example --` / `!-- TEST OUTPUT: END my_example --` wraps the expected stdout.
|
|
405
|
+
- `!-- TEST CODE: NOOUTPUT my_example --` / `!-- TEST CODE: END my_example --` wraps code that is executed
|
|
406
|
+
but has no expected output (e.g. it just verifies the code runs without error).
|
|
407
|
+
|
|
408
|
+
The marker name (e.g. `my_example` above) is a plain identifier that links a code block
|
|
409
|
+
to its output block. Each code block that has a corresponding output block must include `console.log()` calls that produce
|
|
410
|
+
exactly the text shown in the corresponding output block.
|
|
411
|
+
|
|
240
412
|
## License
|
|
241
413
|
|
|
242
414
|
MIT License - see [LICENSE](LICENSE) for details.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nanalogue/node",
|
|
3
|
-
"version": "0.1.3",
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "Node.js bindings for Nanalogue: single-molecule BAM/Mod-BAM analysis",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
|
@@ -57,11 +57,11 @@
|
|
|
57
57
|
},
|
|
58
58
|
"author": "Sathish Thiyagarajan <mail@unintegrable.com>",
|
|
59
59
|
"optionalDependencies": {
|
|
60
|
-
"@nanalogue/node-darwin-x64": "0.1.
|
|
61
|
-
"@nanalogue/node-darwin-arm64": "0.1.
|
|
62
|
-
"@nanalogue/node-linux-x64-gnu": "0.1.
|
|
63
|
-
"@nanalogue/node-linux-x64-musl": "0.1.
|
|
64
|
-
"@nanalogue/node-linux-arm64-gnu": "0.1.
|
|
65
|
-
"@nanalogue/node-linux-arm64-musl": "0.1.
|
|
60
|
+
"@nanalogue/node-darwin-x64": "0.1.4",
|
|
61
|
+
"@nanalogue/node-darwin-arm64": "0.1.4",
|
|
62
|
+
"@nanalogue/node-linux-x64-gnu": "0.1.4",
|
|
63
|
+
"@nanalogue/node-linux-x64-musl": "0.1.4",
|
|
64
|
+
"@nanalogue/node-linux-arm64-gnu": "0.1.4",
|
|
65
|
+
"@nanalogue/node-linux-arm64-musl": "0.1.4"
|
|
66
66
|
}
|
|
67
67
|
}
|