immunum 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +77 -34
- package/immunum.d.ts +26 -2
- package/immunum_bg.wasm +0 -0
- package/package.json +1 -1
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ENPICOM
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -2,23 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
High-performance antibody and TCR sequence numbering in Rust, Python, and WebAssembly.
|
|
4
4
|
|
|
5
|
+
[crates.io](https://crates.io/crates/immunum)
|
|
6
|
+
[PyPI](https://pypi.org/project/immunum/)
|
|
7
|
+
[npm](https://www.npmjs.com/package/immunum)
|
|
8
|
+
[License: MIT](LICENSE)
|
|
9
|
+
[CI](https://github.com/ENPICOM/immunum/actions/workflows/ci.yml)
|
|
10
|
+
[immunum.enpicom.com](https://immunum.enpicom.com)
|
|
11
|
+
|
|
5
12
|
## Overview
|
|
6
13
|
|
|
7
|
-
`immunum` is a library for numbering antibody and T-cell receptor (TCR) variable domain sequences. It uses Needleman-Wunsch semi-global alignment against position-specific scoring matrices
|
|
14
|
+
`immunum` is a library for numbering antibody and T-cell receptor (TCR) variable domain sequences. It uses Needleman-Wunsch semi-global alignment against position-specific scoring matrices built from consensus sequences, with BLOSUM62-based substitution scores.
|
|
8
15
|
|
|
9
16
|
Available as:
|
|
17
|
+
|
|
10
18
|
- **Rust crate** — core library and CLI
|
|
11
19
|
- **Python package** — via PyPI (`pip install immunum`), with a [Polars](https://pola.rs) plugin for vectorized batch processing
|
|
12
|
-
- **npm package** —
|
|
20
|
+
- **npm package** — for Node.js and browsers
|
|
13
21
|
|
|
14
22
|
### Supported chains
|
|
15
23
|
|
|
16
|
-
| Antibody
|
|
17
|
-
|
|
18
|
-
| IGH (heavy)
|
|
19
|
-
| IGK (kappa)
|
|
24
|
+
| Antibody | TCR |
|
|
25
|
+
| ------------ | ----------- |
|
|
26
|
+
| IGH (heavy) | TRA (alpha) |
|
|
27
|
+
| IGK (kappa) | TRB (beta) |
|
|
20
28
|
| IGL (lambda) | TRD (delta) |
|
|
21
|
-
|
|
|
29
|
+
| | TRG (gamma) |
|
|
22
30
|
|
|
23
31
|
### Numbering schemes
|
|
24
32
|
|
|
@@ -27,6 +35,26 @@ Available as:
|
|
|
27
35
|
|
|
28
36
|
Chain type is automatically detected by aligning against all loaded chains and selecting the best match.
|
|
29
37
|
|
|
38
|
+
## Table of Contents
|
|
39
|
+
|
|
40
|
+
- [Python](#python)
|
|
41
|
+
- [Installation](#installation)
|
|
42
|
+
- [Numbering](#numbering)
|
|
43
|
+
- [Segmentation](#segmentation)
|
|
44
|
+
- [Polars plugin](#polars-plugin)
|
|
45
|
+
- [JavaScript / npm](#javascript--npm)
|
|
46
|
+
- [Installation](#installation-1)
|
|
47
|
+
- [Usage](#usage)
|
|
48
|
+
- [Rust](#rust)
|
|
49
|
+
- [Usage](#usage-1)
|
|
50
|
+
- [CLI](#cli)
|
|
51
|
+
- [Options](#options)
|
|
52
|
+
- [Input](#input)
|
|
53
|
+
- [Output](#output)
|
|
54
|
+
- [Examples](#examples)
|
|
55
|
+
- [Development](#development)
|
|
56
|
+
- [Project structure](#project-structure)
|
|
57
|
+
|
|
30
58
|
## Python
|
|
31
59
|
|
|
32
60
|
### Installation
|
|
@@ -46,8 +74,8 @@ sequence = "QVQLVQSGAEVKRPGSSVTVSCKASGGSFSTYALSWVRQAPGRGLEWMGGVIPLLTITNYAPRFQGRI
|
|
|
46
74
|
|
|
47
75
|
result = annotator.number(sequence)
|
|
48
76
|
print(result.chain) # H
|
|
49
|
-
print(result.confidence) # 0.
|
|
50
|
-
print(result.numbering) # {"1": "
|
|
77
|
+
print(result.confidence) # 0.78
|
|
78
|
+
print(result.numbering) # {"1": "Q", "2": "V", "3": "Q", ...}
|
|
51
79
|
```
|
|
52
80
|
|
|
53
81
|
### Segmentation
|
|
@@ -55,14 +83,20 @@ print(result.numbering) # {"1": "E", "2": "V", "3": "Q", ...}
|
|
|
55
83
|
`segment` splits the sequence into FR/CDR regions:
|
|
56
84
|
|
|
57
85
|
```python
|
|
86
|
+
from immunum import Annotator
|
|
87
|
+
|
|
88
|
+
annotator = Annotator(chains=["H", "K", "L"], scheme="imgt")
|
|
89
|
+
|
|
90
|
+
sequence = "QVQLVQSGAEVKRPGSSVTVSCKASGGSFSTYALSWVRQAPGRGLEWMGGVIPLLTITNYAPRFQGRITITADRSTSTAYLELNSLRPEDTAVYYCAREGTTGKPIGAFAHWGQGTLVTVSS"
|
|
91
|
+
|
|
58
92
|
result = annotator.segment(sequence)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
93
|
+
assert result.fr1 == 'QVQLVQSGAEVKRPGSSVTVSCKAS'
|
|
94
|
+
assert result.cdr1 == 'GGSFSTYA'
|
|
95
|
+
assert result.fr2 == 'LSWVRQAPGRGLEWMGG'
|
|
96
|
+
assert result.cdr2 == 'VIPLLTIT'
|
|
97
|
+
assert result.fr3 == 'NYAPRFQGRITITADRSTSTAYLELNSLRPEDTAVYYC'
|
|
98
|
+
assert result.cdr3 == 'AREGTTGKPIGAFAH'
|
|
99
|
+
assert result.fr4 == 'WGQGTLVTVSS'
|
|
66
100
|
```
|
|
67
101
|
|
|
68
102
|
Chains: `"H"` (heavy), `"K"` (kappa), `"L"` (lambda), `"A"` (TRA), `"B"` (TRB), `"G"` (TRG), `"D"` (TRD).
|
|
@@ -73,7 +107,7 @@ For batch processing, `immunum.polars` registers elementwise Polars expressions:
|
|
|
73
107
|
|
|
74
108
|
```python
|
|
75
109
|
import polars as pl
|
|
76
|
-
import immunum.polars as
|
|
110
|
+
import immunum.polars as imp
|
|
77
111
|
|
|
78
112
|
df = pl.DataFrame({"sequence": [
|
|
79
113
|
"QVQLVQSGAEVKRPGSSVTVSCKASGGSFSTYALSWVRQAPGRGLEWMGGVIPLLTITNYAPRFQGRITITADRSTSTAYLELNSLRPEDTAVYYCAREGTTGKPIGAFAHWGQGTLVTVSS",
|
|
@@ -82,18 +116,18 @@ df = pl.DataFrame({"sequence": [
|
|
|
82
116
|
|
|
83
117
|
# Add a struct column with chain, scheme, confidence, numbering
|
|
84
118
|
result = df.with_columns(
|
|
85
|
-
|
|
119
|
+
imp.number(pl.col("sequence"), chains=["H", "K", "L"], scheme="imgt").alias("numbered")
|
|
86
120
|
)
|
|
87
121
|
|
|
88
122
|
# Add a struct column with FR/CDR segments
|
|
89
123
|
result = df.with_columns(
|
|
90
|
-
|
|
124
|
+
imp.segment(pl.col("sequence"), chains=["H", "K", "L"], scheme="imgt").alias("segmented")
|
|
91
125
|
)
|
|
92
126
|
```
|
|
93
127
|
|
|
94
128
|
The `number` expression returns a struct with fields `chain`, `scheme`, `confidence`, and `numbering` (a struct of position→residue). The `segment` expression returns a struct with fields `fr1`, `cdr1`, `fr2`, `cdr2`, `fr3`, `cdr3`, `fr4`, `prefix`, `postfix`.
|
|
95
129
|
|
|
96
|
-
##
|
|
130
|
+
## JavaScript / npm
|
|
97
131
|
|
|
98
132
|
### Installation
|
|
99
133
|
|
|
@@ -110,12 +144,13 @@ await init(); // load the wasm module
|
|
|
110
144
|
|
|
111
145
|
const annotator = new Annotator(["H", "K", "L"], "imgt");
|
|
112
146
|
|
|
113
|
-
const sequence =
|
|
147
|
+
const sequence =
|
|
148
|
+
"QVQLVQSGAEVKRPGSSVTVSCKASGGSFSTYALSWVRQAPGRGLEWMGGVIPLLTITNYAPRFQGRITITADRSTSTAYLELNSLRPEDTAVYYCAREGTTGKPIGAFAHWGQGTLVTVSS";
|
|
114
149
|
|
|
115
150
|
const result = annotator.number(sequence);
|
|
116
|
-
console.log(result.chain);
|
|
117
|
-
console.log(result.confidence);
|
|
118
|
-
console.log(result.numbering);
|
|
151
|
+
console.log(result.chain); // "H"
|
|
152
|
+
console.log(result.confidence); // 0.78
|
|
153
|
+
console.log(result.numbering); // { "1": "Q", "2": "V", ... }
|
|
119
154
|
|
|
120
155
|
const segments = annotator.segment(sequence);
|
|
121
156
|
console.log(segments.cdr3);
|
|
@@ -148,6 +183,13 @@ for (aa, pos) in sequence.chars().zip(result.positions.iter()) {
|
|
|
148
183
|
}
|
|
149
184
|
```
|
|
150
185
|
|
|
186
|
+
Add to `Cargo.toml`:
|
|
187
|
+
|
|
188
|
+
```toml
|
|
189
|
+
[dependencies]
|
|
190
|
+
immunum = "0.9"
|
|
191
|
+
```
|
|
192
|
+
|
|
151
193
|
## CLI
|
|
152
194
|
|
|
153
195
|
```bash
|
|
@@ -156,11 +198,11 @@ immunum number [OPTIONS] [INPUT] [OUTPUT]
|
|
|
156
198
|
|
|
157
199
|
### Options
|
|
158
200
|
|
|
159
|
-
| Flag
|
|
160
|
-
|
|
161
|
-
| `-s, --scheme` | Numbering scheme: `imgt` (`i`), `kabat` (`k`)
|
|
162
|
-
| `-c, --chain`
|
|
163
|
-
| `-f, --format` | Output format: `tsv`, `json`, `jsonl`
|
|
201
|
+
| Flag | Description | Default |
|
|
202
|
+
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------- | ------- |
|
|
203
|
+
| `-s, --scheme` | Numbering scheme: `imgt` (`i`), `kabat` (`k`) | `imgt` |
|
|
204
|
+
| `-c, --chain` | Chain filter: `h`,`k`,`l`,`a`,`b`,`g`,`d` or groups: `ig`, `tcr`, `all`. Accepts any form (`h`, `heavy`, `igh`), case-insensitive. | `ig` |
|
|
205
|
+
| `-f, --format` | Output format: `tsv`, `json`, `jsonl` | `tsv` |
|
|
164
206
|
|
|
165
207
|
### Input
|
|
166
208
|
|
|
@@ -219,7 +261,7 @@ uv tool install go-task-bin
|
|
|
219
261
|
|
|
220
262
|
And then run `task` or `task --list-all` to get the full list of available tasks.
|
|
221
263
|
|
|
222
|
-
By default, `dev` profile will be used in all but `
|
|
264
|
+
By default, the `dev` profile is used for all tasks except `benchmark-*`, but you can change it
|
|
223
265
|
by passing `PROFILE=release` to your task.
|
|
224
266
|
|
|
225
267
|
Also, by default, `task` caches results, but you can bypass the cache by running `task my-task -f`.
|
|
@@ -251,12 +293,12 @@ task lint # runs linting for python and rust
|
|
|
251
293
|
|
|
252
294
|
### Benchmarking
|
|
253
295
|
|
|
254
|
-
There are multiple benchmarks in the repository. For full list, see `task | grep
|
|
296
|
+
There are multiple benchmarks in the repository. For the full list, run `task | grep benchmark`:
|
|
255
297
|
|
|
256
298
|
```bash
|
|
257
299
|
$ task | grep benchmark
|
|
258
300
|
* benchmark-accuracy: Accuracy benchmark across all fixtures (1k sequences, 7 rounds each)
|
|
259
|
-
* benchmark-cli:
|
|
301
|
+
* benchmark-cli: Benchmark correctness of the CLI tool
|
|
260
302
|
* benchmark-comparison: Speed + correctness benchmark: immunum vs antpack vs anarci (1k IGH sequences)
|
|
261
303
|
* benchmark-scaling: Scaling benchmark: sizes 100..10M (10x steps), 1 round, H/imgt. Pass CLI_ARGS to filter tools, e.g. -- --tools immunum
|
|
262
304
|
* benchmark-speed: Speed benchmark across dataset sizes (100 to 1M sequences, 7 rounds, H/imgt)
|
|
@@ -264,6 +306,7 @@ $ task | grep benchmark
|
|
|
264
306
|
```
|
|
265
307
|
|
|
266
308
|
## Project structure
|
|
309
|
+
|
|
267
310
|
```
|
|
268
311
|
src/
|
|
269
312
|
├── main.rs # CLI binary (immunum number ...)
|
|
@@ -291,8 +334,8 @@ fixtures/
|
|
|
291
334
|
└── ig.tsv # Example TSV input
|
|
292
335
|
scripts/ # Python tooling for generating consensus data
|
|
293
336
|
immunum/
|
|
294
|
-
|
|
295
|
-
|
|
337
|
+
├── _internal.pyi # python stub file for pyo3
|
|
338
|
+
├── polars.py # polars extension module
|
|
296
339
|
└── python.py # python module
|
|
297
340
|
```
|
|
298
341
|
|
package/immunum.d.ts
CHANGED
|
@@ -8,7 +8,7 @@ export type Numbering = Record<string, string>;
|
|
|
8
8
|
export interface NumberingResult {
|
|
9
9
|
/** Detected chain type: `"H"`, `"K"`, `"L"`, `"A"`, `"B"`, `"G"`, or `"D"`. */
|
|
10
10
|
chain: string;
|
|
11
|
-
/** Numbering scheme used: `"imgt"` or `"kabat"
|
|
11
|
+
/** Numbering scheme used: `"imgt"` or `"kabat"`. */
|
|
12
12
|
scheme: string;
|
|
13
13
|
/** Alignment confidence score between 0 and 1. */
|
|
14
14
|
confidence: number;
|
|
@@ -31,7 +31,31 @@ export interface SegmentationResult {
|
|
|
31
31
|
postfix: string;
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
-
/**
|
|
34
|
+
/**
|
|
35
|
+
* Annotates antibody and T-cell receptor sequences with IMGT or Kabat position numbers.
|
|
36
|
+
*
|
|
37
|
+
* @param chains - Chain types to consider during auto-detection. Each entry is a
|
|
38
|
+
* case-insensitive string. Accepted values:
|
|
39
|
+
* - Antibody heavy chain: `"IGH"` / `"H"` / `"heavy"`
|
|
40
|
+
* - Antibody kappa chain: `"IGK"` / `"K"` / `"kappa"`
|
|
41
|
+
* - Antibody lambda chain: `"IGL"` / `"L"` / `"lambda"`
|
|
42
|
+
* - TCR alpha chain: `"TRA"` / `"A"` / `"alpha"`
|
|
43
|
+
* - TCR beta chain: `"TRB"` / `"B"` / `"beta"`
|
|
44
|
+
* - TCR gamma chain: `"TRG"` / `"G"` / `"gamma"`
|
|
45
|
+
* - TCR delta chain: `"TRD"` / `"D"` / `"delta"`
|
|
46
|
+
*
|
|
47
|
+
* Pass all chains you want to consider; the annotator scores each and picks the
|
|
48
|
+
 * best-matching one. To consider every supported chain, pass all seven values.
|
|
49
|
+
*
|
|
50
|
+
* @param scheme - Numbering scheme to use for output positions. Accepted values
|
|
51
|
+
* (case-insensitive):
|
|
52
|
+
* - `"IMGT"` / `"i"` — IMGT numbering (recommended; used internally)
|
|
53
|
+
* - `"Kabat"` / `"k"` — Kabat numbering (derived from IMGT)
|
|
54
|
+
*
|
|
55
|
+
* @param min_confidence - Optional minimum alignment confidence threshold in the
|
|
56
|
+
* range `[0, 1]`. Sequences scoring below this value are rejected with an error.
|
|
57
|
+
* Defaults to `0.5` when `null` or omitted.
|
|
58
|
+
*/
|
|
35
59
|
export class Annotator {
|
|
36
60
|
free(): void;
|
|
37
61
|
[Symbol.dispose](): void;
|
package/immunum_bg.wasm
CHANGED
|
Binary file
|