immunum 0.9.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +89 -44
- package/immunum.d.ts +26 -2
- package/immunum_bg.wasm +0 -0
- package/package.json +1 -1
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ENPICOM
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -2,30 +2,61 @@
|
|
|
2
2
|
|
|
3
3
|
High-performance antibody and TCR sequence numbering in Rust, Python, and WebAssembly.
|
|
4
4
|
|
|
5
|
+
[Crates.io](https://crates.io/crates/immunum)
|
|
6
|
+
[PyPI](https://pypi.org/project/immunum/)
|
|
7
|
+
[npm](https://www.npmjs.com/package/immunum)
|
|
8
|
+
[License: MIT](LICENSE)
|
|
9
|
+
[CI](https://github.com/ENPICOM/immunum/actions/workflows/ci.yml)
|
|
10
|
+
[immunum.enpicom.com](https://immunum.enpicom.com)
|
|
11
|
+
|
|
5
12
|
## Overview
|
|
6
13
|
|
|
7
|
-
`immunum` is a library for numbering antibody and T-cell receptor (TCR) variable domain sequences. It uses Needleman-Wunsch semi-global alignment against position-specific scoring matrices
|
|
14
|
+
`immunum` is a library for numbering antibody and T-cell receptor (TCR) variable domain sequences. It uses Needleman-Wunsch semi-global alignment against position-specific scoring matrices built from consensus sequences, with BLOSUM62-based substitution scores.
|
|
8
15
|
|
|
9
16
|
Available as:
|
|
17
|
+
|
|
10
18
|
- **Rust crate** — core library and CLI
|
|
11
|
-
- **Python package** —
|
|
12
|
-
- **npm package** —
|
|
19
|
+
- **Python package** — with a [Polars](https://pola.rs) plugin for vectorized batch processing
|
|
20
|
+
- **npm package** — for Node.js and browsers
|
|
13
21
|
|
|
14
22
|
### Supported chains
|
|
15
23
|
|
|
16
|
-
| Antibody
|
|
17
|
-
|
|
18
|
-
| IGH (heavy)
|
|
19
|
-
| IGK (kappa)
|
|
24
|
+
| Antibody | TCR |
|
|
25
|
+
| ------------ | ----------- |
|
|
26
|
+
| IGH (heavy) | TRA (alpha) |
|
|
27
|
+
| IGK (kappa) | TRB (beta) |
|
|
20
28
|
| IGL (lambda) | TRD (delta) |
|
|
21
|
-
|
|
|
29
|
+
| | TRG (gamma) |
|
|
30
|
+
|
|
31
|
+
Chain codes: `H` (IGH), `K` (IGK), `L` (IGL), `A` (TRA), `B` (TRB), `D` (TRD), `G` (TRG).
|
|
32
|
+
|
|
33
|
+
Chain type is automatically detected by aligning against all loaded chains and selecting the best match.
|
|
22
34
|
|
|
23
35
|
### Numbering schemes
|
|
24
36
|
|
|
25
37
|
- **IMGT** — all 7 chain types
|
|
26
38
|
- **Kabat** — antibody chains (IGH, IGK, IGL)
|
|
27
39
|
|
|
28
|
-
|
|
40
|
+
## Table of Contents
|
|
41
|
+
|
|
42
|
+
- [Python](#python)
|
|
43
|
+
- [Installation](#installation)
|
|
44
|
+
- [Numbering](#numbering)
|
|
45
|
+
- [Segmentation](#segmentation)
|
|
46
|
+
- [Polars plugin](#polars-plugin)
|
|
47
|
+
- [JavaScript / npm](#javascript--npm)
|
|
48
|
+
- [Installation](#installation-1)
|
|
49
|
+
- [Usage](#usage)
|
|
50
|
+
- [Rust](#rust)
|
|
51
|
+
- [Installation](#installation-2)
|
|
52
|
+
- [Usage](#usage-1)
|
|
53
|
+
- [CLI](#cli)
|
|
54
|
+
- [Options](#options)
|
|
55
|
+
- [Input](#input)
|
|
56
|
+
- [Output](#output)
|
|
57
|
+
- [Examples](#examples)
|
|
58
|
+
- [Development](#development)
|
|
59
|
+
- [Project structure](#project-structure)
|
|
29
60
|
|
|
30
61
|
## Python
|
|
31
62
|
|
|
@@ -46,8 +77,8 @@ sequence = "QVQLVQSGAEVKRPGSSVTVSCKASGGSFSTYALSWVRQAPGRGLEWMGGVIPLLTITNYAPRFQGRI
|
|
|
46
77
|
|
|
47
78
|
result = annotator.number(sequence)
|
|
48
79
|
print(result.chain) # H
|
|
49
|
-
print(result.confidence) # 0.
|
|
50
|
-
print(result.numbering) # {"1": "
|
|
80
|
+
print(result.confidence) # 0.78
|
|
81
|
+
print(result.numbering) # {"1": "Q", "2": "V", "3": "Q", ...}
|
|
51
82
|
```
|
|
52
83
|
|
|
53
84
|
### Segmentation
|
|
@@ -55,25 +86,29 @@ print(result.numbering) # {"1": "E", "2": "V", "3": "Q", ...}
|
|
|
55
86
|
`segment` splits the sequence into FR/CDR regions:
|
|
56
87
|
|
|
57
88
|
```python
|
|
89
|
+
from immunum import Annotator
|
|
90
|
+
|
|
91
|
+
annotator = Annotator(chains=["H", "K", "L"], scheme="imgt")
|
|
92
|
+
|
|
93
|
+
sequence = "QVQLVQSGAEVKRPGSSVTVSCKASGGSFSTYALSWVRQAPGRGLEWMGGVIPLLTITNYAPRFQGRITITADRSTSTAYLELNSLRPEDTAVYYCAREGTTGKPIGAFAHWGQGTLVTVSS"
|
|
94
|
+
|
|
58
95
|
result = annotator.segment(sequence)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
96
|
+
assert result.fr1 == 'QVQLVQSGAEVKRPGSSVTVSCKAS'
|
|
97
|
+
assert result.cdr1 == 'GGSFSTYA'
|
|
98
|
+
assert result.fr2 == 'LSWVRQAPGRGLEWMGG'
|
|
99
|
+
assert result.cdr2 == 'VIPLLTIT'
|
|
100
|
+
assert result.fr3 == 'NYAPRFQGRITITADRSTSTAYLELNSLRPEDTAVYYC'
|
|
101
|
+
assert result.cdr3 == 'AREGTTGKPIGAFAH'
|
|
102
|
+
assert result.fr4 == 'WGQGTLVTVSS'
|
|
66
103
|
```
|
|
67
104
|
|
|
68
|
-
Chains: `"H"` (heavy), `"K"` (kappa), `"L"` (lambda), `"A"` (TRA), `"B"` (TRB), `"G"` (TRG), `"D"` (TRD).
|
|
69
|
-
|
|
70
105
|
### Polars plugin
|
|
71
106
|
|
|
72
107
|
For batch processing, `immunum.polars` registers elementwise Polars expressions:
|
|
73
108
|
|
|
74
109
|
```python
|
|
75
110
|
import polars as pl
|
|
76
|
-
import immunum.polars as
|
|
111
|
+
import immunum.polars as imp
|
|
77
112
|
|
|
78
113
|
df = pl.DataFrame({"sequence": [
|
|
79
114
|
"QVQLVQSGAEVKRPGSSVTVSCKASGGSFSTYALSWVRQAPGRGLEWMGGVIPLLTITNYAPRFQGRITITADRSTSTAYLELNSLRPEDTAVYYCAREGTTGKPIGAFAHWGQGTLVTVSS",
|
|
@@ -82,18 +117,18 @@ df = pl.DataFrame({"sequence": [
|
|
|
82
117
|
|
|
83
118
|
# Add a struct column with chain, scheme, confidence, numbering
|
|
84
119
|
result = df.with_columns(
|
|
85
|
-
|
|
120
|
+
imp.number(pl.col("sequence"), chains=["H", "K", "L"], scheme="imgt").alias("numbered")
|
|
86
121
|
)
|
|
87
122
|
|
|
88
123
|
# Add a struct column with FR/CDR segments
|
|
89
124
|
result = df.with_columns(
|
|
90
|
-
|
|
125
|
+
imp.segment(pl.col("sequence"), chains=["H", "K", "L"], scheme="imgt").alias("segmented")
|
|
91
126
|
)
|
|
92
127
|
```
|
|
93
128
|
|
|
94
129
|
The `number` expression returns a struct with fields `chain`, `scheme`, `confidence`, and `numbering` (a struct of position→residue). The `segment` expression returns a struct with fields `fr1`, `cdr1`, `fr2`, `cdr2`, `fr3`, `cdr3`, `fr4`, `prefix`, `postfix`.
|
|
95
130
|
|
|
96
|
-
##
|
|
131
|
+
## JavaScript / npm
|
|
97
132
|
|
|
98
133
|
### Installation
|
|
99
134
|
|
|
@@ -104,27 +139,35 @@ npm install immunum
|
|
|
104
139
|
### Usage
|
|
105
140
|
|
|
106
141
|
```js
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
await init(); // load the wasm module
|
|
142
|
+
const { Annotator } = require("immunum");
|
|
110
143
|
|
|
111
144
|
const annotator = new Annotator(["H", "K", "L"], "imgt");
|
|
112
145
|
|
|
113
|
-
const sequence =
|
|
146
|
+
const sequence =
|
|
147
|
+
"QVQLVQSGAEVKRPGSSVTVSCKASGGSFSTYALSWVRQAPGRGLEWMGGVIPLLTITNYAPRFQGRITITADRSTSTAYLELNSLRPEDTAVYYCAREGTTGKPIGAFAHWGQGTLVTVSS";
|
|
114
148
|
|
|
115
149
|
const result = annotator.number(sequence);
|
|
116
|
-
console.log(result.chain);
|
|
117
|
-
console.log(result.confidence);
|
|
118
|
-
console.log(result.numbering);
|
|
150
|
+
console.log(result.chain); // "H"
|
|
151
|
+
console.log(result.confidence); // 0.97
|
|
152
|
+
console.log(result.numbering); // { "1": "Q", "2": "V", ... }
|
|
119
153
|
|
|
120
154
|
const segments = annotator.segment(sequence);
|
|
121
|
-
console.log(segments.cdr3);
|
|
155
|
+
console.log(segments.cdr3); // "AREGTTGKPIGAFAH"
|
|
122
156
|
|
|
123
157
|
annotator.free(); // or use `using annotator = new Annotator(...)` with explicit resource management
|
|
124
158
|
```
|
|
125
159
|
|
|
126
160
|
## Rust
|
|
127
161
|
|
|
162
|
+
### Installation
|
|
163
|
+
|
|
164
|
+
Add to `Cargo.toml`:
|
|
165
|
+
|
|
166
|
+
```toml
|
|
167
|
+
[dependencies]
|
|
168
|
+
immunum = "1.0"
|
|
169
|
+
```
|
|
170
|
+
|
|
128
171
|
### Usage
|
|
129
172
|
|
|
130
173
|
```rust
|
|
@@ -139,13 +182,14 @@ let annotator = Annotator::new(
|
|
|
139
182
|
let sequence = "QVQLVQSGAEVKRPGSSVTVSCKASGGSFSTYALSWVRQAPGRGLEWMGGVIPLLTITNYAPRFQGRITITADRSTSTAYLELNSLRPEDTAVYYCAREGTTGKPIGAFAHWGQGTLVTVSS";
|
|
140
183
|
|
|
141
184
|
let result = annotator.number(sequence).unwrap();
|
|
142
|
-
|
|
143
185
|
println!("Chain: {}", result.chain); // IGH
|
|
144
186
|
println!("Confidence: {:.2}", result.confidence);
|
|
145
|
-
|
|
146
187
|
for (aa, pos) in sequence.chars().zip(result.positions.iter()) {
|
|
147
188
|
println!("{} -> {}", aa, pos);
|
|
148
189
|
}
|
|
190
|
+
|
|
191
|
+
let segments = annotator.segment(sequence).unwrap();
|
|
192
|
+
println!("CDR3: {}", segments.cdr3);
|
|
149
193
|
```
|
|
150
194
|
|
|
151
195
|
## CLI
|
|
@@ -156,11 +200,11 @@ immunum number [OPTIONS] [INPUT] [OUTPUT]
|
|
|
156
200
|
|
|
157
201
|
### Options
|
|
158
202
|
|
|
159
|
-
| Flag
|
|
160
|
-
|
|
161
|
-
| `-s, --scheme` | Numbering scheme: `imgt` (`i`), `kabat` (`k`)
|
|
162
|
-
| `-c, --chain`
|
|
163
|
-
| `-f, --format` | Output format: `tsv`, `json`, `jsonl`
|
|
203
|
+
| Flag | Description | Default |
|
|
204
|
+
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------- | ------- |
|
|
205
|
+
| `-s, --scheme` | Numbering scheme: `imgt` (`i`), `kabat` (`k`) | `imgt` |
|
|
206
|
+
| `-c, --chain` | Chain filter: `h`,`k`,`l`,`a`,`b`,`g`,`d` or groups: `ig`, `tcr`, `all`. Accepts any form (`h`, `heavy`, `igh`), case-insensitive. | `ig` |
|
|
207
|
+
| `-f, --format` | Output format: `tsv`, `json`, `jsonl` | `tsv` |
|
|
164
208
|
|
|
165
209
|
### Input
|
|
166
210
|
|
|
@@ -219,7 +263,7 @@ uv tool install go-task-bin
|
|
|
219
263
|
|
|
220
264
|
And then run `task` or `task --list-all` to get the full list of available tasks.
|
|
221
265
|
|
|
222
|
-
By default, `dev` profile will be used in all but `
|
|
266
|
+
By default, the `dev` profile is used for all tasks except the `benchmark-*` tasks, but you can change it
|
|
223
267
|
by passing `PROFILE=release` to your task.
|
|
224
268
|
|
|
225
269
|
Also, by default, `task` caches results, but you can ignore it by running `task my-task -f`.
|
|
@@ -251,12 +295,12 @@ task lint # runs linting for python and rust
|
|
|
251
295
|
|
|
252
296
|
### Benchmarking
|
|
253
297
|
|
|
254
|
-
There are multiple benchmarks in the repository. For full list, see `task | grep
|
|
298
|
+
There are multiple benchmarks in the repository. For the full list, run `task | grep benchmark`:
|
|
255
299
|
|
|
256
300
|
```bash
|
|
257
301
|
$ task | grep benchmark
|
|
258
302
|
* benchmark-accuracy: Accuracy benchmark across all fixtures (1k sequences, 7 rounds each)
|
|
259
|
-
* benchmark-cli:
|
|
303
|
+
* benchmark-cli: Benchmark correctness of the CLI tool
|
|
260
304
|
* benchmark-comparison: Speed + correctness benchmark: immunum vs antpack vs anarci (1k IGH sequences)
|
|
261
305
|
* benchmark-scaling: Scaling benchmark: sizes 100..10M (10x steps), 1 round, H/imgt. Pass CLI_ARGS to filter tools, e.g. -- --tools immunum
|
|
262
306
|
* benchmark-speed: Speed benchmark across dataset sizes (100 to 1M sequences, 7 rounds, H/imgt)
|
|
@@ -264,6 +308,7 @@ $ task | grep benchmark
|
|
|
264
308
|
```
|
|
265
309
|
|
|
266
310
|
## Project structure
|
|
311
|
+
|
|
267
312
|
```
|
|
268
313
|
src/
|
|
269
314
|
├── main.rs # CLI binary (immunum number ...)
|
|
@@ -291,8 +336,8 @@ fixtures/
|
|
|
291
336
|
└── ig.tsv # Example TSV input
|
|
292
337
|
scripts/ # Python tooling for generating consensus data
|
|
293
338
|
immunum/
|
|
294
|
-
|
|
295
|
-
|
|
339
|
+
├── _internal.pyi # python stub file for pyo3
|
|
340
|
+
├── polars.py # polars extension module
|
|
296
341
|
└── python.py # python module
|
|
297
342
|
```
|
|
298
343
|
|
package/immunum.d.ts
CHANGED
|
@@ -8,7 +8,7 @@ export type Numbering = Record<string, string>;
|
|
|
8
8
|
export interface NumberingResult {
|
|
9
9
|
/** Detected chain type: `"H"`, `"K"`, `"L"`, `"A"`, `"B"`, `"G"`, or `"D"`. */
|
|
10
10
|
chain: string;
|
|
11
|
-
/** Numbering scheme used: `"imgt"` or `"kabat"
|
|
11
|
+
/** Numbering scheme used: `"imgt"` or `"kabat"`. */
|
|
12
12
|
scheme: string;
|
|
13
13
|
/** Alignment confidence score between 0 and 1. */
|
|
14
14
|
confidence: number;
|
|
@@ -31,7 +31,31 @@ export interface SegmentationResult {
|
|
|
31
31
|
postfix: string;
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
-
/**
|
|
34
|
+
/**
|
|
35
|
+
* Annotates antibody and T-cell receptor sequences with IMGT or Kabat position numbers.
|
|
36
|
+
*
|
|
37
|
+
* @param chains - Chain types to consider during auto-detection. Each entry is a
|
|
38
|
+
* case-insensitive string. Accepted values:
|
|
39
|
+
* - Antibody heavy chain: `"IGH"` / `"H"` / `"heavy"`
|
|
40
|
+
* - Antibody kappa chain: `"IGK"` / `"K"` / `"kappa"`
|
|
41
|
+
* - Antibody lambda chain: `"IGL"` / `"L"` / `"lambda"`
|
|
42
|
+
* - TCR alpha chain: `"TRA"` / `"A"` / `"alpha"`
|
|
43
|
+
* - TCR beta chain: `"TRB"` / `"B"` / `"beta"`
|
|
44
|
+
* - TCR gamma chain: `"TRG"` / `"G"` / `"gamma"`
|
|
45
|
+
* - TCR delta chain: `"TRD"` / `"D"` / `"delta"`
|
|
46
|
+
*
|
|
47
|
+
* Pass all chains you want to consider; the annotator scores each and picks the
|
|
48
|
+
* best-matching one. To consider every supported chain pass all seven values.
|
|
49
|
+
*
|
|
50
|
+
* @param scheme - Numbering scheme to use for output positions. Accepted values
|
|
51
|
+
* (case-insensitive):
|
|
52
|
+
* - `"IMGT"` / `"i"` — IMGT numbering (recommended; used internally)
|
|
53
|
+
* - `"Kabat"` / `"k"` — Kabat numbering (derived from IMGT)
|
|
54
|
+
*
|
|
55
|
+
* @param min_confidence - Optional minimum alignment confidence threshold in the
|
|
56
|
+
* range `[0, 1]`. Sequences scoring below this value are rejected with an error.
|
|
57
|
+
* Defaults to `0.5` when `null` or omitted.
|
|
58
|
+
*/
|
|
35
59
|
export class Annotator {
|
|
36
60
|
free(): void;
|
|
37
61
|
[Symbol.dispose](): void;
|
package/immunum_bg.wasm
CHANGED
|
Binary file
|