@datagrok/bio 2.27.1 → 2.27.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +50 -0
- package/dist/682.js +1 -1
- package/dist/682.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/909.js +2 -0
- package/dist/909.js.map +1 -0
- package/dist/immunum_bg.wasm +0 -0
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +5 -3
- package/src/package-api.ts +15 -1
- package/src/package-test.ts +1 -0
- package/src/package.g.ts +19 -1
- package/src/package.ts +32 -4
- package/src/tests/antibody-numbering-tests.ts +190 -0
- package/src/utils/annotations/numbering-ui.ts +34 -90
- package/src/utils/antibody-numbering/immunum-client.ts +45 -0
- package/src/utils/antibody-numbering/immunum-glue.js +275 -0
- package/src/utils/antibody-numbering/immunum.worker.ts +159 -0
- package/src/utils/antibody-numbering/number-antibody.ts +105 -0
- package/src/utils/antibody-numbering/types.ts +48 -0
- package/src/utils/compare-sequences.ts +104 -0
- package/test-console-output-1.log +568 -535
- package/test-record-1.mp4 +0 -0
- package/webpack.config.js +13 -0
- package/dist/282.js +0 -2
- package/dist/282.js.map +0 -1
- package/dist/287.js +0 -2
- package/dist/287.js.map +0 -1
- package/dist/422.js +0 -2
- package/dist/422.js.map +0 -1
- package/dist/767.js +0 -2
- package/dist/767.js.map +0 -1
- package/src/utils/antibody-numbering (WIP)/alignment.ts +0 -578
- package/src/utils/antibody-numbering (WIP)/annotator.ts +0 -120
- package/src/utils/antibody-numbering (WIP)/data/blosum62.ts +0 -55
- package/src/utils/antibody-numbering (WIP)/data/consensus-aho.ts +0 -155
- package/src/utils/antibody-numbering (WIP)/data/consensus-imgt.ts +0 -162
- package/src/utils/antibody-numbering (WIP)/data/consensus-kabat.ts +0 -157
- package/src/utils/antibody-numbering (WIP)/data/consensus-martin.ts +0 -152
- package/src/utils/antibody-numbering (WIP)/data/consensus.ts +0 -36
- package/src/utils/antibody-numbering (WIP)/data/regions.ts +0 -63
- package/src/utils/antibody-numbering (WIP)/index.ts +0 -31
- package/src/utils/antibody-numbering (WIP)/testdata.ts +0 -5356
- package/src/utils/antibody-numbering (WIP)/types.ts +0 -69
- /package/dist/{8473fcbfb6e85ca6c852.wasm → wasmCluster.wasm} +0 -0
- /package/dist/{9a8fbf37666e32487835.wasm → wasmDbscan.wasm} +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.27.1",
|
|
8
|
+
"version": "2.27.3",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.63.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.63.7",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.11",
|
|
@@ -59,6 +59,8 @@
|
|
|
59
59
|
"datagrok-api": "^1.26.3",
|
|
60
60
|
"dayjs": "^1.11.4",
|
|
61
61
|
"fastest-levenshtein": "^1.0.16",
|
|
62
|
+
"file-loader": "^6.2.0",
|
|
63
|
+
"immunum": "^1.1.0",
|
|
62
64
|
"openchemlib": "^7.2.3",
|
|
63
65
|
"rxjs": "^6.5.5",
|
|
64
66
|
"style-loader": "^3.3.1",
|
|
@@ -69,7 +71,7 @@
|
|
|
69
71
|
"@datagrok-libraries/helm-web-editor": "^1.1.16",
|
|
70
72
|
"@datagrok-libraries/js-draw-lite": "^0.0.10",
|
|
71
73
|
"@datagrok/chem": "^1.17.1",
|
|
72
|
-
"@datagrok/dendrogram": "^1.
|
|
74
|
+
"@datagrok/dendrogram": "^1.4.11",
|
|
73
75
|
"@datagrok/eda": "^1.4.13",
|
|
74
76
|
"@datagrok/helm": "^2.13.1",
|
|
75
77
|
"@datagrok/peptides": "^1.27.4",
|
package/src/package-api.ts
CHANGED
|
@@ -162,7 +162,7 @@ export namespace funcs {
|
|
|
162
162
|
}
|
|
163
163
|
|
|
164
164
|
/**
|
|
165
|
-
Assigns antibody numbering (IMGT/Kabat/Chothia/AHo)
|
|
165
|
+
Assigns antibody numbering (IMGT/Kabat/Chothia/AHo)
|
|
166
166
|
*/
|
|
167
167
|
export async function applyNumberingScheme(): Promise<void> {
|
|
168
168
|
return await grok.functions.call('Bio:ApplyNumberingScheme', {});
|
|
@@ -284,6 +284,20 @@ export namespace funcs {
|
|
|
284
284
|
return await grok.functions.call('Bio:PepseaMsa', { sequenceCol, method, gapOpen, gapExtend });
|
|
285
285
|
}
|
|
286
286
|
|
|
287
|
+
/**
|
|
288
|
+
Assigns antibody numbering (IMGT/Kabat) using the immunum WASM library
|
|
289
|
+
*/
|
|
290
|
+
export async function immunumAntibodyNumbering(df: DG.DataFrame , seqCol: DG.Column , scheme: string ): Promise<DG.DataFrame> {
|
|
291
|
+
return await grok.functions.call('Bio:ImmunumAntibodyNumbering', { df, seqCol, scheme });
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
/**
|
|
295
|
+
Builds a MacromoleculeDifference column from two sequence columns (seq1#seq2)
|
|
296
|
+
*/
|
|
297
|
+
export async function compareSequences(): Promise<void> {
|
|
298
|
+
return await grok.functions.call('Bio:CompareSequences', {});
|
|
299
|
+
}
|
|
300
|
+
|
|
287
301
|
/**
|
|
288
302
|
Visualizes sequence composition on a WebLogo plot
|
|
289
303
|
*/
|
package/src/package-test.ts
CHANGED
package/src/package.g.ts
CHANGED
|
@@ -234,7 +234,7 @@ export async function getRegionTopMenu(table: DG.DataFrame, sequence: DG.Column,
|
|
|
234
234
|
}
|
|
235
235
|
|
|
236
236
|
//name: Apply Numbering Scheme
|
|
237
|
-
//description: Assigns antibody numbering (IMGT/Kabat/Chothia/AHo)
|
|
237
|
+
//description: Assigns antibody numbering (IMGT/Kabat/Chothia/AHo)
|
|
238
238
|
//top-menu: Bio | Annotate | Apply Numbering Scheme...
|
|
239
239
|
export function applyNumberingScheme() : void {
|
|
240
240
|
PackageFunctions.applyNumberingScheme();
|
|
@@ -456,6 +456,24 @@ export async function pepseaMsa(sequenceCol: DG.Column<any>, method: string, gap
|
|
|
456
456
|
return await PackageFunctions.pepseaMsa(sequenceCol, method, gapOpen, gapExtend);
|
|
457
457
|
}
|
|
458
458
|
|
|
459
|
+
//name: Immunum
|
|
460
|
+
//description: Assigns antibody numbering (IMGT/Kabat) using the immunum WASM library
|
|
461
|
+
//input: dataframe df
|
|
462
|
+
//input: column seqCol { semType: Macromolecule }
|
|
463
|
+
//input: string scheme = 'imgt' { choices: ["imgt","kabat"] }
|
|
464
|
+
//output: dataframe result
|
|
465
|
+
//meta.role: antibodyNumbering
|
|
466
|
+
export async function immunumAntibodyNumbering(df: DG.DataFrame, seqCol: DG.Column<any>, scheme: string) : Promise<any> {
|
|
467
|
+
return await PackageFunctions.immunumAntibodyNumbering(df, seqCol, scheme);
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
//name: Compare Sequences
|
|
471
|
+
//description: Builds a MacromoleculeDifference column from two sequence columns (seq1#seq2)
|
|
472
|
+
//top-menu: Bio | Analyze | Compare sequences...
|
|
473
|
+
export function compareSequences() : void {
|
|
474
|
+
PackageFunctions.compareSequences();
|
|
475
|
+
}
|
|
476
|
+
|
|
459
477
|
//name: Composition Analysis
|
|
460
478
|
//description: Visualizes sequence composition on a WebLogo plot
|
|
461
479
|
//output: viewer result
|
package/src/package.ts
CHANGED
|
@@ -44,6 +44,7 @@ import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
|
44
44
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
45
45
|
import {invalidateMols, MONOMERIC_COL_TAGS, SubstructureSearchDialog} from './substructure-search/substructure-search';
|
|
46
46
|
import {convert} from './utils/convert';
|
|
47
|
+
import {compareSequencesUI} from './utils/compare-sequences';
|
|
47
48
|
import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
|
|
48
49
|
import {getMonomerInfoWidget} from './widgets/monomer-info-widget';
|
|
49
50
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
@@ -85,6 +86,10 @@ import {BilnNotationProvider} from './utils/biln';
|
|
|
85
86
|
import {showMonomerCollectionsView} from './utils/monomer-lib/monomer-collections-view';
|
|
86
87
|
import {ISequenceColumnInput} from '@datagrok-libraries/bio/src/utils/sequence-column-input';
|
|
87
88
|
import {SequenceColumnInput} from './utils/sequence-column-input';
|
|
89
|
+
import {showNumberingSchemeDialog} from './utils/annotations/numbering-ui';
|
|
90
|
+
import {showLiabilityScannerDialog} from './utils/annotations/liability-scanner-ui';
|
|
91
|
+
import {showAnnotationManagerDialog} from './utils/annotations/annotation-manager-ui';
|
|
92
|
+
import {numberAntibodyColumn} from './utils/antibody-numbering/number-antibody';
|
|
88
93
|
|
|
89
94
|
import * as api from './package-api';
|
|
90
95
|
export const _package = new BioPackage(/*{debug: true}/**/);
|
|
@@ -478,11 +483,11 @@ export class PackageFunctions {
|
|
|
478
483
|
|
|
479
484
|
@grok.decorators.func({
|
|
480
485
|
name: 'Apply Numbering Scheme',
|
|
481
|
-
description: 'Assigns antibody numbering (IMGT/Kabat/Chothia/AHo)
|
|
486
|
+
description: 'Assigns antibody numbering (IMGT/Kabat/Chothia/AHo)',
|
|
482
487
|
'top-menu': 'Bio | Annotate | Apply Numbering Scheme...',
|
|
483
488
|
})
|
|
484
489
|
static applyNumberingScheme(): void {
|
|
485
|
-
|
|
490
|
+
showNumberingSchemeDialog();
|
|
486
491
|
}
|
|
487
492
|
|
|
488
493
|
@grok.decorators.func({
|
|
@@ -491,7 +496,7 @@ export class PackageFunctions {
|
|
|
491
496
|
'top-menu': 'Bio | Annotate | Scan Liabilities...',
|
|
492
497
|
})
|
|
493
498
|
static scanLiabilities(): void {
|
|
494
|
-
|
|
499
|
+
showLiabilityScannerDialog();
|
|
495
500
|
}
|
|
496
501
|
|
|
497
502
|
@grok.decorators.func({
|
|
@@ -500,7 +505,7 @@ export class PackageFunctions {
|
|
|
500
505
|
'top-menu': 'Bio | Annotate | Manage Annotations...',
|
|
501
506
|
})
|
|
502
507
|
static manageAnnotations(): void {
|
|
503
|
-
|
|
508
|
+
showAnnotationManagerDialog();
|
|
504
509
|
}
|
|
505
510
|
|
|
506
511
|
@grok.decorators.func({
|
|
@@ -992,6 +997,29 @@ export class PackageFunctions {
|
|
|
992
997
|
return alignWithPepsea(sequenceCol, method, gapOpen, gapExtend);
|
|
993
998
|
}
|
|
994
999
|
|
|
1000
|
+
@grok.decorators.func({
|
|
1001
|
+
name: 'Immunum',
|
|
1002
|
+
description: 'Assigns antibody numbering (IMGT/Kabat) using the immunum WASM library',
|
|
1003
|
+
meta: {role: 'antibodyNumbering'},
|
|
1004
|
+
})
|
|
1005
|
+
static async immunumAntibodyNumbering(
|
|
1006
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
1007
|
+
@grok.decorators.param({type: 'dataframe'}) df: DG.DataFrame,
|
|
1008
|
+
@grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) seqCol: DG.Column<string>,
|
|
1009
|
+
@grok.decorators.param({type: 'string', options: {choices: ['imgt', 'kabat'], initialValue: 'imgt'}}) scheme: string,
|
|
1010
|
+
): Promise<DG.DataFrame> {
|
|
1011
|
+
return numberAntibodyColumn(seqCol, scheme);
|
|
1012
|
+
}
|
|
1013
|
+
|
|
1014
|
+
@grok.decorators.func({
|
|
1015
|
+
name: 'Compare Sequences',
|
|
1016
|
+
description: 'Builds a MacromoleculeDifference column from two sequence columns (seq1#seq2)',
|
|
1017
|
+
'top-menu': 'Bio | Analyze | Compare sequences...',
|
|
1018
|
+
})
|
|
1019
|
+
static compareSequences(): void {
|
|
1020
|
+
compareSequencesUI();
|
|
1021
|
+
}
|
|
1022
|
+
|
|
995
1023
|
@grok.decorators.func({
|
|
996
1024
|
name: 'Composition Analysis',
|
|
997
1025
|
description: 'Visualizes sequence composition on a WebLogo plot',
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
|
|
4
|
+
import {category, expect, test} from '@datagrok-libraries/test/src/test';
|
|
5
|
+
|
|
6
|
+
import {_package} from '../package';
|
|
7
|
+
import {numberSequencesWithImmunum} from '../utils/antibody-numbering/immunum-client';
|
|
8
|
+
import {numberAntibodyColumn} from '../utils/antibody-numbering/number-antibody';
|
|
9
|
+
|
|
10
|
+
/** Canonical test sequences picked from samples/antibodies.csv.
|
|
11
|
+
* - HEAVY_1/HEAVY_2 are IGH variable regions starting with the classic QVQL/EVQL motifs
|
|
12
|
+
* - LIGHT_KAPPA/LIGHT_LAMBDA are IGK/IGL variable regions (DIQM... for kappa, QSAL... for lambda)
|
|
13
|
+
* These are stable inputs for immunum so unit tests can assert exact chain type
|
|
14
|
+
* and region coverage without fetching the CSV from the server. */
|
|
15
|
+
const HEAVY_1 = 'QVQLVQSGAEVKKPGASVKVSCKASGYTFTGYYMHWVRQAPGQGLEWMGWINPNSGGTNYAQKFQGRVTMTRDTSISTAYMELSRLRSDDTAVYYCARVAPGALDYWGQGTLVTVSS';
|
|
16
|
+
const HEAVY_2 = 'EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCAKDHYSGSGSYYYYFDYWGQGTLVTVSS';
|
|
17
|
+
const LIGHT_KAPPA = 'DIQMTQSPSSLSASVGDRVTITCRASQDVSTAVAWYQQKPGKAPKLLIYSASFLYSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPRTFGQGTKVEIK';
|
|
18
|
+
const LIGHT_LAMBDA = 'QSALTQPASVSGSPGQSITISCTGTSSDVGGYNYVSWYQQHPGKAPKLMIYDVSNRPSGVSNRFSGSKSGNTASLTISGLQAEDEADYYCSSYTSSSTLVFGGGTKLTVL';
|
|
19
|
+
|
|
20
|
+
/** FR/CDR counts we expect in the immunum annotation JSON for IMGT/Kabat.
|
|
21
|
+
* The engine only accepts IMGT and Kabat — those are the choices declared in
|
|
22
|
+
* package.ts and surfaced in the dialog's scheme dropdown. */
|
|
23
|
+
const EXPECTED_REGION_COUNT = 7; // FR1, CDR1, FR2, CDR2, FR3, CDR3, FR4
|
|
24
|
+
const EXPECTED_REGION_NAMES = ['FR1', 'CDR1', 'FR2', 'CDR2', 'FR3', 'CDR3', 'FR4'];
|
|
25
|
+
|
|
26
|
+
/** Sanity range for alignment confidence on canonical antibody sequences. */
|
|
27
|
+
const MIN_CONFIDENCE = 0.5;
|
|
28
|
+
|
|
29
|
+
category('antibody numbering (immunum)', () => {
|
|
30
|
+
// Each numberSequencesWithImmunum call spawns a fresh worker and terminates
|
|
31
|
+
// it before returning — no shared setup / teardown needed.
|
|
32
|
+
|
|
33
|
+
test('worker: heavy chain (IMGT)', async () => {
|
|
34
|
+
const [row] = await numberSequencesWithImmunum([HEAVY_1], 'imgt');
|
|
35
|
+
expect(row.chainType, 'Heavy');
|
|
36
|
+
expect(row.chainCode, 'H');
|
|
37
|
+
expect(row.confidence >= MIN_CONFIDENCE, true);
|
|
38
|
+
expect(row.positionNames.length > 0, true);
|
|
39
|
+
expect(row.numberingDetail.length > 0, true);
|
|
40
|
+
// numbering_map indices must fall inside the input sequence
|
|
41
|
+
for (const idx of Object.values(row.numberingMap))
|
|
42
|
+
expect(idx >= 0 && idx < HEAVY_1.length, true);
|
|
43
|
+
expect(row.numberingDetail.length, Object.keys(row.numberingMap).length);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
test('worker: light kappa chain (IMGT)', async () => {
|
|
47
|
+
const [row] = await numberSequencesWithImmunum([LIGHT_KAPPA], 'imgt');
|
|
48
|
+
expect(row.chainType, 'Light');
|
|
49
|
+
expect(row.chainCode === 'K' || row.chainCode === 'L', true);
|
|
50
|
+
expect(row.confidence >= MIN_CONFIDENCE, true);
|
|
51
|
+
expect(row.numberingDetail.length > 0, true);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test('worker: light lambda chain (IMGT)', async () => {
|
|
55
|
+
const [row] = await numberSequencesWithImmunum([LIGHT_LAMBDA], 'imgt');
|
|
56
|
+
expect(row.chainType, 'Light');
|
|
57
|
+
expect(row.confidence >= MIN_CONFIDENCE, true);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
test('worker: batch numbering', async () => {
|
|
61
|
+
const rows = await numberSequencesWithImmunum(
|
|
62
|
+
[HEAVY_1, LIGHT_KAPPA, HEAVY_2, LIGHT_LAMBDA], 'imgt');
|
|
63
|
+
expect(rows.length, 4);
|
|
64
|
+
expect(rows[0].chainType, 'Heavy');
|
|
65
|
+
expect(rows[1].chainType, 'Light');
|
|
66
|
+
expect(rows[2].chainType, 'Heavy');
|
|
67
|
+
expect(rows[3].chainType, 'Light');
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
test('worker: empty / short sequences fail gracefully', async () => {
|
|
71
|
+
const rows = await numberSequencesWithImmunum(['', 'AAAA', ' '], 'imgt');
|
|
72
|
+
expect(rows.length, 3);
|
|
73
|
+
for (const r of rows) {
|
|
74
|
+
expect(r.positionNames, '');
|
|
75
|
+
expect(r.numberingDetail.length, 0);
|
|
76
|
+
expect(r.error.length > 0, true);
|
|
77
|
+
}
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
test('worker: kabat scheme returns kabat-style position codes', async () => {
|
|
81
|
+
const [imgt] = await numberSequencesWithImmunum([HEAVY_1], 'imgt');
|
|
82
|
+
const [kabat] = await numberSequencesWithImmunum([HEAVY_1], 'kabat');
|
|
83
|
+
expect(imgt.chainType, 'Heavy');
|
|
84
|
+
expect(kabat.chainType, 'Heavy');
|
|
85
|
+
// Kabat numbering keys should not match IMGT one-for-one — the schemes
|
|
86
|
+
// number the same residues differently. A weak but robust check: the set
|
|
87
|
+
// of keys differs.
|
|
88
|
+
const imgtKeys = new Set(Object.keys(imgt.numberingMap));
|
|
89
|
+
const kabatKeys = new Set(Object.keys(kabat.numberingMap));
|
|
90
|
+
let differ = false;
|
|
91
|
+
for (const k of kabatKeys) if (!imgtKeys.has(k)) {differ = true; break;}
|
|
92
|
+
expect(differ, true);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
test('numberAntibodyColumn: DataFrame shape matches antpack script', async () => {
|
|
96
|
+
const col = DG.Column.fromStrings('seq', [HEAVY_1, LIGHT_KAPPA, '']);
|
|
97
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
98
|
+
const result = await numberAntibodyColumn(col, 'imgt');
|
|
99
|
+
|
|
100
|
+
// Required columns, same names as the Python script
|
|
101
|
+
for (const name of ['position_names', 'chain_type', 'annotations_json',
|
|
102
|
+
'numbering_detail', 'numbering_map']) {
|
|
103
|
+
expect(result.col(name) !== null, true);
|
|
104
|
+
}
|
|
105
|
+
expect(result.rowCount, 3);
|
|
106
|
+
|
|
107
|
+
// Row 0 — heavy chain: all 5 fields populated
|
|
108
|
+
expect(result.get('position_names', 0).length > 0, true);
|
|
109
|
+
expect(result.get('chain_type', 0), 'Heavy');
|
|
110
|
+
const annot0 = JSON.parse(result.get('annotations_json', 0));
|
|
111
|
+
expect(annot0.length, EXPECTED_REGION_COUNT);
|
|
112
|
+
expect(annot0.map((a: any) => a.name).join(','), EXPECTED_REGION_NAMES.join(','));
|
|
113
|
+
for (const a of annot0) {
|
|
114
|
+
expect(a.visualType, 'region');
|
|
115
|
+
expect(a.category, 'structure');
|
|
116
|
+
expect(a.sourceScheme, 'IMGT');
|
|
117
|
+
expect(a.autoGenerated, true);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// Row 1 — light chain: region JSON has same structure
|
|
121
|
+
expect(result.get('chain_type', 1), 'Light');
|
|
122
|
+
const annot1 = JSON.parse(result.get('annotations_json', 1));
|
|
123
|
+
expect(annot1.length, EXPECTED_REGION_COUNT);
|
|
124
|
+
|
|
125
|
+
// Row 2 — empty input: all fields blank / '[]'
|
|
126
|
+
expect(result.get('position_names', 2), '');
|
|
127
|
+
expect(result.get('chain_type', 2), '');
|
|
128
|
+
expect(result.get('annotations_json', 2), '[]');
|
|
129
|
+
expect(result.get('numbering_detail', 2), '');
|
|
130
|
+
expect(result.get('numbering_map', 2), '');
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
test('numberAntibodyColumn: numbering_map indices line up with sequence', async () => {
|
|
134
|
+
const col = DG.Column.fromStrings('seq', [HEAVY_1]);
|
|
135
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
136
|
+
const result = await numberAntibodyColumn(col, 'imgt');
|
|
137
|
+
|
|
138
|
+
const detail = JSON.parse(result.get('numbering_detail', 0));
|
|
139
|
+
const map = JSON.parse(result.get('numbering_map', 0));
|
|
140
|
+
|
|
141
|
+
// For each numbered position: sequence[charIdx] must equal the recorded aa
|
|
142
|
+
for (const entry of detail) {
|
|
143
|
+
const idx = map[entry.position];
|
|
144
|
+
expect(typeof idx === 'number', true);
|
|
145
|
+
expect(HEAVY_1[idx], entry.aa);
|
|
146
|
+
}
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
test('numberAntibodyColumn: annotations_json start/end resolve via numbering_map', async () => {
|
|
150
|
+
const col = DG.Column.fromStrings('seq', [HEAVY_1]);
|
|
151
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
152
|
+
const result = await numberAntibodyColumn(col, 'imgt');
|
|
153
|
+
|
|
154
|
+
const annotations = JSON.parse(result.get('annotations_json', 0));
|
|
155
|
+
const map = JSON.parse(result.get('numbering_map', 0));
|
|
156
|
+
// FR1 start (position "1") must be present and resolve to a valid char index.
|
|
157
|
+
const fr1 = annotations.find((a: any) => a.name === 'FR1');
|
|
158
|
+
expect(fr1 !== undefined, true);
|
|
159
|
+
const startIdx = map[fr1.start];
|
|
160
|
+
expect(typeof startIdx === 'number', true);
|
|
161
|
+
expect(startIdx >= 0 && startIdx < HEAVY_1.length, true);
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
test('numberAntibodyColumn: loads antibodies.csv sample subset', async () => {
|
|
165
|
+
let df: DG.DataFrame;
|
|
166
|
+
try {
|
|
167
|
+
df = await _package.files.readCsv('samples/antibodies.csv');
|
|
168
|
+
} catch (err) {
|
|
169
|
+
// Sample may not be deployed on every server; skip instead of failing.
|
|
170
|
+
console.warn('antibodies.csv not available — skipping', err);
|
|
171
|
+
return;
|
|
172
|
+
}
|
|
173
|
+
const hcCol = df.col('AntibodyHC') ?? df.col('HeavyChain') ?? df.columns.byName('AntibodyHC');
|
|
174
|
+
if (!hcCol) return;
|
|
175
|
+
|
|
176
|
+
// Subset to the first 10 rows so the test finishes in seconds.
|
|
177
|
+
const subset = DG.Column.fromStrings('seq',
|
|
178
|
+
Array.from({length: Math.min(10, hcCol.length)}, (_, i) => hcCol.get(i) ?? ''));
|
|
179
|
+
subset.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
180
|
+
|
|
181
|
+
const result = await numberAntibodyColumn(subset, 'imgt');
|
|
182
|
+
expect(result.rowCount, subset.length);
|
|
183
|
+
|
|
184
|
+
let heavyCount = 0;
|
|
185
|
+
for (let i = 0; i < result.rowCount; i++)
|
|
186
|
+
if (result.get('chain_type', i) === 'Heavy') heavyCount++;
|
|
187
|
+
// Expect the majority of the HC column to be classified as heavy.
|
|
188
|
+
expect(heavyCount >= Math.ceil(subset.length * 0.6), true);
|
|
189
|
+
});
|
|
190
|
+
});
|
|
@@ -7,99 +7,48 @@ import {TAGS as bioTAGS, ALIGNMENT, ALPHABET, NOTATION} from '@datagrok-librarie
|
|
|
7
7
|
import {
|
|
8
8
|
SeqAnnotation, SeqAnnotationHit, AnnotationCategory,
|
|
9
9
|
} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
|
|
10
|
-
import {NumberingScheme} from '@datagrok-libraries/bio/src/utils/macromolecule/numbering-schemes';
|
|
11
10
|
import {
|
|
12
11
|
setColumnAnnotations, getColumnAnnotations,
|
|
13
12
|
getOrCreateAnnotationColumn, getRowAnnotations, setRowAnnotations, mergeRowHits,
|
|
14
13
|
} from './annotation-manager';
|
|
15
|
-
|
|
16
|
-
import type {NumberingResult, Scheme} from '../antibody-numbering (WIP)';
|
|
17
|
-
import {VdRegionsViewer} from '../../viewers/vd-regions-viewer';
|
|
18
|
-
import { VdRegion, VdRegionType } from '@datagrok-libraries/bio/src/viewers/vd-regions';
|
|
19
|
-
|
|
20
|
-
const BUILTIN_ENGINE_KEY = '__builtin__';
|
|
21
|
-
const BUILTIN_ENGINE_LABEL = 'Built-in (TypeScript)';
|
|
22
|
-
|
|
23
|
-
/** An engine entry: either a dynamically discovered DG.Func or the built-in TS engine. */
|
|
14
|
+
/** An engine entry — a dynamically discovered DG.Func with meta.role: 'antibodyNumbering'. */
|
|
24
15
|
interface NumberingEngine {
|
|
25
|
-
/** Display label for the dropdown */
|
|
26
16
|
label: string;
|
|
27
|
-
/** Unique key —
|
|
17
|
+
/** Unique key — `${package}:${name}`. */
|
|
28
18
|
key: string;
|
|
29
|
-
|
|
30
|
-
func: DG.Func | null;
|
|
19
|
+
func: DG.Func;
|
|
31
20
|
}
|
|
32
21
|
|
|
33
|
-
/** Discovers all registered antibody numbering engines
|
|
34
|
-
*
|
|
22
|
+
/** Discovers all registered antibody numbering engines (functions with
|
|
23
|
+
* meta.role = 'antibodyNumbering'). Built-in engines: Bio package ships an
|
|
24
|
+
* immunum-WASM-based engine; other packages (Proteomics etc.) can register
|
|
25
|
+
* AntPack/ANARCI/etc. by adding their own meta.role function. */
|
|
35
26
|
function discoverEngines(): NumberingEngine[] {
|
|
36
27
|
const engines: NumberingEngine[] = [];
|
|
37
|
-
|
|
38
28
|
const funcs = DG.Func.find({meta: {role: 'antibodyNumbering'}});
|
|
39
29
|
if (funcs.length === 0) {
|
|
40
|
-
grok.shell.error('No antibody numbering engines found. Make sure
|
|
41
|
-
throw new Error('No
|
|
30
|
+
grok.shell.error('No antibody numbering engines found. Make sure the Bio package is up to date.');
|
|
31
|
+
throw new Error('No antibody numbering engines found.');
|
|
42
32
|
}
|
|
43
33
|
for (const f of funcs) {
|
|
44
34
|
const pkgName = f.package?.name ?? '';
|
|
45
|
-
const label = f.friendlyName || f.name;
|
|
46
35
|
engines.push({
|
|
47
|
-
label:
|
|
36
|
+
label: f.friendlyName || f.name,
|
|
48
37
|
key: pkgName ? `${pkgName}:${f.name}` : f.name,
|
|
49
38
|
func: f,
|
|
50
39
|
});
|
|
51
40
|
}
|
|
52
|
-
|
|
53
|
-
// Built-in TS engine is always last
|
|
54
|
-
engines.push({label: BUILTIN_ENGINE_LABEL, key: BUILTIN_ENGINE_KEY, func: null});
|
|
55
41
|
return engines;
|
|
56
42
|
}
|
|
57
43
|
|
|
58
|
-
/**
|
|
59
|
-
*
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
const
|
|
64
|
-
const
|
|
65
|
-
|
|
66
|
-
const numMap = DG.Column.fromType(DG.COLUMN_TYPE.STRING, 'numbering_map', n);
|
|
67
|
-
|
|
68
|
-
for (let i = 0; i < n; i++) {
|
|
69
|
-
const r = results[i];
|
|
70
|
-
if (r.error && r.percentIdentity < 0.3) {
|
|
71
|
-
posNames.set(i, '');
|
|
72
|
-
chainTypes.set(i, '');
|
|
73
|
-
annotJson.set(i, '[]');
|
|
74
|
-
numDetail.set(i, '');
|
|
75
|
-
numMap.set(i, '');
|
|
76
|
-
} else {
|
|
77
|
-
posNames.set(i, r.positionNames);
|
|
78
|
-
chainTypes.set(i, r.chainType);
|
|
79
|
-
annotJson.set(i, JSON.stringify(r.annotations));
|
|
80
|
-
numDetail.set(i, JSON.stringify(r.numberingDetail));
|
|
81
|
-
numMap.set(i, JSON.stringify(r.numberingMap));
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
return DG.DataFrame.fromColumns([posNames, chainTypes, annotJson, numDetail, numMap]);
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
/** Runs the built-in TS numbering engine on all rows of a sequence column. */
|
|
89
|
-
async function runBuiltinNumbering(
|
|
90
|
-
seqCol: DG.Column<string>, schemeName: string,
|
|
91
|
-
): Promise<DG.DataFrame> {
|
|
92
|
-
const {numberSequences, extractSequence} = await import('../antibody-numbering (WIP)');
|
|
93
|
-
const scheme = schemeName.toLowerCase() as Scheme;
|
|
94
|
-
|
|
95
|
-
const sequences: string[] = [];
|
|
96
|
-
for (let i = 0; i < seqCol.length; i++) {
|
|
97
|
-
const raw = seqCol.get(i);
|
|
98
|
-
sequences.push(extractSequence(raw ?? ''));
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
const results = numberSequences(sequences, scheme);
|
|
102
|
-
return numberingResultsToDataFrame(results);
|
|
44
|
+
/** Reads the `scheme` parameter's declared choices off an engine function.
|
|
45
|
+
* Engines advertise which schemes they support via the `choices` option of
|
|
46
|
+
* their `scheme: string` parameter; if the function has none we fall back to
|
|
47
|
+
* a single generic IMGT option so the dropdown is never empty. */
|
|
48
|
+
function getEngineSchemes(engine: NumberingEngine): string[] {
|
|
49
|
+
const schemeInput = engine.func.inputs.find((p) => p.name.toLowerCase() === 'scheme');
|
|
50
|
+
const choices = schemeInput?.choices ?? [];
|
|
51
|
+
return choices.length > 0 ? choices.slice() : ['imgt'];
|
|
103
52
|
}
|
|
104
53
|
|
|
105
54
|
export function showNumberingSchemeDialog(): void {
|
|
@@ -117,45 +66,40 @@ export function showNumberingSchemeDialog(): void {
|
|
|
117
66
|
|
|
118
67
|
const engines = discoverEngines();
|
|
119
68
|
const engineLabels = engines.map((e) => e.label);
|
|
120
|
-
const schemeChoices = Object.values(NumberingScheme);
|
|
121
69
|
|
|
122
70
|
const tableInput = ui.input.table('Table', {value: df});
|
|
123
71
|
const seqInput = ui.input.column('Sequence', {
|
|
124
72
|
table: df, value: seqCols[0],
|
|
125
73
|
filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,
|
|
126
74
|
});
|
|
127
|
-
const schemeInput = ui.input.choice('Scheme', {value: NumberingScheme.IMGT, items: schemeChoices});
|
|
128
75
|
const engineInput = ui.input.choice('Engine', {
|
|
129
76
|
value: engineLabels[0], items: engineLabels,
|
|
130
77
|
});
|
|
131
|
-
|
|
132
|
-
|
|
78
|
+
const initialSchemes = getEngineSchemes(engines[0]);
|
|
79
|
+
const schemeInput = ui.input.choice('Scheme', {value: initialSchemes[0], items: initialSchemes});
|
|
80
|
+
|
|
81
|
+
// Switch the scheme list when the user picks a different engine — each engine
|
|
82
|
+
// advertises its own set via the `scheme` parameter's `choices`.
|
|
83
|
+
engineInput.onChanged.subscribe(() => {
|
|
84
|
+
const selected = engines.find((e) => e.label === engineInput.value);
|
|
85
|
+
if (!selected) return;
|
|
86
|
+
const schemes = getEngineSchemes(selected);
|
|
87
|
+
const prev = schemeInput.value;
|
|
88
|
+
schemeInput.items = schemes;
|
|
89
|
+
schemeInput.value = schemes.includes(prev ?? '') ? prev : schemes[0];
|
|
90
|
+
});
|
|
133
91
|
|
|
134
92
|
const dialog = ui.dialog({title: 'Apply Antibody Numbering'})
|
|
135
|
-
.add(ui.inputs([tableInput, seqInput,
|
|
93
|
+
.add(ui.inputs([tableInput, seqInput, engineInput, schemeInput]))
|
|
136
94
|
.onOK(async () => {
|
|
137
95
|
const seqCol = seqInput.value!;
|
|
138
96
|
const schemeName = schemeInput.value!;
|
|
139
97
|
const selectedLabel = engineInput.value!;
|
|
140
|
-
const engine = engines.find((e) => e.label === selectedLabel) ?? engines[
|
|
98
|
+
const engine = engines.find((e) => e.label === selectedLabel) ?? engines[0];
|
|
141
99
|
const pi = DG.TaskBarProgressIndicator.create(`Applying ${schemeName} numbering...`);
|
|
142
100
|
try {
|
|
143
|
-
|
|
144
|
-
if (engine.func)
|
|
145
|
-
result = await engine.func.apply({df: df, seqCol: seqCol, scheme: schemeName.toLowerCase()});
|
|
146
|
-
else
|
|
147
|
-
result = await runBuiltinNumbering(seqCol, schemeName);
|
|
148
|
-
|
|
101
|
+
const result: DG.DataFrame = await engine.func.apply({df: df, seqCol: seqCol, scheme: schemeName});
|
|
149
102
|
applyNumberingResults(df, seqCol, result, schemeName, true, engine.label);
|
|
150
|
-
|
|
151
|
-
// // Open VD Regions viewer
|
|
152
|
-
// if (openVdRegions.value && grok.shell.tv) {
|
|
153
|
-
// try {
|
|
154
|
-
// await grok.shell.tv.dataFrame.plot.fromType('VdRegions', {});
|
|
155
|
-
// } catch (err) {
|
|
156
|
-
// console.warn('Could not open VD Regions viewer:', err);
|
|
157
|
-
// }
|
|
158
|
-
// }
|
|
159
103
|
} catch (err: any) {
|
|
160
104
|
grok.shell.error(`Numbering failed: ${err.message ?? err}`);
|
|
161
105
|
console.error(err);
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
ImmunumNumberingRow, ImmunumWorkerRequest, ImmunumWorkerResponse,
|
|
3
|
+
} from './types';
|
|
4
|
+
|
|
5
|
+
/** Short-lived worker: we spin it up per call and tear it down immediately
|
|
6
|
+
* after. Numbering is a one-shot batch operation — keeping the worker alive
|
|
7
|
+
* would pin the immunum WASM instance (≈700 KB) in memory indefinitely. */
|
|
8
|
+
function spawnWorker(): Worker {
|
|
9
|
+
return new Worker(new URL('./immunum.worker', import.meta.url));
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
function callOnce(worker: Worker, req: ImmunumWorkerRequest): Promise<ImmunumWorkerResponse> {
|
|
13
|
+
return new Promise((resolve, reject) => {
|
|
14
|
+
const ch = new MessageChannel();
|
|
15
|
+
ch.port1.onmessage = ({data}) => {
|
|
16
|
+
ch.port1.close();
|
|
17
|
+
resolve(data as ImmunumWorkerResponse);
|
|
18
|
+
};
|
|
19
|
+
ch.port1.onmessageerror = (err) => {
|
|
20
|
+
ch.port1.close();
|
|
21
|
+
reject(err);
|
|
22
|
+
};
|
|
23
|
+
worker.postMessage({req}, [ch.port2]);
|
|
24
|
+
});
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/** Runs immunum numbering on a batch of sequences inside a web worker. Spawns a
|
|
28
|
+
* fresh worker for this call and terminates it before returning so the WASM
|
|
29
|
+
* instance is freed. Throws on WASM/init errors; individual per-row errors
|
|
30
|
+
* are attached to each row's `error` field. */
|
|
31
|
+
export async function numberSequencesWithImmunum(
|
|
32
|
+
sequences: string[],
|
|
33
|
+
scheme: string,
|
|
34
|
+
chains?: string[],
|
|
35
|
+
minConfidence?: number | null,
|
|
36
|
+
): Promise<ImmunumNumberingRow[]> {
|
|
37
|
+
const worker = spawnWorker();
|
|
38
|
+
try {
|
|
39
|
+
const resp = await callOnce(worker, {op: 'number', sequences, scheme, chains, minConfidence});
|
|
40
|
+
if (!resp.ok) throw new Error(resp.error);
|
|
41
|
+
return resp.rows ?? [];
|
|
42
|
+
} finally {
|
|
43
|
+
worker.terminate();
|
|
44
|
+
}
|
|
45
|
+
}
|