@datagrok/bio 2.25.17 → 2.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/dist/282.js +2 -0
- package/dist/282.js.map +1 -0
- package/dist/287.js +2 -0
- package/dist/287.js.map +1 -0
- package/dist/288.js +2 -0
- package/dist/288.js.map +1 -0
- package/dist/422.js +2 -0
- package/dist/422.js.map +1 -0
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/767.js +2 -0
- package/dist/767.js.map +1 -0
- package/dist/package-test.js +5 -5
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/files/samples/antibodies.csv +494 -0
- package/package.json +2 -2
- package/src/package-api.ts +28 -0
- package/src/package.g.ts +31 -1
- package/src/package.ts +40 -1
- package/src/tests/substructure-filters-tests.ts +1 -0
- package/src/utils/annotations/annotation-actions.ts +130 -0
- package/src/utils/annotations/annotation-manager-ui.ts +118 -0
- package/src/utils/annotations/annotation-manager.ts +163 -0
- package/src/utils/annotations/liability-scanner-ui.ts +88 -0
- package/src/utils/annotations/liability-scanner.ts +147 -0
- package/src/utils/annotations/numbering-ui.ts +472 -0
- package/src/utils/antibody-numbering (WIP)/alignment.ts +578 -0
- package/src/utils/antibody-numbering (WIP)/annotator.ts +120 -0
- package/src/utils/antibody-numbering (WIP)/data/blosum62.ts +55 -0
- package/src/utils/antibody-numbering (WIP)/data/consensus-aho.ts +155 -0
- package/src/utils/antibody-numbering (WIP)/data/consensus-imgt.ts +162 -0
- package/src/utils/antibody-numbering (WIP)/data/consensus-kabat.ts +157 -0
- package/src/utils/antibody-numbering (WIP)/data/consensus-martin.ts +152 -0
- package/src/utils/antibody-numbering (WIP)/data/consensus.ts +36 -0
- package/src/utils/antibody-numbering (WIP)/data/regions.ts +63 -0
- package/src/utils/antibody-numbering (WIP)/index.ts +31 -0
- package/src/utils/antibody-numbering (WIP)/testdata.ts +5356 -0
- package/src/utils/antibody-numbering (WIP)/types.ts +69 -0
- package/src/utils/context-menu.ts +42 -2
- package/src/utils/get-region-func-editor.ts +18 -2
- package/src/utils/get-region.ts +167 -17
- package/src/utils/sequence-column-input.ts +57 -0
- package/src/viewers/vd-regions-viewer.ts +2 -0
- package/src/widgets/representations.ts +53 -2
- package/src/widgets/sequence-scrolling-widget.ts +28 -18
- package/test-console-output-1.log +587 -551
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.25.17",
|
|
8
|
+
"version": "2.26.1",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.63.2",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.9",
|
package/src/package-api.ts
CHANGED
|
@@ -157,6 +157,34 @@ export namespace funcs {
|
|
|
157
157
|
return await grok.functions.call('Bio:GetRegionTopMenu', { table, sequence, start, end, name });
|
|
158
158
|
}
|
|
159
159
|
|
|
160
|
+
/**
 * Assigns antibody numbering (IMGT/Kabat/Chothia/AHo) using AntPack.
 *
 * Client-side wrapper: invokes the registered function `Bio:ApplyNumberingScheme`
 * with an empty argument object and resolves when the call completes.
 */
export async function applyNumberingScheme(): Promise<void> {
  const callArgs = {};
  return await grok.functions.call('Bio:ApplyNumberingScheme', callArgs);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Scans macromolecule sequences for deamidation, oxidation, and other liabilities.
 *
 * Client-side wrapper: invokes the registered function `Bio:ScanLiabilities`
 * with an empty argument object and resolves when the call completes.
 */
export async function scanLiabilities(): Promise<void> {
  const callArgs = {};
  return await grok.functions.call('Bio:ScanLiabilities', callArgs);
}
|
|
173
|
+
|
|
174
|
+
/**
 * View and manage sequence annotations on macromolecule columns.
 *
 * Client-side wrapper: invokes the registered function `Bio:ManageAnnotations`
 * with an empty argument object and resolves when the call completes.
 */
export async function manageAnnotations(): Promise<void> {
  const callArgs = {};
  return await grok.functions.call('Bio:ManageAnnotations', callArgs);
}
|
|
180
|
+
|
|
181
|
+
/**
 * Creates a new input for sequence columns with ability to extract a region.
 *
 * Client-side wrapper around the registered function `Bio:SequenceColumnInput`.
 *
 * @param name Forwarded to the function as its `name` argument.
 * @param options Forwarded unchanged as the `options` argument.
 * @returns Whatever the registered function resolves to.
 */
export async function sequenceColumnInput(name: string , options: any ): Promise<any> {
  const callArgs = {name: name, options: options};
  return await grok.functions.call('Bio:SequenceColumnInput', callArgs);
}
|
|
187
|
+
|
|
160
188
|
/**
|
|
161
189
|
Detects pairs of molecules with similar structure and significant difference in any given property
|
|
162
190
|
*/
|
package/src/package.g.ts
CHANGED
|
@@ -217,12 +217,42 @@ export function getRegion(sequence: DG.Column<any>, start?: string, end?: string
|
|
|
217
217
|
//input: string start { optional: true; description: Region start position name }
|
|
218
218
|
//input: string end { optional: true; description: Region end position name }
|
|
219
219
|
//input: string name { optional: true; description: Region column name }
|
|
220
|
-
//top-menu: Bio | Calculate |
|
|
220
|
+
//top-menu: Bio | Calculate | Extract Region...
|
|
221
221
|
//editor: Bio:GetRegionEditor
|
|
222
222
|
export async function getRegionTopMenu(table: DG.DataFrame, sequence: DG.Column, start?: string, end?: string, name?: string) : Promise<void> {
|
|
223
223
|
await PackageFunctions.getRegionTopMenu(table, sequence, start, end, name);
|
|
224
224
|
}
|
|
225
225
|
|
|
226
|
+
//name: Apply Numbering Scheme
//description: Assigns antibody numbering (IMGT/Kabat/Chothia/AHo) using AntPack
//top-menu: Bio | Annotate | Apply Numbering Scheme...
export function applyNumberingScheme() : void {
  // Registration shim: the implementation lives on PackageFunctions.
  PackageFunctions.applyNumberingScheme();
}
|
|
232
|
+
|
|
233
|
+
//name: Scan Liabilities
//description: Scans macromolecule sequences for deamidation, oxidation, and other liabilities
//top-menu: Bio | Annotate | Scan Liabilities...
export function scanLiabilities() : void {
  // Registration shim: the implementation lives on PackageFunctions.
  PackageFunctions.scanLiabilities();
}
|
|
239
|
+
|
|
240
|
+
//name: Manage Annotations
//description: View and manage sequence annotations on macromolecule columns
//top-menu: Bio | Annotate | Manage Annotations...
export function manageAnnotations() : void {
  // Registration shim: the implementation lives on PackageFunctions.
  PackageFunctions.manageAnnotations();
}
|
|
246
|
+
|
|
247
|
+
//name: Sequence Column Input
//description: Creates a new input for sequence columns with ability to extract a region
//input: string name
//input: dynamic options
//output: dynamic result
export function sequenceColumnInput(name: string, options: any) : any {
  // Registration shim: both arguments are passed through untouched.
  const created = PackageFunctions.sequenceColumnInput(name, options);
  return created;
}
|
|
255
|
+
|
|
226
256
|
//name: Sequence Activity Cliffs
|
|
227
257
|
//description: Detects pairs of molecules with similar structure and significant difference in any given property
|
|
228
258
|
//input: dataframe table { description: Input data table }
|
package/src/package.ts
CHANGED
|
@@ -79,6 +79,8 @@ import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget
|
|
|
79
79
|
import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
|
|
80
80
|
import {BilnNotationProvider} from './utils/biln';
|
|
81
81
|
import {showMonomerCollectionsView} from './utils/monomer-lib/monomer-collections-view';
|
|
82
|
+
import {ISequenceColumnInput} from '@datagrok-libraries/bio/src/utils/sequence-column-input';
|
|
83
|
+
import {SequenceColumnInput} from './utils/sequence-column-input';
|
|
82
84
|
|
|
83
85
|
import * as api from './package-api';
|
|
84
86
|
export const _package = new BioPackage(/*{debug: true}/**/);
|
|
@@ -445,7 +447,7 @@ export class PackageFunctions {
|
|
|
445
447
|
@grok.decorators.func({
|
|
446
448
|
name: 'Get Region Top Menu',
|
|
447
449
|
description: 'Get sequences for a region specified from a Macromolecule',
|
|
448
|
-
'top-menu': 'Bio | Calculate |
|
|
450
|
+
'top-menu': 'Bio | Calculate | Extract Region...',
|
|
449
451
|
editor: 'Bio:GetRegionEditor'})
|
|
450
452
|
static async getRegionTopMenu(
|
|
451
453
|
@grok.decorators.param({options: {description: 'Input data table'}})table: DG.DataFrame,
|
|
@@ -459,6 +461,43 @@ export class PackageFunctions {
|
|
|
459
461
|
await grok.data.detectSemanticTypes(sequence.dataFrame); // to set renderer
|
|
460
462
|
}
|
|
461
463
|
|
|
464
|
+
// -- Annotation menu entries --
|
|
465
|
+
|
|
466
|
+
@grok.decorators.func({
  name: 'Apply Numbering Scheme',
  description: 'Assigns antibody numbering (IMGT/Kabat/Chothia/AHo) using AntPack',
  'top-menu': 'Bio | Annotate | Apply Numbering Scheme...',
})
/**
 * Opens the numbering scheme dialog.
 * The UI module is loaded on demand with a dynamic import; the method returns immediately,
 * so any failure (module load or dialog construction) is reported to the user instead of
 * being left as an unhandled promise rejection.
 */
static applyNumberingScheme(): void {
  import('./utils/annotations/numbering-ui')
    .then((m) => m.showNumberingSchemeDialog())
    .catch((err: unknown) => {
      const reason = err instanceof Error ? err.message : String(err);
      grok.shell.error(`Failed to open the numbering scheme dialog: ${reason}`);
    });
}
|
|
474
|
+
|
|
475
|
+
@grok.decorators.func({
  name: 'Scan Liabilities',
  description: 'Scans macromolecule sequences for deamidation, oxidation, and other liabilities',
  'top-menu': 'Bio | Annotate | Scan Liabilities...',
})
/**
 * Opens the liability scanner dialog.
 * The UI module is loaded on demand with a dynamic import; the method returns immediately,
 * so any failure (module load or dialog construction) is reported to the user instead of
 * being left as an unhandled promise rejection.
 */
static scanLiabilities(): void {
  import('./utils/annotations/liability-scanner-ui')
    .then((m) => m.showLiabilityScannerDialog())
    .catch((err: unknown) => {
      const reason = err instanceof Error ? err.message : String(err);
      grok.shell.error(`Failed to open the liability scanner dialog: ${reason}`);
    });
}
|
|
483
|
+
|
|
484
|
+
@grok.decorators.func({
  name: 'Manage Annotations',
  description: 'View and manage sequence annotations on macromolecule columns',
  'top-menu': 'Bio | Annotate | Manage Annotations...',
})
/**
 * Opens the annotation manager dialog.
 * The UI module is loaded on demand with a dynamic import; the method returns immediately,
 * so any failure (module load or dialog construction) is reported to the user instead of
 * being left as an unhandled promise rejection.
 */
static manageAnnotations(): void {
  import('./utils/annotations/annotation-manager-ui')
    .then((m) => m.showAnnotationManagerDialog())
    .catch((err: unknown) => {
      const reason = err instanceof Error ? err.message : String(err);
      grok.shell.error(`Failed to open the annotation manager dialog: ${reason}`);
    });
}
|
|
492
|
+
|
|
493
|
+
@grok.decorators.func({
  name: 'Sequence Column Input',
  description: 'Creates a new input for sequence columns with ability to extract a region',
})
/**
 * Builds a sequence column input by delegating to `SequenceColumnInput.create`.
 * @param name Passed through as the first argument of `create`.
 * @param options Passed through unchanged as the second argument of `create`.
 * @returns The input object produced by `SequenceColumnInput.create`.
 */
static sequenceColumnInput(name: string, options: any): ISequenceColumnInput {
  const input: ISequenceColumnInput = SequenceColumnInput.create(name, options);
  return input;
}
|
|
500
|
+
|
|
462
501
|
@grok.decorators.func({
|
|
463
502
|
name: 'Sequence Activity Cliffs',
|
|
464
503
|
description: 'Detects pairs of molecules with similar structure and significant difference in any given property',
|
|
package/src/tests/substructure-filters-tests.ts
CHANGED
|
@@ -428,6 +428,7 @@ category('bio-substructure-filters', async () => {
|
|
|
428
428
|
await awaitGrid(view.grid);
|
|
429
429
|
|
|
430
430
|
const seqFilter = fg.filters[0] as BioSubstructureFilter;
|
|
431
|
+
await awaitCheck(() => seqFilter.bioFilter !== null, 'FastaBioFilter hasn\'t been created', 1000);
|
|
431
432
|
const seqBf = seqFilter.bioFilter as FastaBioFilter;
|
|
432
433
|
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
433
434
|
seqBf.props = new BioFilterProps(fSubStr, undefined, _package.logger);
|
|
package/src/utils/annotations/annotation-actions.ts
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
|
|
4
|
+
import {AnnotationCategory, SeqAnnotationHit} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
|
|
5
|
+
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
6
|
+
import {getAnnotationColumnName, getColumnAnnotations, cacheAllRowAnnotations} from './annotation-manager';
|
|
7
|
+
|
|
8
|
+
/**
 * Filters the DataFrame to show only rows that have at least one liability hit.
 *
 * The companion annotation column stores region span hits (those carrying `endPositionIndex`)
 * in the same per-row list as point hits. Only hits without `endPositionIndex` are counted
 * here; counting every hit would let all rows of a numbered column through the filter.
 *
 * When the companion column is missing, a warning is shown and the filter is left untouched.
 *
 * @param df Table whose row filter is replaced.
 * @param seqCol Sequence column whose companion annotation column is inspected.
 */
export function filterByLiabilityHits(df: DG.DataFrame, seqCol: DG.Column<string>): void {
  const annotColName = getAnnotationColumnName(seqCol.name);
  let annotCol: DG.Column<string> | null = null;
  try {
    annotCol = df.columns.byName(annotColName) as DG.Column<string>;
  } catch { /* a failing lookup is handled the same way as a missing column */ }

  if (!annotCol) {
    grok.shell.warning('No annotation data found. Run liability scanning first.');
    return;
  }

  const rowData = cacheAllRowAnnotations(annotCol);
  const bs = DG.BitSet.create(df.rowCount);
  for (let i = 0; i < df.rowCount; i++) {
    // A liability is any hit that is not a region span.
    const hasLiability = rowData[i]?.some((h: SeqAnnotationHit) => h.endPositionIndex == null) ?? false;
    if (hasLiability)
      bs.set(i, true);
  }

  df.filter.copyFrom(bs);
  grok.shell.info(`Filtered to ${bs.trueCount} rows with liability hits`);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Selects all rows that contain a specific annotation hit.
 *
 * Reads the per-row hits from the companion annotation column of `seqCol` and replaces the
 * table selection with the rows that have at least one hit whose `annotationId` matches.
 * Shows a warning and leaves the selection untouched when the companion column is missing.
 *
 * @param df Table whose selection is replaced.
 * @param seqCol Sequence column whose companion annotation column is inspected.
 * @param annotationId Identifier the hits are matched against.
 */
export function selectRowsWithAnnotation(df: DG.DataFrame, seqCol: DG.Column<string>, annotationId: string): void {
  const companionName = getAnnotationColumnName(seqCol.name);
  let companion: DG.Column<string> | null;
  try {
    companion = df.columns.byName(companionName) as DG.Column<string>;
  } catch {
    // not found
    companion = null;
  }
  if (!companion) {
    grok.shell.warning('No annotation data found.');
    return;
  }

  const hitsByRow = cacheAllRowAnnotations(companion);
  const matching = DG.BitSet.create(df.rowCount);
  for (let rowIdx = 0; rowIdx < df.rowCount; rowIdx++) {
    const rowHits = hitsByRow[rowIdx];
    const isMatch = rowHits?.some((hit) => hit.annotationId === annotationId);
    if (isMatch)
      matching.set(rowIdx, true);
  }

  df.selection.copyFrom(matching);
  grok.shell.info(`Selected ${matching.trueCount} rows with ${annotationId} hits`);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Extracts a named region annotation as a new column.
 *
 * Uses per-row region spans from the companion column when available (unaligned data),
 * falls back to column-level position names (aligned/MSA data).
 *
 * @param df Table that receives the new column.
 * @param seqCol Sequence column the region is taken from.
 * @param annotationName Name of a column-level annotation of category `Structure`.
 * @param seqHelper Provides the sequence handler used to split sequences / cut the region.
 * @returns The added column, or `null` (after a warning) when the annotation is unknown, has no
 *   position range, or its start/end position names are not in the handler's position list.
 */
export function extractAnnotatedRegion(
  df: DG.DataFrame,
  seqCol: DG.Column<string>,
  annotationName: string,
  seqHelper: ISeqHelper,
): DG.Column<string> | null {
  const annot = getColumnAnnotations(seqCol).find((a) =>
    a.name === annotationName && a.category === AnnotationCategory.Structure);
  if (!annot) {
    grok.shell.warning(`Region annotation "${annotationName}" not found.`);
    return null;
  }

  const sh = seqHelper.getSeqHandler(seqCol);
  const colName = `${seqCol.name}(${annotationName})`;

  // Shared tail of both extraction paths: add the column, start semantic type detection, notify.
  const publish = (regCol: DG.Column<string>): DG.Column<string> => {
    df.columns.add(regCol);
    // Detection is asynchronous and this function is synchronous, so it is started without being
    // awaited; a rejection is logged rather than left as an unhandled promise rejection.
    grok.data.detectSemanticTypes(df).catch((err: unknown) => console.error(err));
    grok.shell.info(`Extracted region ${annotationName} as column "${colName}"`);
    return regCol;
  };

  // Try per-row extraction using companion column region spans
  const annotColName = getAnnotationColumnName(seqCol.name);
  let annotCol: DG.Column<string> | null = null;
  try { annotCol = df.columns.byName(annotColName) as DG.Column<string>; } catch { /* not found */ }

  if (annotCol) {
    const allRowData = cacheAllRowAnnotations(annotCol);
    // One pass over the rows: the span hit of this annotation for each row, or undefined.
    const regionHits = allRowData.map((rd) =>
      rd?.find((h: SeqAnnotationHit) => h.annotationId === annot.id && h.endPositionIndex != null));

    if (regionHits.some((h) => h != null)) {
      const regCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, colName, df.rowCount);
      const separator = sh.separator || '';
      for (let i = 0; i < df.rowCount; i++) {
        const regionHit = regionHits[i];
        if (!regionHit) {
          regCol.set(i, '');
          continue;
        }
        const splitted = sh.getSplitted(i);
        // Positions past the end of this row's sequence are skipped.
        const last = Math.min(regionHit.endPositionIndex!, splitted.length - 1);
        const parts: string[] = [];
        for (let p = regionHit.positionIndex; p <= last; p++)
          parts.push(splitted.getOriginal(p));
        regCol.set(i, parts.join(separator));
      }
      return publish(regCol);
    }
  }

  // Fall back to column-level position names (aligned/MSA data)
  if (annot.start == null || annot.end == null) {
    grok.shell.warning(`Region annotation "${annotationName}" has no position range.`);
    return null;
  }

  const startIdx = sh.posList.indexOf(annot.start);
  const endIdx = sh.posList.indexOf(annot.end);
  if (startIdx < 0 || endIdx < 0) {
    grok.shell.warning(`Position names "${annot.start}" or "${annot.end}" not found in position list.`);
    return null;
  }

  return publish(sh.getRegion(startIdx, endIdx, colName));
}
|
|
package/src/utils/annotations/annotation-manager-ui.ts
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/* eslint-disable max-len */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
SeqAnnotation, AnnotationCategory, LiabilitySeverity,
|
|
8
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
|
|
9
|
+
import {getColumnAnnotations, setColumnAnnotations, clearAnnotations} from './annotation-manager';
|
|
10
|
+
|
|
11
|
+
/** Display text for each annotation category, keyed by the `AnnotationCategory` value. */
const categoryLabels: Record<string, string> = {
  [AnnotationCategory.Structure]: 'Structure (FR/CDR)',
  [AnnotationCategory.Liability]: 'Liability',
  [AnnotationCategory.PTM]: 'Post-translational Modification',
  [AnnotationCategory.Custom]: 'Custom',
};
|
|
17
|
+
|
|
18
|
+
/** Display text for each liability severity, keyed by the `LiabilitySeverity` value. */
const severityLabels: Record<string, string> = {
  [LiabilitySeverity.High]: 'High',
  [LiabilitySeverity.Medium]: 'Medium',
  [LiabilitySeverity.Low]: 'Low',
  [LiabilitySeverity.Info]: 'Info',
};
|
|
24
|
+
|
|
25
|
+
/**
 * Shows the "Manage Annotations" dialog for the table of the current table view.
 *
 * The dialog lists the column-level annotations of the chosen macromolecule column. Each row
 * offers a colour swatch (opens a colour picker) and a trash icon (removes that annotation);
 * "Clear All" removes every annotation of the column. Changes are written immediately, so the
 * OK handler has nothing left to do.
 *
 * Warns and returns when no table view is open or the table has no macromolecule column.
 */
export function showAnnotationManagerDialog(): void {
  const df = grok.shell.tv?.dataFrame;
  if (!df) {
    grok.shell.warning('No table open');
    return;
  }

  const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
  if (seqCols.length === 0) {
    grok.shell.warning('No macromolecule columns found');
    return;
  }

  // null while the column input is empty; every consumer below guards against that
  // instead of dereferencing a missing column.
  let selectedCol: DG.Column | null = seqCols[0];
  const colInput = ui.input.column('Sequence Column', {
    table: df, value: seqCols[0],
    filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,
    onValueChanged: (col) => { selectedCol = col ?? null; refreshList(); },
  });

  const listDiv = ui.divV([], {style: {maxHeight: '380px', overflowY: 'auto', paddingRight: '8px'}});

  /** Rebuilds the annotation list for the currently selected column. */
  function refreshList(): void {
    listDiv.innerHTML = '';
    // The list is rebuilt on every column change, so the handlers created below
    // can safely capture this column.
    const col = selectedCol;
    if (!col) {
      listDiv.append(ui.divText('No sequence column selected.', {style: {color: '#888', padding: '8px'}}));
      return;
    }

    const annotations = getColumnAnnotations(col);
    if (annotations.length === 0) {
      listDiv.append(ui.divText('No annotations on this column.', {style: {color: '#888', padding: '8px'}}));
      return;
    }

    for (const annot of annotations) {
      const catLabel = categoryLabels[annot.category] ?? annot.category;
      const sevLabel = annot.severity ? ` [${severityLabels[annot.severity] ?? annot.severity}]` : '';
      const rangeLabel = annot.start && annot.end ? ` (${annot.start}-${annot.end})` : '';
      const schemeLabel = annot.sourceScheme ? ` ${annot.sourceScheme}` : '';

      const removeBtn = ui.iconFA('trash', () => {
        const updated = getColumnAnnotations(col).filter((a) => a.id !== annot.id);
        setColumnAnnotations(col, updated);
        df.fireValuesChanged();
        refreshList();
      });
      removeBtn.style.cursor = 'pointer';
      removeBtn.style.color = '#999';
      removeBtn.style.marginLeft = '8px';

      const originalColor = annot.color ?? '#ccc';
      let currentColor = originalColor;
      const colorSwatch = ui.div([], {style: {
        width: '12px', height: '12px', borderRadius: '2px',
        backgroundColor: currentColor, display: 'inline-block', marginRight: '6px',
        flexShrink: '0', cursor: 'pointer',
      }});

      // Three callbacks, presumably change / confirm / cancel: the first only remembers the
      // picked colour, the second persists it, the third restores the original swatch colour.
      ui.colorPicker(DG.Color.fromHtml(originalColor), (newColor) => {
        currentColor = DG.Color.toHtml(newColor);
      }, colorSwatch, () => {
        const updated = getColumnAnnotations(col).map((a) => a.id === annot.id ? {...a, color: currentColor} : a);
        setColumnAnnotations(col, updated);
        df.fireValuesChanged();
        refreshList();
      }, () => {
        currentColor = originalColor;
        colorSwatch.style.backgroundColor = currentColor;
      });

      const row = ui.divH([
        colorSwatch,
        ui.divText(`${annot.name}${rangeLabel}${schemeLabel}${sevLabel}`, {style: {flex: '1', fontSize: '12px', padding: '4px'}}),
        ui.divText(catLabel, {style: {color: '#888', fontSize: '11px', marginRight: '8px'}}),
        removeBtn,
      ], {style: {alignItems: 'center', padding: '4px 0', borderBottom: '1px solid #eee'}});

      listDiv.append(row);
    }
  }

  refreshList();

  const clearBtn = ui.button('Clear All', () => {
    if (!selectedCol) {
      grok.shell.warning('No sequence column selected');
      return;
    }
    clearAnnotations(df, selectedCol);
    df.fireValuesChanged();
    refreshList();
    grok.shell.info('All annotations cleared');
  });

  const dialog = ui.dialog({title: 'Manage Annotations'})
    .add(ui.inputs([colInput]))
    .add(ui.h3('Annotations'))
    .add(listDiv)
    .add(ui.divH([clearBtn], {style: {marginTop: '8px'}}))
    .onOK(() => {});

  dialog.show();
}
|
|
package/src/utils/annotations/annotation-manager.ts
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
4
|
+
import {
|
|
5
|
+
SeqAnnotation, SeqAnnotationHit, RowAnnotationData,
|
|
6
|
+
AnnotationCategory, AnnotationVisualType,
|
|
7
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
|
|
8
|
+
import {SeqRegion} from '../get-region-func-editor';
|
|
9
|
+
|
|
10
|
+
/**
 * Name prefix of the hidden companion annotation columns
 * (a leading `~` hides a column in Datagrok).
 */
const ANNOTATION_COL_PREFIX = '~';
|
|
12
|
+
|
|
13
|
+
/**
 * Reads column-level annotations from the `.annotations` tag.
 * Falls back to `.regions` for backward compatibility.
 *
 * A tag is only used when it holds a JSON array. Malformed JSON and non-array JSON (an object,
 * a string, a number) are both skipped, so callers can always rely on array methods of the result.
 *
 * @param col Sequence column whose tags are read.
 * @returns The stored annotations; legacy regions converted to auto-generated `Structure`
 *   annotations with ids `legacy-region-<index>`; or an empty array when neither tag is usable.
 */
export function getColumnAnnotations(col: DG.Column<string>): SeqAnnotation[] {
  const annotationsTag = col.getTag(bioTAGS.annotations);
  if (annotationsTag) {
    try {
      const parsed: unknown = JSON.parse(annotationsTag);
      if (Array.isArray(parsed))
        return parsed as SeqAnnotation[];
    } catch { /* malformed JSON: fall through to the legacy tag */ }
  }

  // Backward compat: convert legacy .regions to SeqAnnotation[]
  const regionsTag = col.getTag(bioTAGS.regions);
  if (regionsTag) {
    try {
      const parsed: unknown = JSON.parse(regionsTag);
      if (Array.isArray(parsed)) {
        return (parsed as SeqRegion[]).map((r, i) => ({
          id: `legacy-region-${i}`,
          name: r.name,
          description: r.description,
          start: r.start,
          end: r.end,
          visualType: AnnotationVisualType.Region,
          category: AnnotationCategory.Structure,
          autoGenerated: true,
        }));
      }
    } catch { /* malformed legacy data is ignored */ }
  }
  return [];
}
|
|
41
|
+
|
|
42
|
+
/**
 * Writes column-level annotations to the `.annotations` tag.
 * Also keeps `.regions` in sync for backward compatibility with GetRegionFuncEditor.
 *
 * `.regions` is rewritten only when the list contains at least one `Structure` annotation;
 * it then receives the structure annotations that have both `start` and `end`.
 * With no structure annotation in the list the `.regions` tag is left as it was.
 *
 * @param col Sequence column whose tags are written.
 * @param annotations Complete annotation list to store.
 */
export function setColumnAnnotations(col: DG.Column<string>, annotations: SeqAnnotation[]): void {
  col.setTag(bioTAGS.annotations, JSON.stringify(annotations));

  const isStructure = (a: SeqAnnotation): boolean => a.category === AnnotationCategory.Structure;
  if (!annotations.some(isStructure))
    return;

  const legacyRegions: SeqRegion[] = [];
  for (const a of annotations) {
    if (!isStructure(a) || a.start == null || a.end == null)
      continue;
    legacyRegions.push({
      name: a.name,
      description: a.description ?? '',
      start: a.start,
      end: a.end,
    });
  }
  col.setTag(bioTAGS.regions, JSON.stringify(legacyRegions));
}
|
|
60
|
+
|
|
61
|
+
/**
 * Returns the name for the hidden companion annotation column.
 * @param seqColName Name of the sequence column.
 * @returns `ANNOTATION_COL_PREFIX`, then the sequence column name, then `_annotations`.
 */
export function getAnnotationColumnName(seqColName: string): string {
  return ANNOTATION_COL_PREFIX + seqColName + '_annotations';
}
|
|
65
|
+
|
|
66
|
+
/**
 * Gets or creates the hidden companion column for per-row annotation hits.
 *
 * When the column has to be created, its name is also recorded on the sequence column in the
 * `annotationColumnName` tag; an already existing column is returned without touching the tag.
 *
 * @param df Table that holds (or receives) the companion column.
 * @param seqCol Sequence column the companion column belongs to.
 * @returns The existing or newly added string column.
 */
export function getOrCreateAnnotationColumn(df: DG.DataFrame, seqCol: DG.Column<string>): DG.Column<string> {
  const companionName = getAnnotationColumnName(seqCol.name);
  const existing = df.columns.byName(companionName);
  if (existing)
    return existing as DG.Column<string>;

  const created = df.columns.addNewString(companionName);
  seqCol.setTag(bioTAGS.annotationColumnName, companionName);
  return created as DG.Column<string>;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Parsed per-row hits per annotation column, tagged with the column version they were parsed at.
 * Filled by `cacheAllRowAnnotations`; `getRowAnnotations` only reads it.
 */
const _rowDataCache = new WeakMap<DG.Column, {version: number; data:(RowAnnotationData | null)[]}>();

/**
 * Reads per-row annotation hits from the companion column. Uses version-based caching.
 *
 * A cached value is returned when the cache entry matches the column's current version and
 * holds an entry for the row; otherwise the cell is parsed directly (without updating the cache).
 *
 * @param annotCol Companion annotation column.
 * @param rowIdx Row to read.
 * @returns The parsed hits, or `null` for an empty cell or a cell that is not valid JSON.
 */
export function getRowAnnotations(annotCol: DG.Column<string>, rowIdx: number): RowAnnotationData | null {
  const entry = _rowDataCache.get(annotCol);
  if (entry !== undefined && entry.version === annotCol.version) {
    const remembered = entry.data[rowIdx];
    if (remembered !== undefined)
      return remembered;
  }

  // Parse this row
  const cellText = annotCol.get(rowIdx);
  if (!cellText)
    return null;

  let parsed: RowAnnotationData | null;
  try {
    parsed = JSON.parse(cellText) as RowAnnotationData;
  } catch {
    parsed = null;
  }
  return parsed;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Parses and caches all row annotations for the column. Call once when version changes.
 *
 * When the cache already holds data for the column's current version, that same array is
 * returned; otherwise every cell is parsed and the result is stored under the current version.
 *
 * @param annotCol Companion annotation column.
 * @returns One entry per row: the parsed hits, or `null` for an empty or unparsable cell.
 */
export function cacheAllRowAnnotations(annotCol: DG.Column<string>): (RowAnnotationData | null)[] {
  const known = _rowDataCache.get(annotCol);
  if (known !== undefined && known.version === annotCol.version)
    return known.data;

  const parseCell = (cellText: string | null): RowAnnotationData | null => {
    if (!cellText)
      return null;
    try {
      return JSON.parse(cellText) as RowAnnotationData;
    } catch {
      return null;
    }
  };

  const parsedRows: (RowAnnotationData | null)[] = new Array(annotCol.length);
  for (let rowIdx = 0; rowIdx < annotCol.length; rowIdx++)
    parsedRows[rowIdx] = parseCell(annotCol.get(rowIdx));

  _rowDataCache.set(annotCol, {version: annotCol.version, data: parsedRows});
  return parsedRows;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Writes per-row annotation hits.
 * @param annotCol Companion annotation column.
 * @param rowIdx Row to write.
 * @param hits Hits for the row; stored as JSON, or as an empty string when the list is empty.
 */
export function setRowAnnotations(annotCol: DG.Column<string>, rowIdx: number, hits: SeqAnnotationHit[]): void {
  let cellText = '';
  if (hits.length > 0)
    cellText = JSON.stringify(hits);
  annotCol.set(rowIdx, cellText);
}
|
|
121
|
+
|
|
122
|
+
/**
 * Clears all annotations from a column (both column-level and row-level).
 *
 * Empties the `.annotations` and `.regions` tags, removes the companion annotation column when
 * it exists, and empties the `annotationColumnName` tag so the sequence column does not keep
 * pointing at a column that no longer exists.
 *
 * @param df Table that holds the companion column.
 * @param seqCol Sequence column whose annotations are cleared.
 */
export function clearAnnotations(df: DG.DataFrame, seqCol: DG.Column<string>): void {
  seqCol.setTag(bioTAGS.annotations, '');

  const annotColName = getAnnotationColumnName(seqCol.name);
  const annotCol = df.columns.byName(annotColName);
  if (annotCol)
    df.columns.remove(annotColName);
  // Set by getOrCreateAnnotationColumn on creation; reset alongside the other tags.
  seqCol.setTag(bioTAGS.annotationColumnName, '');

  // Clear .regions too
  seqCol.setTag(bioTAGS.regions, '');
}
|
|
132
|
+
|
|
133
|
+
/**
 * Adds an annotation to the column-level list.
 * The current list is read, the annotation is appended at the end, and the list is written back.
 * @param col Sequence column to annotate.
 * @param annotation Annotation to append; no check for an existing entry with the same id is made.
 */
export function addColumnAnnotation(col: DG.Column<string>, annotation: SeqAnnotation): void {
  const extended = [...getColumnAnnotations(col), annotation];
  setColumnAnnotations(col, extended);
}
|
|
139
|
+
|
|
140
|
+
/**
 * Removes an annotation by id from the column-level list.
 * The list is written back even when no annotation carries the given id.
 * @param col Sequence column to update.
 * @param annotationId Id of the annotation(s) to drop.
 */
export function removeColumnAnnotation(col: DG.Column<string>, annotationId: string): void {
  const remaining: SeqAnnotation[] = [];
  for (const candidate of getColumnAnnotations(col)) {
    if (candidate.id !== annotationId)
      remaining.push(candidate);
  }
  setColumnAnnotations(col, remaining);
}
|
|
145
|
+
|
|
146
|
+
/**
 * Merges row-level annotation hits by replacing hits of one kind while preserving the rest.
 *
 * A hit counts as a region span when its `endPositionIndex` is set; every other hit is treated
 * as a non-region (liability) hit. The inputs are not modified; a new array is returned with
 * the surviving existing hits first, followed by the new hits.
 *
 * @param existingHits Current per-row hits
 * @param newHits New hits to add
 * @param replaceRegions If true, removes existing region span hits (endPositionIndex set) before merging.
 * @param replaceLiabilities If true, removes existing non-region hits before merging.
 * @returns The merged hit list.
 */
export function mergeRowHits(
  existingHits: SeqAnnotationHit[],
  newHits: SeqAnnotationHit[],
  replaceRegions: boolean,
  replaceLiabilities: boolean,
): SeqAnnotationHit[] {
  const survivors = existingHits.filter((hit) => {
    const isRegionSpan = hit.endPositionIndex != null;
    return isRegionSpan ? !replaceRegions : !replaceLiabilities;
  });
  return [...survivors, ...newHits];
}
|
|
package/src/utils/annotations/liability-scanner-ui.ts
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {_package} from '../../package';
|
|
6
|
+
import {
|
|
7
|
+
BUILTIN_LIABILITY_RULES, LiabilityRule, scanLiabilities,
|
|
8
|
+
applyLiabilityScanResults, createLiabilitySummaryColumn,
|
|
9
|
+
} from './liability-scanner';
|
|
10
|
+
import {LiabilitySeverity} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
|
|
11
|
+
|
|
12
|
+
/** Display text for each liability severity, keyed by the `LiabilitySeverity` value. */
const severityLabels: Record<string, string> = {
  [LiabilitySeverity.High]: 'High',
  [LiabilitySeverity.Medium]: 'Medium',
  [LiabilitySeverity.Low]: 'Low',
  [LiabilitySeverity.Info]: 'Info',
};
|
|
18
|
+
|
|
19
|
+
/**
 * Shows the "Scan Sequence Liabilities" dialog for the table of the current table view.
 *
 * The dialog offers a checkbox per built-in rule plus three output options. On OK the selected
 * sequence column is scanned with the enabled rules, the results are applied to the table when
 * either "Highlight in cell renderer" or "Create annotation column" is checked, and a summary
 * count column is added when requested.
 *
 * Warns and returns when no table view is open or the table has no macromolecule column.
 */
export function showLiabilityScannerDialog(): void {
  const df = grok.shell.tv?.dataFrame;
  if (!df) {
    grok.shell.warning('No table open');
    return;
  }

  const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
  if (seqCols.length === 0) {
    grok.shell.warning('No macromolecule columns found');
    return;
  }

  // Per-dialog copies of the built-in rules, so toggling `enabled` below never touches the
  // shared definitions. Each pattern is rebuilt as a global regex while keeping whatever other
  // flags (e.g. case-insensitivity) the rule was defined with.
  const rules = BUILTIN_LIABILITY_RULES.map((r) => {
    const flags = r.pattern.flags.includes('g') ? r.pattern.flags : `${r.pattern.flags}g`;
    return {...r, pattern: new RegExp(r.pattern.source, flags)};
  });

  // NOTE(review): the table input is displayed but its value is never read; the scan always
  // targets the table that was active when the dialog was opened.
  const tableInput = ui.input.table('Table', {value: df});
  const seqInput = ui.input.column('Sequence', {
    table: df, value: seqCols[0],
    filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,
  });

  // Rule checkboxes
  const ruleChecks: {rule: LiabilityRule; input: DG.InputBase<boolean>}[] = [];
  const rulesDiv = ui.divV([]);
  for (const rule of rules) {
    const check = ui.input.bool(rule.name, {
      value: rule.enabled,
      tooltipText: `Severity: ${severityLabels[rule.severity] ?? rule.severity}`,
    });
    ruleChecks.push({rule, input: check});
    rulesDiv.append(check.root);
  }

  const highlightInput = ui.input.bool('Highlight in cell renderer', {value: true});
  const annotColInput = ui.input.bool('Create annotation column', {value: true});
  const summaryInput = ui.input.bool('Create summary count column', {value: false});

  const dialog = ui.dialog({title: 'Scan Sequence Liabilities'})
    .add(ui.inputs([tableInput, seqInput]))
    .add(ui.h3('Rules'))
    .add(rulesDiv)
    .add(ui.h3('Output'))
    .add(ui.inputs([highlightInput, annotColInput, summaryInput]))
    .onOK(() => {
      const seqCol = seqInput.value;
      if (!seqCol) {
        // An emptied column input gets a clear message instead of a null dereference.
        grok.shell.warning('No sequence column selected');
        return;
      }

      try {
        const sh = _package.seqHelper.getSeqHandler(seqCol);

        // Apply checkbox state
        for (const {rule, input} of ruleChecks)
          rule.enabled = input.value;

        const result = scanLiabilities(seqCol, sh, rules);

        if (annotColInput.value || highlightInput.value)
          applyLiabilityScanResults(df, seqCol, result);

        if (summaryInput.value)
          createLiabilitySummaryColumn(df, seqCol, result);

        grok.shell.info(`Liability scan: ${result.totalHits} hits found across ${result.annotations.length} rules`);
        df.fireValuesChanged();
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err);
        grok.shell.error(`Liability scan failed: ${reason}`);
        console.error(err);
      }
    });

  dialog.show();
}
|