@datagrok/bio 2.25.17 → 2.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/CHANGELOG.md +4 -0
  2. package/dist/282.js +2 -0
  3. package/dist/282.js.map +1 -0
  4. package/dist/287.js +2 -0
  5. package/dist/287.js.map +1 -0
  6. package/dist/288.js +2 -0
  7. package/dist/288.js.map +1 -0
  8. package/dist/422.js +2 -0
  9. package/dist/422.js.map +1 -0
  10. package/dist/455.js +1 -1
  11. package/dist/455.js.map +1 -1
  12. package/dist/767.js +2 -0
  13. package/dist/767.js.map +1 -0
  14. package/dist/package-test.js +5 -5
  15. package/dist/package-test.js.map +1 -1
  16. package/dist/package.js +3 -3
  17. package/dist/package.js.map +1 -1
  18. package/files/samples/antibodies.csv +494 -0
  19. package/package.json +2 -2
  20. package/src/package-api.ts +28 -0
  21. package/src/package.g.ts +31 -1
  22. package/src/package.ts +40 -1
  23. package/src/tests/substructure-filters-tests.ts +1 -0
  24. package/src/utils/annotations/annotation-actions.ts +130 -0
  25. package/src/utils/annotations/annotation-manager-ui.ts +118 -0
  26. package/src/utils/annotations/annotation-manager.ts +163 -0
  27. package/src/utils/annotations/liability-scanner-ui.ts +88 -0
  28. package/src/utils/annotations/liability-scanner.ts +147 -0
  29. package/src/utils/annotations/numbering-ui.ts +472 -0
  30. package/src/utils/antibody-numbering (WIP)/alignment.ts +578 -0
  31. package/src/utils/antibody-numbering (WIP)/annotator.ts +120 -0
  32. package/src/utils/antibody-numbering (WIP)/data/blosum62.ts +55 -0
  33. package/src/utils/antibody-numbering (WIP)/data/consensus-aho.ts +155 -0
  34. package/src/utils/antibody-numbering (WIP)/data/consensus-imgt.ts +162 -0
  35. package/src/utils/antibody-numbering (WIP)/data/consensus-kabat.ts +157 -0
  36. package/src/utils/antibody-numbering (WIP)/data/consensus-martin.ts +152 -0
  37. package/src/utils/antibody-numbering (WIP)/data/consensus.ts +36 -0
  38. package/src/utils/antibody-numbering (WIP)/data/regions.ts +63 -0
  39. package/src/utils/antibody-numbering (WIP)/index.ts +31 -0
  40. package/src/utils/antibody-numbering (WIP)/testdata.ts +5356 -0
  41. package/src/utils/antibody-numbering (WIP)/types.ts +69 -0
  42. package/src/utils/context-menu.ts +42 -2
  43. package/src/utils/get-region-func-editor.ts +18 -2
  44. package/src/utils/get-region.ts +167 -17
  45. package/src/utils/sequence-column-input.ts +57 -0
  46. package/src/viewers/vd-regions-viewer.ts +2 -0
  47. package/src/widgets/representations.ts +53 -2
  48. package/src/widgets/sequence-scrolling-widget.ts +28 -18
  49. package/test-console-output-1.log +587 -551
  50. package/test-record-1.mp4 +0 -0
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.25.17",
8
+ "version": "2.26.1",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,7 +44,7 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.62.1",
47
+ "@datagrok-libraries/bio": "^5.63.2",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.9",
49
49
  "@datagrok-libraries/math": "^1.2.6",
50
50
  "@datagrok-libraries/ml": "^6.10.9",
@@ -157,6 +157,34 @@ export namespace funcs {
157
157
  return await grok.functions.call('Bio:GetRegionTopMenu', { table, sequence, start, end, name });
158
158
  }
159
159
 
160
/**
 * Assigns antibody numbering (IMGT/Kabat/Chothia/AHo) using AntPack.
 * Calls the platform function `Bio:ApplyNumberingScheme` with an empty argument object.
 */
export async function applyNumberingScheme(): Promise<void> {
  const args = {};
  return await grok.functions.call('Bio:ApplyNumberingScheme', args);
}
166
+
167
/**
 * Scans macromolecule sequences for deamidation, oxidation, and other liabilities.
 * Calls the platform function `Bio:ScanLiabilities` with an empty argument object.
 */
export async function scanLiabilities(): Promise<void> {
  const args = {};
  return await grok.functions.call('Bio:ScanLiabilities', args);
}
173
+
174
/**
 * View and manage sequence annotations on macromolecule columns.
 * Calls the platform function `Bio:ManageAnnotations` with an empty argument object.
 */
export async function manageAnnotations(): Promise<void> {
  const args = {};
  return await grok.functions.call('Bio:ManageAnnotations', args);
}
180
+
181
/**
 * Creates a new input for sequence columns with ability to extract a region.
 * Calls the platform function `Bio:SequenceColumnInput`, forwarding both arguments by name.
 */
export async function sequenceColumnInput(name: string , options: any ): Promise<any> {
  const args = {name: name, options: options};
  return await grok.functions.call('Bio:SequenceColumnInput', args);
}
187
+
160
188
  /**
161
189
  Detects pairs of molecules with similar structure and significant difference in any given property
162
190
  */
package/src/package.g.ts CHANGED
@@ -217,12 +217,42 @@ export function getRegion(sequence: DG.Column<any>, start?: string, end?: string
217
217
  //input: string start { optional: true; description: Region start position name }
218
218
  //input: string end { optional: true; description: Region end position name }
219
219
  //input: string name { optional: true; description: Region column name }
220
- //top-menu: Bio | Calculate | Get Region...
220
+ //top-menu: Bio | Calculate | Extract Region...
221
221
  //editor: Bio:GetRegionEditor
222
222
  export async function getRegionTopMenu(table: DG.DataFrame, sequence: DG.Column, start?: string, end?: string, name?: string) : Promise<void> {
223
223
  await PackageFunctions.getRegionTopMenu(table, sequence, start, end, name);
224
224
  }
225
225
 
226
//name: Apply Numbering Scheme
//description: Assigns antibody numbering (IMGT/Kabat/Chothia/AHo) using AntPack
//top-menu: Bio | Annotate | Apply Numbering Scheme...
export function applyNumberingScheme() : void {
  // Thin wrapper: forwards to the static PackageFunctions.applyNumberingScheme().
  PackageFunctions.applyNumberingScheme();
}
232
+
233
//name: Scan Liabilities
//description: Scans macromolecule sequences for deamidation, oxidation, and other liabilities
//top-menu: Bio | Annotate | Scan Liabilities...
export function scanLiabilities() : void {
  // Thin wrapper: forwards to the static PackageFunctions.scanLiabilities().
  PackageFunctions.scanLiabilities();
}
239
+
240
//name: Manage Annotations
//description: View and manage sequence annotations on macromolecule columns
//top-menu: Bio | Annotate | Manage Annotations...
export function manageAnnotations() : void {
  // Thin wrapper: forwards to the static PackageFunctions.manageAnnotations().
  PackageFunctions.manageAnnotations();
}
246
+
247
//name: Sequence Column Input
//description: Creates a new input for sequence columns with ability to extract a region
//input: string name
//input: dynamic options
//output: dynamic result
export function sequenceColumnInput(name: string, options: any) : any {
  // Thin wrapper: forwards both arguments to PackageFunctions.sequenceColumnInput() and returns its result.
  return PackageFunctions.sequenceColumnInput(name, options);
}
255
+
226
256
  //name: Sequence Activity Cliffs
227
257
  //description: Detects pairs of molecules with similar structure and significant difference in any given property
228
258
  //input: dataframe table { description: Input data table }
package/src/package.ts CHANGED
@@ -79,6 +79,8 @@ import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget
79
79
  import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
80
80
  import {BilnNotationProvider} from './utils/biln';
81
81
  import {showMonomerCollectionsView} from './utils/monomer-lib/monomer-collections-view';
82
+ import {ISequenceColumnInput} from '@datagrok-libraries/bio/src/utils/sequence-column-input';
83
+ import {SequenceColumnInput} from './utils/sequence-column-input';
82
84
 
83
85
  import * as api from './package-api';
84
86
  export const _package = new BioPackage(/*{debug: true}/**/);
@@ -445,7 +447,7 @@ export class PackageFunctions {
445
447
  @grok.decorators.func({
446
448
  name: 'Get Region Top Menu',
447
449
  description: 'Get sequences for a region specified from a Macromolecule',
448
- 'top-menu': 'Bio | Calculate | Get Region...',
450
+ 'top-menu': 'Bio | Calculate | Extract Region...',
449
451
  editor: 'Bio:GetRegionEditor'})
450
452
  static async getRegionTopMenu(
451
453
  @grok.decorators.param({options: {description: 'Input data table'}})table: DG.DataFrame,
@@ -459,6 +461,43 @@ export class PackageFunctions {
459
461
  await grok.data.detectSemanticTypes(sequence.dataFrame); // to set renderer
460
462
  }
461
463
 
464
+ // -- Annotation menu entries --
465
+
466
@grok.decorators.func({
  name: 'Apply Numbering Scheme',
  description: 'Assigns antibody numbering (IMGT/Kabat/Chothia/AHo) using AntPack',
  'top-menu': 'Bio | Annotate | Apply Numbering Scheme...',
})
static applyNumberingScheme(): void {
  // The dialog module is loaded with a dynamic import() and the method returns
  // immediately. Without a rejection handler, a failed module load or an error
  // thrown while opening the dialog would surface only as an unhandled promise
  // rejection, so report it to the user and the console instead.
  import('./utils/annotations/numbering-ui')
    .then((m) => m.showNumberingSchemeDialog())
    .catch((err: unknown) => {
      const message = err instanceof Error ? err.message : String(err);
      grok.shell.error(`Apply Numbering Scheme failed: ${message}`);
      console.error(err);
    });
}
474
+
475
@grok.decorators.func({
  name: 'Scan Liabilities',
  description: 'Scans macromolecule sequences for deamidation, oxidation, and other liabilities',
  'top-menu': 'Bio | Annotate | Scan Liabilities...',
})
static scanLiabilities(): void {
  // The dialog module is loaded with a dynamic import() and the method returns
  // immediately. Without a rejection handler, a failed module load or an error
  // thrown while opening the dialog would surface only as an unhandled promise
  // rejection, so report it to the user and the console instead.
  import('./utils/annotations/liability-scanner-ui')
    .then((m) => m.showLiabilityScannerDialog())
    .catch((err: unknown) => {
      const message = err instanceof Error ? err.message : String(err);
      grok.shell.error(`Scan Liabilities failed: ${message}`);
      console.error(err);
    });
}
483
+
484
@grok.decorators.func({
  name: 'Manage Annotations',
  description: 'View and manage sequence annotations on macromolecule columns',
  'top-menu': 'Bio | Annotate | Manage Annotations...',
})
static manageAnnotations(): void {
  // The dialog module is loaded with a dynamic import() and the method returns
  // immediately. Without a rejection handler, a failed module load or an error
  // thrown while opening the dialog would surface only as an unhandled promise
  // rejection, so report it to the user and the console instead.
  import('./utils/annotations/annotation-manager-ui')
    .then((m) => m.showAnnotationManagerDialog())
    .catch((err: unknown) => {
      const message = err instanceof Error ? err.message : String(err);
      grok.shell.error(`Manage Annotations failed: ${message}`);
      console.error(err);
    });
}
492
+
493
@grok.decorators.func({
  name: 'Sequence Column Input',
  description: 'Creates a new input for sequence columns with ability to extract a region',
})
static sequenceColumnInput(name: string, options: any): ISequenceColumnInput {
  // Construction is delegated entirely to the SequenceColumnInput.create factory.
  const input: ISequenceColumnInput = SequenceColumnInput.create(name, options);
  return input;
}
500
+
462
501
  @grok.decorators.func({
463
502
  name: 'Sequence Activity Cliffs',
464
503
  description: 'Detects pairs of molecules with similar structure and significant difference in any given property',
@@ -428,6 +428,7 @@ category('bio-substructure-filters', async () => {
428
428
  await awaitGrid(view.grid);
429
429
 
430
430
  const seqFilter = fg.filters[0] as BioSubstructureFilter;
431
+ await awaitCheck(() => seqFilter.bioFilter !== null, 'FastaBioFilter hasn\'t been created', 1000);
431
432
  const seqBf = seqFilter.bioFilter as FastaBioFilter;
432
433
  await testEvent(df.onRowsFiltered, () => {}, () => {
433
434
  seqBf.props = new BioFilterProps(fSubStr, undefined, _package.logger);
@@ -0,0 +1,130 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+
4
+ import {AnnotationCategory, SeqAnnotationHit} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
5
+ import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
6
+ import {getAnnotationColumnName, getColumnAnnotations, cacheAllRowAnnotations} from './annotation-manager';
7
+
8
/**
 * Filters the DataFrame to show only rows that have at least one liability hit.
 *
 * Per-row hits are read from the hidden companion annotation column of `seqCol`.
 * That column can also hold region-span hits (hits with `endPositionIndex` set);
 * those are not liabilities, so they are ignored here. Without that check a row
 * that only carries region spans would be reported as having a liability.
 *
 * Shows a warning and leaves the filter untouched when the companion column is missing.
 *
 * @param df Table whose row filter is replaced.
 * @param seqCol Sequence column whose companion annotation column is inspected.
 */
export function filterByLiabilityHits(df: DG.DataFrame, seqCol: DG.Column<string>): void {
  const annotColName = getAnnotationColumnName(seqCol.name);
  let annotCol: DG.Column<string> | null = null;
  try {
    annotCol = df.columns.byName(annotColName) as DG.Column<string>;
  } catch { /* a failed lookup is handled the same way as a missing column */ }

  if (!annotCol) {
    grok.shell.warning('No annotation data found. Run liability scanning first.');
    return;
  }

  const rowData = cacheAllRowAnnotations(annotCol);
  const bs = DG.BitSet.create(df.rowCount);
  for (let i = 0; i < df.rowCount; i++) {
    // Point hits (no end index) are the liability hits; span hits are regions.
    const hasLiability = rowData[i]?.some((h: SeqAnnotationHit) => h.endPositionIndex == null) ?? false;
    if (hasLiability)
      bs.set(i, true);
  }

  df.filter.copyFrom(bs);
  grok.shell.info(`Filtered to ${bs.trueCount} rows with liability hits`);
}
31
+
32
/**
 * Replaces the table selection with the rows whose per-row hits reference `annotationId`.
 * Hits are read from the companion annotation column of `seqCol`; when that column
 * cannot be found a warning is shown and the selection is left as is.
 */
export function selectRowsWithAnnotation(df: DG.DataFrame, seqCol: DG.Column<string>, annotationId: string): void {
  let companion: DG.Column<string> | null = null;
  try {
    companion = df.columns.byName(getAnnotationColumnName(seqCol.name)) as DG.Column<string>;
  } catch { /* not found */ }

  if (!companion) {
    grok.shell.warning('No annotation data found.');
    return;
  }

  const hitsPerRow = cacheAllRowAnnotations(companion);
  const matches = DG.BitSet.create(df.rowCount);
  for (let row = 0; row < df.rowCount; row++) {
    const hits = hitsPerRow[row];
    const referencesAnnotation = hits != null && hits.some((hit) => hit.annotationId === annotationId);
    if (referencesAnnotation)
      matches.set(row, true);
  }

  df.selection.copyFrom(matches);
  grok.shell.info(`Selected ${matches.trueCount} rows with ${annotationId} hits`);
}
55
+
56
/**
 * Extracts a named structure-region annotation of `seqCol` into a new column of `df`.
 *
 * Two strategies are tried in order:
 *  1. Per-row spans: if the companion annotation column holds, for at least one row,
 *     a hit for this annotation with an end index, each row's region is cut from its
 *     own span (rows without such a hit get an empty string).
 *  2. Column-level range: otherwise the annotation's `start`/`end` position names are
 *     looked up in the sequence handler's position list and the region is taken by index.
 *
 * @returns The column that was added to `df`, or null (after a warning) when the
 *   annotation, its range, or its position names cannot be resolved.
 */
export function extractAnnotatedRegion(
  df: DG.DataFrame,
  seqCol: DG.Column<string>,
  annotationName: string,
  seqHelper: ISeqHelper,
): DG.Column<string> | null {
  const annot = getColumnAnnotations(seqCol).find((candidate) =>
    candidate.name === annotationName && candidate.category === AnnotationCategory.Structure);

  if (!annot) {
    grok.shell.warning(`Region annotation "${annotationName}" not found.`);
    return null;
  }

  const sh = seqHelper.getSeqHandler(seqCol);
  const colName = `${seqCol.name}(${annotationName})`;

  // Strategy 1: per-row spans stored in the companion column.
  let annotCol: DG.Column<string> | null = null;
  try {
    annotCol = df.columns.byName(getAnnotationColumnName(seqCol.name)) as DG.Column<string>;
  } catch { /* not found */ }

  // A hit counts as this region's span when it references the annotation and has an end index.
  const isRegionSpan = (hit: SeqAnnotationHit): boolean =>
    hit.annotationId === annot.id && hit.endPositionIndex != null;

  if (annotCol) {
    const allRowData = cacheAllRowAnnotations(annotCol);
    if (allRowData.some((hits) => hits?.some(isRegionSpan))) {
      const regCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, colName, df.rowCount);
      for (let row = 0; row < df.rowCount; row++) {
        const span = allRowData[row]?.find(isRegionSpan);
        if (!span) {
          regCol.set(row, '');
          continue;
        }
        const splitted = sh.getSplitted(row);
        const parts: string[] = [];
        // Positions past the end of this row's sequence are skipped.
        for (let pos = span.positionIndex; pos <= span.endPositionIndex!; pos++) {
          if (pos < splitted.length)
            parts.push(splitted.getOriginal(pos));
        }
        regCol.set(row, parts.join(sh.separator || ''));
      }
      df.columns.add(regCol);
      grok.data.detectSemanticTypes(df);
      grok.shell.info(`Extracted region ${annotationName} as column "${colName}"`);
      return regCol;
    }
  }

  // Strategy 2: column-level position names.
  if (annot.start == null || annot.end == null) {
    grok.shell.warning(`Region annotation "${annotationName}" has no position range.`);
    return null;
  }

  const startIdx = sh.posList.indexOf(annot.start);
  const endIdx = sh.posList.indexOf(annot.end);
  if (startIdx < 0 || endIdx < 0) {
    grok.shell.warning(`Position names "${annot.start}" or "${annot.end}" not found in position list.`);
    return null;
  }

  const regionCol = sh.getRegion(startIdx, endIdx, colName);
  df.columns.add(regionCol);
  grok.data.detectSemanticTypes(df);
  grok.shell.info(`Extracted region ${annotationName} as column "${colName}"`);
  return regionCol;
}
@@ -0,0 +1,118 @@
1
+ /* eslint-disable max-len */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {
7
+ SeqAnnotation, AnnotationCategory, LiabilitySeverity,
8
+ } from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
9
+ import {getColumnAnnotations, setColumnAnnotations, clearAnnotations} from './annotation-manager';
10
+
11
/** Display labels for annotation categories, keyed by AnnotationCategory value.
 * Lookups in this file fall back to the raw category value when a key is missing. */
const categoryLabels: Record<string, string> = {
  [AnnotationCategory.Structure]: 'Structure (FR/CDR)',
  [AnnotationCategory.Liability]: 'Liability',
  [AnnotationCategory.PTM]: 'Post-translational Modification',
  [AnnotationCategory.Custom]: 'Custom',
};

/** Display labels for liability severities, keyed by LiabilitySeverity value.
 * Lookups in this file fall back to the raw severity value when a key is missing. */
const severityLabels: Record<string, string> = {
  [LiabilitySeverity.High]: 'High',
  [LiabilitySeverity.Medium]: 'Medium',
  [LiabilitySeverity.Low]: 'Low',
  [LiabilitySeverity.Info]: 'Info',
};
24
+
25
/**
 * Opens the "Manage Annotations" dialog for the current table view.
 *
 * The dialog lists the column-level annotations of the chosen macromolecule column and
 * lets the user recolor an annotation, remove a single annotation, or clear all of them.
 * Changes are written through the annotation-manager helpers immediately; the OK handler
 * itself does nothing.
 *
 * Shows a warning and returns when there is no open table or it has no macromolecule columns.
 */
export function showAnnotationManagerDialog(): void {
  const df = grok.shell.tv?.dataFrame;
  if (!df) {
    grok.shell.warning('No table open');
    return;
  }

  const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
  if (seqCols.length === 0) {
    grok.shell.warning('No macromolecule columns found');
    return;
  }

  // Created before the column input so refreshList() can never observe it uninitialized,
  // whenever the input's change callback happens to fire.
  const listDiv = ui.divV([], {style: {maxHeight: '380px', overflowY: 'auto', paddingRight: '8px'}});

  let selectedCol = seqCols[0];
  const colInput = ui.input.column('Sequence Column', {
    table: df, value: selectedCol,
    filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,
    onValueChanged: (col) => {
      // A cleared input yields no column; keep the previous selection instead of storing
      // null, which would make every later annotation read or write throw.
      if (col == null)
        return;
      selectedCol = col;
      refreshList();
    },
  });

  /** Rebuilds the annotation rows for the currently selected column. */
  function refreshList(): void {
    listDiv.innerHTML = '';
    const annotations = getColumnAnnotations(selectedCol);
    if (annotations.length === 0) {
      listDiv.append(ui.divText('No annotations on this column.', {style: {color: '#888', padding: '8px'}}));
      return;
    }

    for (const annot of annotations) {
      const catLabel = categoryLabels[annot.category] ?? annot.category;
      const sevLabel = annot.severity ? ` [${severityLabels[annot.severity] ?? annot.severity}]` : '';
      const rangeLabel = annot.start && annot.end ? ` (${annot.start}-${annot.end})` : '';
      const schemeLabel = annot.sourceScheme ? ` ${annot.sourceScheme}` : '';

      // Removal re-reads the stored list so it works on the latest state, not on this snapshot.
      const removeBtn = ui.iconFA('trash', () => {
        const updated = getColumnAnnotations(selectedCol).filter((a) => a.id !== annot.id);
        setColumnAnnotations(selectedCol, updated);
        df.fireValuesChanged();
        refreshList();
      });
      removeBtn.style.cursor = 'pointer';
      removeBtn.style.color = '#999';
      removeBtn.style.marginLeft = '8px';

      const originalColor = annot.color ?? '#ccc';
      let currentColor = originalColor;
      const colorSwatch = ui.div([], {style: {
        width: '12px', height: '12px', borderRadius: '2px',
        backgroundColor: currentColor, display: 'inline-block', marginRight: '6px',
        flexShrink: '0', cursor: 'pointer',
      }});

      // Three callbacks: the first records the picked color, the second persists it,
      // the third restores the original color on the swatch.
      ui.colorPicker(DG.Color.fromHtml(originalColor), (newColor) => {
        currentColor = DG.Color.toHtml(newColor);
      }, colorSwatch, () => {
        const updated = getColumnAnnotations(selectedCol).map((a) => a.id === annot.id ? {...a, color: currentColor} : a);
        setColumnAnnotations(selectedCol, updated);
        df.fireValuesChanged();
        refreshList();
      }, () => {
        currentColor = originalColor;
        colorSwatch.style.backgroundColor = currentColor;
      });

      const row = ui.divH([
        colorSwatch,
        ui.divText(`${annot.name}${rangeLabel}${schemeLabel}${sevLabel}`, {style: {flex: '1', fontSize: '12px', padding: '4px'}}),
        ui.divText(catLabel, {style: {color: '#888', fontSize: '11px', marginRight: '8px'}}),
        removeBtn,
      ], {style: {alignItems: 'center', padding: '4px 0', borderBottom: '1px solid #eee'}});

      listDiv.append(row);
    }
  }

  refreshList();

  const clearBtn = ui.button('Clear All', () => {
    clearAnnotations(df, selectedCol);
    df.fireValuesChanged();
    refreshList();
    grok.shell.info('All annotations cleared');
  });

  const dialog = ui.dialog({title: 'Manage Annotations'})
    .add(ui.inputs([colInput]))
    .add(ui.h3('Annotations'))
    .add(listDiv)
    .add(ui.divH([clearBtn], {style: {marginTop: '8px'}}))
    .onOK(() => {});

  dialog.show();
}
@@ -0,0 +1,163 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
4
+ import {
5
+ SeqAnnotation, SeqAnnotationHit, RowAnnotationData,
6
+ AnnotationCategory, AnnotationVisualType,
7
+ } from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
8
+ import {SeqRegion} from '../get-region-func-editor';
9
+
10
/** Prefix for hidden companion annotation columns (~ hides them in Datagrok).
 * Prepended to the sequence column name by getAnnotationColumnName(). */
const ANNOTATION_COL_PREFIX = '~';
12
+
13
/**
 * Reads column-level annotations from the `.annotations` tag.
 * Falls back to the legacy `.regions` tag for backward compatibility, converting each
 * region into an auto-generated structure annotation with id `legacy-region-<index>`.
 *
 * A tag is used only when it parses as a JSON array. Malformed JSON, or valid JSON that
 * is not an array (for example `null` or an object), is skipped, so callers always get
 * an array back.
 *
 * @param col Sequence column carrying the tags.
 * @returns The parsed annotations, or an empty array when neither tag is usable.
 */
export function getColumnAnnotations(col: DG.Column<string>): SeqAnnotation[] {
  const annotationsTag = col.getTag(bioTAGS.annotations);
  if (annotationsTag) {
    try {
      const parsed: unknown = JSON.parse(annotationsTag);
      if (Array.isArray(parsed))
        return parsed as SeqAnnotation[];
    } catch { /* malformed tag: fall through to the legacy regions */ }
  }
  // Backward compat: convert legacy .regions to SeqAnnotation[]
  const regionsTag = col.getTag(bioTAGS.regions);
  if (regionsTag) {
    try {
      const parsed: unknown = JSON.parse(regionsTag);
      if (Array.isArray(parsed)) {
        return (parsed as SeqRegion[]).map((r, i) => ({
          id: `legacy-region-${i}`,
          name: r.name,
          description: r.description,
          start: r.start,
          end: r.end,
          visualType: AnnotationVisualType.Region,
          category: AnnotationCategory.Structure,
          autoGenerated: true,
        }));
      }
    } catch { /* malformed legacy tag: treated as no annotations */ }
  }
  return [];
}
41
+
42
/**
 * Writes column-level annotations to the `.annotations` tag.
 * Also keeps `.regions` in sync for backward compatibility with GetRegionFuncEditor.
 *
 * `.regions` receives the structure annotations that have both `start` and `end`.
 * It is rewritten whenever the list contains structure annotations, and also when the
 * list has none but the tag still holds a value. The second case prevents a region that
 * was just removed from lingering in `.regions`. A column that never had regions and
 * gets no structure annotations is left without the tag.
 *
 * @param col Sequence column to tag.
 * @param annotations Complete annotation list to store.
 */
export function setColumnAnnotations(col: DG.Column<string>, annotations: SeqAnnotation[]): void {
  col.setTag(bioTAGS.annotations, JSON.stringify(annotations));

  const structureAnnotations = annotations.filter((a) => a.category === AnnotationCategory.Structure);
  const regions: SeqRegion[] = structureAnnotations
    .filter((a) => a.start != null && a.end != null)
    .map((a) => ({
      name: a.name,
      description: a.description ?? '',
      start: a.start!,
      end: a.end!,
    }));

  const hasStaleRegions = structureAnnotations.length === 0 && !!col.getTag(bioTAGS.regions);
  if (structureAnnotations.length > 0 || hasStaleRegions)
    col.setTag(bioTAGS.regions, JSON.stringify(regions));
}
60
+
61
/** Builds the companion annotation column name: the hidden-column prefix, the sequence column name, then `_annotations`. */
export function getAnnotationColumnName(seqColName: string): string {
  const suffix = '_annotations';
  return ANNOTATION_COL_PREFIX + seqColName + suffix;
}
65
+
66
/**
 * Returns the hidden companion column holding per-row annotation hits for `seqCol`.
 * When it does not exist yet, a new string column is added to `df` and its name is
 * recorded on `seqCol` in the `annotationColumnName` tag.
 */
export function getOrCreateAnnotationColumn(df: DG.DataFrame, seqCol: DG.Column<string>): DG.Column<string> {
  const colName = getAnnotationColumnName(seqCol.name);
  const existing = df.columns.byName(colName);
  if (existing)
    return existing as DG.Column<string>;

  const created = df.columns.addNewString(colName);
  seqCol.setTag(bioTAGS.annotationColumnName, colName);
  return created as DG.Column<string>;
}
76
+
77
/** Parsed row annotations per companion column; an entry is trusted only while its
 * stored version equals the column's current version. */
const _rowDataCache = new WeakMap<DG.Column, {version: number; data:(RowAnnotationData | null)[]}>();

/**
 * Reads the annotation hits of a single row from the companion column.
 * A cached value is returned when the cache entry matches the column version and holds
 * something for this row. Otherwise the raw cell is parsed directly (without updating
 * the cache); an empty cell or invalid JSON yields null.
 */
export function getRowAnnotations(annotCol: DG.Column<string>, rowIdx: number): RowAnnotationData | null {
  const entry = _rowDataCache.get(annotCol);
  const cachedRow = entry !== undefined && entry.version === annotCol.version ?
    entry.data[rowIdx] : undefined;
  if (cachedRow !== undefined)
    return cachedRow;

  const raw = annotCol.get(rowIdx);
  if (!raw)
    return null;

  let parsed: RowAnnotationData | null;
  try {
    parsed = JSON.parse(raw) as RowAnnotationData;
  } catch {
    parsed = null;
  }
  return parsed;
}
93
+
94
/**
 * Returns the parsed annotation hits for every row of the companion column.
 * The result is reused from the cache while the column version is unchanged; otherwise
 * every cell is parsed again (empty cells and invalid JSON become null) and the cache
 * entry is replaced.
 */
export function cacheAllRowAnnotations(annotCol: DG.Column<string>): (RowAnnotationData | null)[] {
  const known = _rowDataCache.get(annotCol);
  if (known !== undefined && known.version === annotCol.version)
    return known.data;

  const parseCell = (raw: string | null): RowAnnotationData | null => {
    if (!raw)
      return null;
    try {
      return JSON.parse(raw) as RowAnnotationData;
    } catch {
      return null;
    }
  };

  const data: (RowAnnotationData | null)[] =
    Array.from({length: annotCol.length}, (_, row) => parseCell(annotCol.get(row)));
  _rowDataCache.set(annotCol, {version: annotCol.version, data});
  return data;
}
116
+
117
/** Stores the hits of one row as JSON in the companion column; an empty hit list is stored as an empty string. */
export function setRowAnnotations(annotCol: DG.Column<string>, rowIdx: number, hits: SeqAnnotationHit[]): void {
  const serialized = hits.length === 0 ? '' : JSON.stringify(hits);
  annotCol.set(rowIdx, serialized);
}
121
+
122
/**
 * Clears all annotations from a column (both column-level and row-level).
 *
 * Empties the `.annotations` and `.regions` tags, removes the hidden companion column
 * from `df` when it exists, and empties the `annotationColumnName` tag so the sequence
 * column no longer points at a companion column that has been removed.
 *
 * @param df Table that owns the companion column.
 * @param seqCol Sequence column whose annotations are cleared.
 */
export function clearAnnotations(df: DG.DataFrame, seqCol: DG.Column<string>): void {
  seqCol.setTag(bioTAGS.annotations, '');
  const annotColName = getAnnotationColumnName(seqCol.name);
  const annotCol = df.columns.byName(annotColName);
  if (annotCol)
    df.columns.remove(annotColName);
  // getOrCreateAnnotationColumn() records the companion column name in this tag;
  // reset it only when it holds a value so untouched columns gain no new tag.
  if (seqCol.getTag(bioTAGS.annotationColumnName))
    seqCol.setTag(bioTAGS.annotationColumnName, '');
  // Clear .regions too
  seqCol.setTag(bioTAGS.regions, '');
}
132
+
133
/** Appends `annotation` to the column's stored annotation list and writes the list back. */
export function addColumnAnnotation(col: DG.Column<string>, annotation: SeqAnnotation): void {
  const extended = [...getColumnAnnotations(col), annotation];
  setColumnAnnotations(col, extended);
}
139
+
140
/** Writes back the column's annotation list without the entries whose id equals `annotationId`. */
export function removeColumnAnnotation(col: DG.Column<string>, annotationId: string): void {
  const remaining: SeqAnnotation[] = [];
  for (const annotation of getColumnAnnotations(col)) {
    if (annotation.id !== annotationId)
      remaining.push(annotation);
  }
  setColumnAnnotations(col, remaining);
}
145
+
146
/**
 * Combines the existing hits of a row with new ones, optionally dropping one or both
 * kinds of existing hits first. A hit is a region span when `endPositionIndex` is set;
 * every other hit is treated as a liability (point) hit.
 *
 * @param existingHits Current per-row hits (not modified).
 * @param newHits Hits appended after the surviving existing ones.
 * @param replaceRegions If true, existing region-span hits are dropped.
 * @param replaceLiabilities If true, existing non-region hits are dropped.
 * @returns A new array: surviving existing hits followed by `newHits`.
 */
export function mergeRowHits(
  existingHits: SeqAnnotationHit[],
  newHits: SeqAnnotationHit[],
  replaceRegions: boolean,
  replaceLiabilities: boolean,
): SeqAnnotationHit[] {
  const merged: SeqAnnotationHit[] = [];
  for (const hit of existingHits) {
    const isRegionSpan = hit.endPositionIndex != null;
    const dropped = isRegionSpan ? replaceRegions : replaceLiabilities;
    if (!dropped)
      merged.push(hit);
  }
  for (const hit of newHits)
    merged.push(hit);
  return merged;
}
@@ -0,0 +1,88 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {_package} from '../../package';
6
+ import {
7
+ BUILTIN_LIABILITY_RULES, LiabilityRule, scanLiabilities,
8
+ applyLiabilityScanResults, createLiabilitySummaryColumn,
9
+ } from './liability-scanner';
10
+ import {LiabilitySeverity} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
11
+
12
/** Display labels for liability severities, keyed by LiabilitySeverity value.
 * Used for the rule checkbox tooltips, which fall back to the raw severity value when a key is missing. */
const severityLabels: Record<string, string> = {
  [LiabilitySeverity.High]: 'High',
  [LiabilitySeverity.Medium]: 'Medium',
  [LiabilitySeverity.Low]: 'Low',
  [LiabilitySeverity.Info]: 'Info',
};
18
+
19
/**
 * Opens the "Scan Sequence Liabilities" dialog for the current table view.
 *
 * The dialog offers one checkbox per built-in liability rule plus three output options.
 * On OK the selected sequence column is scanned with the enabled rules; results are
 * applied to the table and, optionally, a summary count column is created.
 *
 * Shows a warning and returns when there is no open table or it has no macromolecule
 * columns. Errors raised during the scan are reported to the user and logged.
 */
export function showLiabilityScannerDialog(): void {
  const df = grok.shell.tv?.dataFrame;
  if (!df) {
    grok.shell.warning('No table open');
    return;
  }

  const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
  if (seqCols.length === 0) {
    grok.shell.warning('No macromolecule columns found');
    return;
  }

  // Work on copies so toggling `enabled` below never mutates the built-in rule objects.
  // NOTE(review): the copied pattern keeps only the source and gets the 'g' flag; any
  // other flags on a built-in pattern would be dropped — confirm none are used.
  const rules = BUILTIN_LIABILITY_RULES.map((r) => ({...r, pattern: new RegExp(r.pattern.source, 'g')}));

  // NOTE(review): the table input is displayed but its value is never read; the scan
  // always runs against `df` taken from the current table view above.
  const tableInput = ui.input.table('Table', {value: df});
  const seqInput = ui.input.column('Sequence', {
    table: df, value: seqCols[0],
    filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,
  });

  // Rule checkboxes
  const ruleChecks: {rule: LiabilityRule; input: DG.InputBase<boolean>}[] = [];
  const rulesDiv = ui.divV([]);
  for (const rule of rules) {
    const check = ui.input.bool(rule.name, {
      value: rule.enabled,
      tooltipText: `Severity: ${severityLabels[rule.severity] ?? rule.severity}`,
    });
    ruleChecks.push({rule, input: check});
    rulesDiv.append(check.root);
  }

  const highlightInput = ui.input.bool('Highlight in cell renderer', {value: true});
  const annotColInput = ui.input.bool('Create annotation column', {value: true});
  const summaryInput = ui.input.bool('Create summary count column', {value: false});

  const dialog = ui.dialog({title: 'Scan Sequence Liabilities'})
    .add(ui.inputs([tableInput, seqInput]))
    .add(ui.h3('Rules'))
    .add(rulesDiv)
    .add(ui.h3('Output'))
    .add(ui.inputs([highlightInput, annotColInput, summaryInput]))
    .onOK(() => {
      // Guard explicitly instead of asserting non-null: with no column selected the
      // scan cannot run, and a clear message beats a property-access error.
      const seqCol = seqInput.value;
      if (seqCol == null) {
        grok.shell.warning('No sequence column selected');
        return;
      }

      try {
        const sh = _package.seqHelper.getSeqHandler(seqCol);

        // Apply checkbox state
        for (const {rule, input} of ruleChecks)
          rule.enabled = input.value;

        const result = scanLiabilities(seqCol, sh, rules);

        // Both highlighting and the annotation column are served by the same apply step.
        if (annotColInput.value || highlightInput.value)
          applyLiabilityScanResults(df, seqCol, result);

        if (summaryInput.value)
          createLiabilitySummaryColumn(df, seqCol, result);

        grok.shell.info(`Liability scan: ${result.totalHits} hits found across ${result.annotations.length} rules`);
        df.fireValuesChanged();
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        grok.shell.error(`Liability scan failed: ${message}`);
        console.error(err);
      }
    });

  dialog.show();
}