npm - @datagrok/bio - Versions diffs - 2.25.17 → 2.26.1 - Mend

@datagrok/bio 2.25.17 → 2.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/CHANGELOG.md +4 -0
package/dist/282.js +2 -0
package/dist/282.js.map +1 -0
package/dist/287.js +2 -0
package/dist/287.js.map +1 -0
package/dist/288.js +2 -0
package/dist/288.js.map +1 -0
package/dist/422.js +2 -0
package/dist/422.js.map +1 -0
package/dist/455.js +1 -1
package/dist/455.js.map +1 -1
package/dist/767.js +2 -0
package/dist/767.js.map +1 -0
package/dist/package-test.js +5 -5
package/dist/package-test.js.map +1 -1
package/dist/package.js +3 -3
package/dist/package.js.map +1 -1
package/files/samples/antibodies.csv +494 -0
package/package.json +2 -2
package/src/package-api.ts +28 -0
package/src/package.g.ts +31 -1
package/src/package.ts +40 -1
package/src/tests/substructure-filters-tests.ts +1 -0
package/src/utils/annotations/annotation-actions.ts +130 -0
package/src/utils/annotations/annotation-manager-ui.ts +118 -0
package/src/utils/annotations/annotation-manager.ts +163 -0
package/src/utils/annotations/liability-scanner-ui.ts +88 -0
package/src/utils/annotations/liability-scanner.ts +147 -0
package/src/utils/annotations/numbering-ui.ts +472 -0
package/src/utils/antibody-numbering (WIP)/alignment.ts +578 -0
package/src/utils/antibody-numbering (WIP)/annotator.ts +120 -0
package/src/utils/antibody-numbering (WIP)/data/blosum62.ts +55 -0
package/src/utils/antibody-numbering (WIP)/data/consensus-aho.ts +155 -0
package/src/utils/antibody-numbering (WIP)/data/consensus-imgt.ts +162 -0
package/src/utils/antibody-numbering (WIP)/data/consensus-kabat.ts +157 -0
package/src/utils/antibody-numbering (WIP)/data/consensus-martin.ts +152 -0
package/src/utils/antibody-numbering (WIP)/data/consensus.ts +36 -0
package/src/utils/antibody-numbering (WIP)/data/regions.ts +63 -0
package/src/utils/antibody-numbering (WIP)/index.ts +31 -0
package/src/utils/antibody-numbering (WIP)/testdata.ts +5356 -0
package/src/utils/antibody-numbering (WIP)/types.ts +69 -0
package/src/utils/context-menu.ts +42 -2
package/src/utils/get-region-func-editor.ts +18 -2
package/src/utils/get-region.ts +167 -17
package/src/utils/sequence-column-input.ts +57 -0
package/src/viewers/vd-regions-viewer.ts +2 -0
package/src/widgets/representations.ts +53 -2
package/src/widgets/sequence-scrolling-widget.ts +28 -18
package/test-console-output-1.log +587 -551
package/test-record-1.mp4 +0 -0

package/package.json CHANGED Viewed

@@ -5,7 +5,7 @@
     "name": "Davit Rizhinashvili",
     "email": "drizhinashvili@datagrok.ai"
   },
-  "version": "2.25.17",
+  "version": "2.26.1",
   "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
   "repository": {
     "type": "git",
@@ -44,7 +44,7 @@
   ],
   "dependencies": {
     "@biowasm/aioli": "^3.1.0",
-    "@datagrok-libraries/bio": "^5.62.1",
+    "@datagrok-libraries/bio": "^5.63.2",
     "@datagrok-libraries/chem-meta": "^1.2.9",
     "@datagrok-libraries/math": "^1.2.6",
     "@datagrok-libraries/ml": "^6.10.9",

package/src/package-api.ts CHANGED Viewed

@@ -157,6 +157,34 @@ export namespace funcs {
     return await grok.functions.call('Bio:GetRegionTopMenu', { table, sequence, start, end, name });
   }
+  /**
+   * Calls the registered `Bio:ApplyNumberingScheme` function with no arguments.
+   * Function description: assigns antibody numbering (IMGT/Kabat/Chothia/AHo) using AntPack.
+   */
+  export async function applyNumberingScheme(): Promise<void> {
+    const callResult = await grok.functions.call('Bio:ApplyNumberingScheme', {});
+    return callResult;
+  }
+  /**
+   * Calls the registered `Bio:ScanLiabilities` function with no arguments.
+   * Function description: scans macromolecule sequences for deamidation, oxidation, and other liabilities.
+   */
+  export async function scanLiabilities(): Promise<void> {
+    const callResult = await grok.functions.call('Bio:ScanLiabilities', {});
+    return callResult;
+  }
+  /**
+   * Calls the registered `Bio:ManageAnnotations` function with no arguments.
+   * Function description: view and manage sequence annotations on macromolecule columns.
+   */
+  export async function manageAnnotations(): Promise<void> {
+    const callResult = await grok.functions.call('Bio:ManageAnnotations', {});
+    return callResult;
+  }
+  /**
+   * Calls the registered `Bio:SequenceColumnInput` function, forwarding `name` and `options` unchanged.
+   * Function description: creates a new input for sequence columns with ability to extract a region.
+   */
+  export async function sequenceColumnInput(name: string , options: any ): Promise<any> {
+    const callArgs = {name, options};
+    return await grok.functions.call('Bio:SequenceColumnInput', callArgs);
+  }
   /**
   Detects pairs of molecules with similar structure and significant difference in any given property
   */

package/src/package.g.ts CHANGED Viewed

@@ -217,12 +217,42 @@ export function getRegion(sequence: DG.Column<any>, start?: string, end?: string
 //input: string start { optional: true; description: Region start position name }
 //input: string end { optional: true; description: Region end position name }
 //input: string name { optional: true; description: Region column name }
-//top-menu: Bio | Calculate | Get Region...
+//top-menu: Bio | Calculate | Extract Region...
 //editor: Bio:GetRegionEditor
 export async function getRegionTopMenu(table: DG.DataFrame, sequence: DG.Column, start?: string, end?: string, name?: string) : Promise<void> {
   await PackageFunctions.getRegionTopMenu(table, sequence, start, end, name);
 }
+//name: Apply Numbering Scheme
+//description: Assigns antibody numbering (IMGT/Kabat/Chothia/AHo) using AntPack
+//top-menu: Bio | Annotate | Apply Numbering Scheme...
+export function applyNumberingScheme() : void {
+  // Thin wrapper: forwards to the static PackageFunctions implementation; nothing is returned.
+  PackageFunctions.applyNumberingScheme();
+}
+//name: Scan Liabilities
+//description: Scans macromolecule sequences for deamidation, oxidation, and other liabilities
+//top-menu: Bio | Annotate | Scan Liabilities...
+export function scanLiabilities() : void {
+  // Thin wrapper: forwards to the static PackageFunctions implementation; nothing is returned.
+  PackageFunctions.scanLiabilities();
+}
+//name: Manage Annotations
+//description: View and manage sequence annotations on macromolecule columns
+//top-menu: Bio | Annotate | Manage Annotations...
+export function manageAnnotations() : void {
+  // Thin wrapper: forwards to the static PackageFunctions implementation; nothing is returned.
+  PackageFunctions.manageAnnotations();
+}
+//name: Sequence Column Input
+//description: Creates a new input for sequence columns with ability to extract a region
+//input: string name
+//input: dynamic options
+//output: dynamic result
+export function sequenceColumnInput(name: string, options: any) : any {
+  // Thin wrapper: passes both arguments through and returns whatever the PackageFunctions implementation returns.
+  return PackageFunctions.sequenceColumnInput(name, options);
+}
 //name: Sequence Activity Cliffs
 //description: Detects pairs of molecules with similar structure and significant difference in any given property
 //input: dataframe table { description: Input data table }

package/src/package.ts CHANGED Viewed

@@ -79,6 +79,8 @@ import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget
 import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
 import {BilnNotationProvider} from './utils/biln';
 import {showMonomerCollectionsView} from './utils/monomer-lib/monomer-collections-view';
+import {ISequenceColumnInput} from '@datagrok-libraries/bio/src/utils/sequence-column-input';
+import {SequenceColumnInput} from './utils/sequence-column-input';
 import * as api from './package-api';
 export const _package = new BioPackage(/*{debug: true}/**/);
@@ -445,7 +447,7 @@ export class PackageFunctions {
   @grok.decorators.func({
     name: 'Get Region Top Menu',
     description: 'Get sequences for a region specified from a Macromolecule',
-    'top-menu': 'Bio | Calculate | Get Region...',
+    'top-menu': 'Bio | Calculate | Extract Region...',
     editor: 'Bio:GetRegionEditor'})
   static async getRegionTopMenu(
     @grok.decorators.param({options: {description: 'Input data table'}})table: DG.DataFrame,
@@ -459,6 +461,43 @@ export class PackageFunctions {
     await grok.data.detectSemanticTypes(sequence.dataFrame); // to set renderer
   }
+  // -- Annotation menu entries --
+  /**
+   * Top-menu entry that lazily loads the numbering UI module and opens its dialog.
+   * The dynamic import is fire-and-forget (the method returns void), so a failure to load the
+   * module or to open the dialog is reported to the user instead of becoming an unhandled rejection.
+   */
+  @grok.decorators.func({
+    name: 'Apply Numbering Scheme',
+    description: 'Assigns antibody numbering (IMGT/Kabat/Chothia/AHo) using AntPack',
+    'top-menu': 'Bio | Annotate | Apply Numbering Scheme...',
+  })
+  static applyNumberingScheme(): void {
+    import('./utils/annotations/numbering-ui')
+      .then((m) => m.showNumberingSchemeDialog())
+      .catch((err: unknown) => {
+        grok.shell.error(`Apply Numbering Scheme failed: ${err instanceof Error ? err.message : String(err)}`);
+        console.error(err);
+      });
+  }
+  /**
+   * Top-menu entry that lazily loads the liability scanner UI module and opens its dialog.
+   * The dynamic import is fire-and-forget (the method returns void), so a failure to load the
+   * module or to open the dialog is reported to the user instead of becoming an unhandled rejection.
+   */
+  @grok.decorators.func({
+    name: 'Scan Liabilities',
+    description: 'Scans macromolecule sequences for deamidation, oxidation, and other liabilities',
+    'top-menu': 'Bio | Annotate | Scan Liabilities...',
+  })
+  static scanLiabilities(): void {
+    import('./utils/annotations/liability-scanner-ui')
+      .then((m) => m.showLiabilityScannerDialog())
+      .catch((err: unknown) => {
+        grok.shell.error(`Scan Liabilities failed: ${err instanceof Error ? err.message : String(err)}`);
+        console.error(err);
+      });
+  }
+  /**
+   * Top-menu entry that lazily loads the annotation manager UI module and opens its dialog.
+   * The dynamic import is fire-and-forget (the method returns void), so a failure to load the
+   * module or to open the dialog is reported to the user instead of becoming an unhandled rejection.
+   */
+  @grok.decorators.func({
+    name: 'Manage Annotations',
+    description: 'View and manage sequence annotations on macromolecule columns',
+    'top-menu': 'Bio | Annotate | Manage Annotations...',
+  })
+  static manageAnnotations(): void {
+    import('./utils/annotations/annotation-manager-ui')
+      .then((m) => m.showAnnotationManagerDialog())
+      .catch((err: unknown) => {
+        grok.shell.error(`Manage Annotations failed: ${err instanceof Error ? err.message : String(err)}`);
+        console.error(err);
+      });
+  }
+  /** Builds a sequence column input by delegating to `SequenceColumnInput.create` with the same arguments. */
+  @grok.decorators.func({
+    name: 'Sequence Column Input',
+    description: 'Creates a new input for sequence columns with ability to extract a region',
+  })
+  static sequenceColumnInput(name: string, options: any): ISequenceColumnInput {
+    const seqInput: ISequenceColumnInput = SequenceColumnInput.create(name, options);
+    return seqInput;
+  }
   @grok.decorators.func({
     name: 'Sequence Activity Cliffs',
     description: 'Detects pairs of molecules with similar structure and significant difference in any given property',

package/src/tests/substructure-filters-tests.ts CHANGED Viewed

@@ -428,6 +428,7 @@ category('bio-substructure-filters', async () => {
     await awaitGrid(view.grid);
     const seqFilter = fg.filters[0] as BioSubstructureFilter;
+    await awaitCheck(() => seqFilter.bioFilter !== null, 'FastaBioFilter hasn\'t been created', 1000);
     const seqBf = seqFilter.bioFilter as FastaBioFilter;
     await testEvent(df.onRowsFiltered, () => {}, () => {
       seqBf.props = new BioFilterProps(fSubStr, undefined, _package.logger);

package/src/utils/annotations/annotation-actions.ts ADDED Viewed

@@ -0,0 +1,130 @@
+import * as grok from 'datagrok-api/grok';
+import * as DG from 'datagrok-api/dg';
+import {AnnotationCategory, SeqAnnotationHit} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
+import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
+import {getAnnotationColumnName, getColumnAnnotations, cacheAllRowAnnotations} from './annotation-manager';
+/** Restricts the table filter to rows whose companion annotation cell holds at least one hit.
+ *  Warns and leaves the filter untouched when the companion column for `seqCol` cannot be found. */
+export function filterByLiabilityHits(df: DG.DataFrame, seqCol: DG.Column<string>): void {
+  const companionName = getAnnotationColumnName(seqCol.name);
+  let companion: DG.Column<string> | null;
+  try {
+    companion = df.columns.byName(companionName) as DG.Column<string>;
+  } catch {
+    // lookup failure is treated the same as a missing column
+    companion = null;
+  }
+  if (!companion) {
+    grok.shell.warning('No annotation data found. Run liability scanning first.');
+    return;
+  }
+  const hitsPerRow = cacheAllRowAnnotations(companion);
+  const mask = DG.BitSet.create(df.rowCount);
+  for (let row = 0; row < df.rowCount; row++) {
+    const hits = hitsPerRow[row];
+    const hasHits = hits && hits.length > 0;
+    if (hasHits)
+      mask.set(row, true);
+  }
+  df.filter.copyFrom(mask);
+  grok.shell.info(`Filtered to ${mask.trueCount} rows with liability hits`);
+}
+/** Replaces the table selection with the rows that have at least one hit whose `annotationId` matches.
+ *  Warns and leaves the selection untouched when the companion column for `seqCol` cannot be found. */
+export function selectRowsWithAnnotation(df: DG.DataFrame, seqCol: DG.Column<string>, annotationId: string): void {
+  const companionName = getAnnotationColumnName(seqCol.name);
+  let companion: DG.Column<string> | null;
+  try {
+    companion = df.columns.byName(companionName) as DG.Column<string>;
+  } catch {
+    // lookup failure is treated the same as a missing column
+    companion = null;
+  }
+  if (!companion) {
+    grok.shell.warning('No annotation data found.');
+    return;
+  }
+  const hitsPerRow = cacheAllRowAnnotations(companion);
+  const mask = DG.BitSet.create(df.rowCount);
+  for (let row = 0; row < df.rowCount; row++) {
+    const rowMatches = hitsPerRow[row]?.some((hit) => hit.annotationId === annotationId);
+    if (rowMatches)
+      mask.set(row, true);
+  }
+  df.selection.copyFrom(mask);
+  grok.shell.info(`Selected ${mask.trueCount} rows with ${annotationId} hits`);
+}
+/** Adds an extracted region column to the table, starts semantic type detection and notifies the user. */
+function _publishRegionColumn(
+  df: DG.DataFrame, regCol: DG.Column<string>, annotationName: string, colName: string,
+): void {
+  df.columns.add(regCol);
+  // detectSemanticTypes returns a promise and the caller is synchronous, so the call is fire-and-forget;
+  // log a rejection instead of leaving it unhandled.
+  grok.data.detectSemanticTypes(df).catch((err: unknown) => console.error(err));
+  grok.shell.info(`Extracted region ${annotationName} as column "${colName}"`);
+}
+/** Extracts a named region annotation as a new column.
+ *  Uses per-row region spans from the companion column when available (unaligned data),
+ *  falls back to column-level position names (aligned/MSA data).
+ *  @param df Table that receives the new column
+ *  @param seqCol Sequence column the region is cut from
+ *  @param annotationName Name of a Structure-category annotation on `seqCol`
+ *  @param seqHelper Supplies the sequence handler used to split sequences and cut regions
+ *  @returns The added column, or null (after a warning) when the annotation is missing,
+ *  has no position range, or its start/end names are not in the position list. */
+export function extractAnnotatedRegion(
+  df: DG.DataFrame,
+  seqCol: DG.Column<string>,
+  annotationName: string,
+  seqHelper: ISeqHelper,
+): DG.Column<string> | null {
+  const annotations = getColumnAnnotations(seqCol);
+  const annot = annotations.find((a) =>
+    a.name === annotationName && a.category === AnnotationCategory.Structure);
+  if (!annot) {
+    grok.shell.warning(`Region annotation "${annotationName}" not found.`);
+    return null;
+  }
+  const sh = seqHelper.getSeqHandler(seqCol);
+  const colName = `${seqCol.name}(${annotationName})`;
+  // Try per-row extraction using companion column region spans
+  const annotColName = getAnnotationColumnName(seqCol.name);
+  let annotCol: DG.Column<string> | null = null;
+  try { annotCol = df.columns.byName(annotColName) as DG.Column<string>; } catch { /* not found */ }
+  if (annotCol) {
+    const allRowData = cacheAllRowAnnotations(annotCol);
+    // A hit counts as a region span for this annotation when it carries an end index.
+    const isRegionHit = (h: SeqAnnotationHit): boolean =>
+      h.annotationId === annot.id && h.endPositionIndex != null;
+    const hasPerRowRegions = allRowData.some((rd) => rd?.some(isRegionHit));
+    if (hasPerRowRegions) {
+      const regCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, colName, df.rowCount);
+      for (let i = 0; i < df.rowCount; i++) {
+        const regionHit = allRowData[i]?.find(isRegionHit);
+        if (regionHit) {
+          const splitted = sh.getSplitted(i);
+          const parts: string[] = [];
+          // Clamp the span to the actual sequence length so shorter rows yield a truncated region.
+          const lastIdx = Math.min(regionHit.endPositionIndex!, splitted.length - 1);
+          for (let p = regionHit.positionIndex; p <= lastIdx; p++)
+            parts.push(splitted.getOriginal(p));
+          regCol.set(i, parts.join(sh.separator || ''));
+        } else
+          regCol.set(i, '');
+      }
+      _publishRegionColumn(df, regCol, annotationName, colName);
+      return regCol;
+    }
+  }
+  // Fall back to column-level position names (aligned/MSA data)
+  if (annot.start == null || annot.end == null) {
+    grok.shell.warning(`Region annotation "${annotationName}" has no position range.`);
+    return null;
+  }
+  const startIdx = sh.posList.indexOf(annot.start);
+  const endIdx = sh.posList.indexOf(annot.end);
+  if (startIdx < 0 || endIdx < 0) {
+    grok.shell.warning(`Position names "${annot.start}" or "${annot.end}" not found in position list.`);
+    return null;
+  }
+  const regCol = sh.getRegion(startIdx, endIdx, colName);
+  _publishRegionColumn(df, regCol, annotationName, colName);
+  return regCol;
+}

package/src/utils/annotations/annotation-manager-ui.ts ADDED Viewed

@@ -0,0 +1,118 @@
+/* eslint-disable max-len */
+import * as grok from 'datagrok-api/grok';
+import * as ui from 'datagrok-api/ui';
+import * as DG from 'datagrok-api/dg';
+import {
+  SeqAnnotation, AnnotationCategory, LiabilitySeverity,
+} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
+import {getColumnAnnotations, setColumnAnnotations, clearAnnotations} from './annotation-manager';
+/** Display text for each annotation category; lookups that miss fall back to the raw category value at the call site. */
+const categoryLabels: Record<string, string> = {
+  [AnnotationCategory.Structure]: 'Structure (FR/CDR)',
+  [AnnotationCategory.Liability]: 'Liability',
+  [AnnotationCategory.PTM]: 'Post-translational Modification',
+  [AnnotationCategory.Custom]: 'Custom',
+};
+/** Display text for each liability severity; lookups that miss fall back to the raw severity value at the call site. */
+const severityLabels: Record<string, string> = {
+  [LiabilitySeverity.High]: 'High',
+  [LiabilitySeverity.Medium]: 'Medium',
+  [LiabilitySeverity.Low]: 'Low',
+  [LiabilitySeverity.Info]: 'Info',
+};
+/** Opens the "Manage Annotations" dialog for the table of the current table view.
+ *  Lists the column-level annotations of the selected macromolecule column and lets the user
+ *  recolor or remove single annotations, or clear all of them.
+ *  Shows a warning and returns when no table is open or it has no macromolecule columns. */
+export function showAnnotationManagerDialog(): void {
+  const df = grok.shell.tv?.dataFrame;
+  if (!df) {
+    grok.shell.warning('No table open');
+    return;
+  }
+  const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
+  if (seqCols.length === 0) {
+    grok.shell.warning('No macromolecule columns found');
+    return;
+  }
+  let selectedCol = seqCols[0];
+  const colInput = ui.input.column('Sequence Column', {
+    table: df, value: selectedCol,
+    filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,
+    onValueChanged: (col) => {
+      // The callback value is nullable; keep the previous selection instead of
+      // passing null on to getColumnAnnotations.
+      if (col == null)
+        return;
+      selectedCol = col;
+      refreshList();
+    },
+  });
+  const listDiv = ui.divV([], {style: {maxHeight: '380px', overflowY: 'auto', paddingRight: '8px'}});
+  /** Rebuilds the annotation list from the tags of the currently selected column. */
+  function refreshList(): void {
+    listDiv.innerHTML = '';
+    const annotations = getColumnAnnotations(selectedCol);
+    if (annotations.length === 0) {
+      listDiv.append(ui.divText('No annotations on this column.', {style: {color: '#888', padding: '8px'}}));
+      return;
+    }
+    for (const annot of annotations) {
+      const catLabel = categoryLabels[annot.category] ?? annot.category;
+      const sevLabel = annot.severity ? ` [${severityLabels[annot.severity] ?? annot.severity}]` : '';
+      const rangeLabel = annot.start && annot.end ? ` (${annot.start}-${annot.end})` : '';
+      const schemeLabel = annot.sourceScheme ? ` ${annot.sourceScheme}` : '';
+      const removeBtn = ui.iconFA('trash', () => {
+        // Re-read the annotations so removals made since this row was rendered are not resurrected.
+        const updated = getColumnAnnotations(selectedCol).filter((a) => a.id !== annot.id);
+        setColumnAnnotations(selectedCol, updated);
+        df.fireValuesChanged();
+        refreshList();
+      });
+      removeBtn.style.cursor = 'pointer';
+      removeBtn.style.color = '#999';
+      removeBtn.style.marginLeft = '8px';
+      const originalColor = annot.color ?? '#ccc';
+      let currentColor = originalColor;
+      const colorSwatch = ui.div([], {style: {
+        width: '12px', height: '12px', borderRadius: '2px',
+        backgroundColor: currentColor, display: 'inline-block', marginRight: '6px',
+        flexShrink: '0', cursor: 'pointer',
+      }});
+      // Callbacks in order: track the picked color, persist it, restore the original swatch color.
+      ui.colorPicker(DG.Color.fromHtml(originalColor), (newColor) => {
+        currentColor = DG.Color.toHtml(newColor);
+      }, colorSwatch, () => {
+        const updated = getColumnAnnotations(selectedCol).map((a) => a.id === annot.id ? {...a, color: currentColor} : a);
+        setColumnAnnotations(selectedCol, updated);
+        df.fireValuesChanged();
+        refreshList();
+      }, () => {
+        currentColor = originalColor;
+        colorSwatch.style.backgroundColor = currentColor;
+      });
+      const row = ui.divH([
+        colorSwatch,
+        ui.divText(`${annot.name}${rangeLabel}${schemeLabel}${sevLabel}`, {style: {flex: '1', fontSize: '12px', padding: '4px'}}),
+        ui.divText(catLabel, {style: {color: '#888', fontSize: '11px', marginRight: '8px'}}),
+        removeBtn,
+      ], {style: {alignItems: 'center', padding: '4px 0', borderBottom: '1px solid #eee'}});
+      listDiv.append(row);
+    }
+  }
+  refreshList();
+  const clearBtn = ui.button('Clear All', () => {
+    clearAnnotations(df, selectedCol);
+    df.fireValuesChanged();
+    refreshList();
+    grok.shell.info('All annotations cleared');
+  });
+  // Edits are applied immediately by the handlers above, so OK has nothing left to commit.
+  const dialog = ui.dialog({title: 'Manage Annotations'})
+    .add(ui.inputs([colInput]))
+    .add(ui.h3('Annotations'))
+    .add(listDiv)
+    .add(ui.divH([clearBtn], {style: {marginTop: '8px'}}))
+    .onOK(() => {});
+  dialog.show();
+}

package/src/utils/annotations/annotation-manager.ts ADDED Viewed

@@ -0,0 +1,163 @@
+import * as DG from 'datagrok-api/dg';
+import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
+import {
+  SeqAnnotation, SeqAnnotationHit, RowAnnotationData,
+  AnnotationCategory, AnnotationVisualType,
+} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
+import {SeqRegion} from '../get-region-func-editor';
+/** Prefix for hidden companion annotation columns (~ hides them in Datagrok).
+ *  Prepended to the sequence column name by getAnnotationColumnName. */
+const ANNOTATION_COL_PREFIX = '~';
+/** Reads column-level annotations from the `.annotations` tag.
+ *  Falls back to `.regions` for backward compatibility.
+ *  A tag that is missing, empty, malformed JSON, or JSON that is not an array is treated as absent,
+ *  so callers always receive an array.
+ *  @param col Sequence column whose tags are read
+ *  @returns The stored annotations, the legacy regions converted to annotations, or an empty array. */
+export function getColumnAnnotations(col: DG.Column<string>): SeqAnnotation[] {
+  const annotationsTag = col.getTag(bioTAGS.annotations);
+  if (annotationsTag) {
+    try {
+      const parsed: unknown = JSON.parse(annotationsTag);
+      // Valid JSON is not necessarily a list (e.g. `{}` or `null`); callers use array methods on the result.
+      if (Array.isArray(parsed))
+        return parsed as SeqAnnotation[];
+    } catch { /* fall through */ }
+  }
+  // Backward compat: convert legacy .regions to SeqAnnotation[]
+  const regionsTag = col.getTag(bioTAGS.regions);
+  if (regionsTag) {
+    try {
+      const parsed: unknown = JSON.parse(regionsTag);
+      if (Array.isArray(parsed)) {
+        return (parsed as SeqRegion[]).map((r, i) => ({
+          id: `legacy-region-${i}`,
+          name: r.name,
+          description: r.description,
+          start: r.start,
+          end: r.end,
+          visualType: AnnotationVisualType.Region,
+          category: AnnotationCategory.Structure,
+          autoGenerated: true,
+        }));
+      }
+    } catch { /* ignore */ }
+  }
+  return [];
+}
+/** Writes column-level annotations to the `.annotations` tag.
+ *  Also keeps `.regions` in sync for backward compatibility with GetRegionFuncEditor.
+ *  `.regions` is rewritten on every call, including with an empty list, so removing the last
+ *  structure annotation does not leave stale regions behind.
+ *  @param col Sequence column whose tags are written
+ *  @param annotations Complete replacement list of column-level annotations */
+export function setColumnAnnotations(col: DG.Column<string>, annotations: SeqAnnotation[]): void {
+  col.setTag(bioTAGS.annotations, JSON.stringify(annotations));
+  // Only structure annotations with both a start and an end can be expressed as legacy regions.
+  const regions: SeqRegion[] = annotations
+    .filter((a) => a.category === AnnotationCategory.Structure && a.start != null && a.end != null)
+    .map((a) => ({
+      name: a.name,
+      description: a.description ?? '',
+      start: a.start!,
+      end: a.end!,
+    }));
+  col.setTag(bioTAGS.regions, JSON.stringify(regions));
+}
+/** Builds the companion annotation column name: prefix + sequence column name + `_annotations`. */
+export function getAnnotationColumnName(seqColName: string): string {
+  return ANNOTATION_COL_PREFIX + seqColName + '_annotations';
+}
+/** Returns the companion column holding per-row annotation hits for `seqCol`, creating it when absent.
+ *  On creation the companion name is also recorded on `seqCol` under the annotationColumnName tag. */
+export function getOrCreateAnnotationColumn(df: DG.DataFrame, seqCol: DG.Column<string>): DG.Column<string> {
+  const companionName = getAnnotationColumnName(seqCol.name);
+  const existing = df.columns.byName(companionName);
+  if (existing)
+    return existing as DG.Column<string>;
+  const created = df.columns.addNewString(companionName);
+  seqCol.setTag(bioTAGS.annotationColumnName, companionName);
+  return created as DG.Column<string>;
+}
+/** Parsed per-row annotation data keyed by companion column, stamped with the column version it was built from. */
+const _rowDataCache = new WeakMap<DG.Column, {version: number; data:(RowAnnotationData | null)[]}>();
+/** Returns the annotation data of one row of the companion column.
+ *  Serves the value from the cache when an entry for the column's current version holds that row;
+ *  otherwise parses the cell directly (without storing the result). Empty or unparsable cells give null. */
+export function getRowAnnotations(annotCol: DG.Column<string>, rowIdx: number): RowAnnotationData | null {
+  const entry = _rowDataCache.get(annotCol);
+  if (entry && entry.version === annotCol.version) {
+    const cachedRow = entry.data[rowIdx];
+    if (cachedRow !== undefined)
+      return cachedRow;
+  }
+  // Cache miss: parse just this cell
+  const cellText = annotCol.get(rowIdx);
+  if (!cellText)
+    return null;
+  let parsed: RowAnnotationData | null;
+  try {
+    parsed = JSON.parse(cellText) as RowAnnotationData;
+  } catch {
+    parsed = null;
+  }
+  return parsed;
+}
+/** Returns the parsed annotation data for every row of the companion column.
+ *  Reuses the cached array when it was built for the column's current version; otherwise parses
+ *  every cell (empty or unparsable cells become null) and stores the result with that version. */
+export function cacheAllRowAnnotations(annotCol: DG.Column<string>): (RowAnnotationData | null)[] {
+  const entry = _rowDataCache.get(annotCol);
+  if (entry && entry.version === annotCol.version)
+    return entry.data;
+  const parseCell = (cellText: string | null): RowAnnotationData | null => {
+    if (!cellText)
+      return null;
+    try {
+      return JSON.parse(cellText) as RowAnnotationData;
+    } catch {
+      return null;
+    }
+  };
+  const rowCount = annotCol.length;
+  const parsedRows: (RowAnnotationData | null)[] = new Array(rowCount);
+  for (let row = 0; row < rowCount; row++)
+    parsedRows[row] = parseCell(annotCol.get(row));
+  _rowDataCache.set(annotCol, {version: annotCol.version, data: parsedRows});
+  return parsedRows;
+}
+/** Stores the hits of one row in the companion column: JSON for a non-empty list, an empty string otherwise. */
+export function setRowAnnotations(annotCol: DG.Column<string>, rowIdx: number, hits: SeqAnnotationHit[]): void {
+  const serialized = hits.length > 0 ? JSON.stringify(hits) : '';
+  annotCol.set(rowIdx, serialized);
+}
+/** Removes every annotation of `seqCol`: blanks the `.annotations` tag, drops the companion
+ *  per-row column from `df` when it exists, and blanks the legacy `.regions` tag. */
+export function clearAnnotations(df: DG.DataFrame, seqCol: DG.Column<string>): void {
+  seqCol.setTag(bioTAGS.annotations, '');
+  const companionName = getAnnotationColumnName(seqCol.name);
+  const companionExists = Boolean(df.columns.byName(companionName));
+  if (companionExists)
+    df.columns.remove(companionName);
+  // Legacy regions tag goes too
+  seqCol.setTag(bioTAGS.regions, '');
+}
+/** Appends one annotation to the column-level list and writes the list back. */
+export function addColumnAnnotation(col: DG.Column<string>, annotation: SeqAnnotation): void {
+  const withAdded = [...getColumnAnnotations(col), annotation];
+  setColumnAnnotations(col, withAdded);
+}
+/** Writes back the column-level list without the annotations whose id equals `annotationId`. */
+export function removeColumnAnnotation(col: DG.Column<string>, annotationId: string): void {
+  const current = getColumnAnnotations(col);
+  const remaining = current.filter((candidate) => candidate.id !== annotationId);
+  setColumnAnnotations(col, remaining);
+}
+/** Combines a row's existing hits with new ones, optionally dropping one or both kinds of existing hit first.
+ *  A hit is a region span when its `endPositionIndex` is set; every other hit is a non-region hit.
+ *  @param existingHits Current per-row hits
+ *  @param newHits New hits, appended after the surviving existing hits
+ *  @param replaceRegions If true, existing region span hits are dropped.
+ *  @param replaceLiabilities If true, existing non-region hits are dropped.
+ *  @returns A new array: surviving existing hits followed by `newHits`. */
+export function mergeRowHits(
+  existingHits: SeqAnnotationHit[],
+  newHits: SeqAnnotationHit[],
+  replaceRegions: boolean,
+  replaceLiabilities: boolean,
+): SeqAnnotationHit[] {
+  const survivors = existingHits.filter((hit) => {
+    const isRegionSpan = hit.endPositionIndex != null;
+    if (isRegionSpan)
+      return !replaceRegions;
+    return !replaceLiabilities;
+  });
+  return [...survivors, ...newHits];
+}

package/src/utils/annotations/liability-scanner-ui.ts ADDED Viewed

@@ -0,0 +1,88 @@
+import * as grok from 'datagrok-api/grok';
+import * as ui from 'datagrok-api/ui';
+import * as DG from 'datagrok-api/dg';
+import {_package} from '../../package';
+import {
+  BUILTIN_LIABILITY_RULES, LiabilityRule, scanLiabilities,
+  applyLiabilityScanResults, createLiabilitySummaryColumn,
+} from './liability-scanner';
+import {LiabilitySeverity} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';
+/** Display text for each liability severity, used in the rule checkbox tooltips;
+ *  lookups that miss fall back to the raw severity value at the call site. */
+const severityLabels: Record<string, string> = {
+  [LiabilitySeverity.High]: 'High',
+  [LiabilitySeverity.Medium]: 'Medium',
+  [LiabilitySeverity.Low]: 'Low',
+  [LiabilitySeverity.Info]: 'Info',
+};
+/** Opens the "Scan Sequence Liabilities" dialog for the table of the current table view.
+ *  The user picks a macromolecule column, the rules to run and the outputs to create; on OK the
+ *  column is scanned and the selected outputs are written to the table.
+ *  Shows a warning and returns when no table is open or it has no macromolecule columns. */
+export function showLiabilityScannerDialog(): void {
+  const df = grok.shell.tv?.dataFrame;
+  if (!df) {
+    grok.shell.warning('No table open');
+    return;
+  }
+  const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
+  if (seqCols.length === 0) {
+    grok.shell.warning('No macromolecule columns found');
+    return;
+  }
+  // Work on copies (with fresh global RegExp objects) so toggling rules here never mutates the built-in list.
+  const rules = BUILTIN_LIABILITY_RULES.map((r) => ({...r, pattern: new RegExp(r.pattern.source, 'g')}));
+  // NOTE(review): the table input is displayed, but the scan below always uses `df` captured above — confirm intended.
+  const tableInput = ui.input.table('Table', {value: df});
+  const seqInput = ui.input.column('Sequence', {
+    table: df, value: seqCols[0],
+    filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,
+  });
+  // Rule checkboxes
+  const ruleChecks: {rule: LiabilityRule; input: DG.InputBase<boolean>}[] = [];
+  const rulesDiv = ui.divV([]);
+  for (const rule of rules) {
+    const check = ui.input.bool(rule.name, {
+      value: rule.enabled,
+      tooltipText: `Severity: ${severityLabels[rule.severity] ?? rule.severity}`,
+    });
+    ruleChecks.push({rule, input: check});
+    rulesDiv.append(check.root);
+  }
+  const highlightInput = ui.input.bool('Highlight in cell renderer', {value: true});
+  const annotColInput = ui.input.bool('Create annotation column', {value: true});
+  const summaryInput = ui.input.bool('Create summary count column', {value: false});
+  const dialog = ui.dialog({title: 'Scan Sequence Liabilities'})
+    .add(ui.inputs([tableInput, seqInput]))
+    .add(ui.h3('Rules'))
+    .add(rulesDiv)
+    .add(ui.h3('Output'))
+    .add(ui.inputs([highlightInput, annotColInput, summaryInput]))
+    .onOK(() => {
+      const seqCol = seqInput.value;
+      // Report a missing selection as such rather than as a failed scan.
+      if (seqCol == null) {
+        grok.shell.warning('No sequence column selected');
+        return;
+      }
+      try {
+        const sh = _package.seqHelper.getSeqHandler(seqCol);
+        // Apply checkbox state
+        for (const {rule, input} of ruleChecks)
+          rule.enabled = input.value;
+        const result = scanLiabilities(seqCol, sh, rules);
+        // Both options are served by the same call.
+        if (annotColInput.value || highlightInput.value)
+          applyLiabilityScanResults(df, seqCol, result);
+        if (summaryInput.value)
+          createLiabilitySummaryColumn(df, seqCol, result);
+        grok.shell.info(`Liability scan: ${result.totalHits} hits found across ${result.annotations.length} rules`);
+        df.fireValuesChanged();
+      } catch (err: unknown) {
+        const reason = err instanceof Error ? err.message : String(err);
+        grok.shell.error(`Liability scan failed: ${reason}`);
+        console.error(err);
+      }
+    });
+  dialog.show();
+}