@datagrok/bio 2.26.8 → 2.27.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/CLAUDE.md +35 -5
- package/detectors.js +4 -2
- package/dist/287.js +1 -1
- package/dist/287.js.map +1 -1
- package/dist/422.js +1 -1
- package/dist/422.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/scripts/mol-to-helm.py +642 -170
- package/src/analysis/sequence-activity-cliffs.ts +8 -6
- package/src/package-api.ts +9 -2
- package/src/package.g.ts +12 -0
- package/src/package.ts +23 -9
- package/src/tests/msa-tests.ts +6 -2
- package/src/utils/annotations/annotation-manager-ui.ts +1 -1
- package/src/utils/constants.ts +3 -7
- package/src/utils/monomer-lib/library-file-manager/ui.ts +1 -1
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +1 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +403 -194
- package/src/utils/pepsea.ts +138 -116
- package/src/utils/types.ts +7 -4
- package/test-console-output-1.log +584 -615
- package/test-record-1.mp4 +0 -0
package/CHANGELOG.md
CHANGED
package/CLAUDE.md
CHANGED
|
@@ -44,7 +44,7 @@ Other packages depend on these implementations: **Helm**, **Peptides**, **Biostr
|
|
|
44
44
|
|---|---|---|
|
|
45
45
|
| `Bio \| Analyze \| Activity Cliffs...` | `activityCliffs` | Detects sequence pairs with similar structure but significant activity difference |
|
|
46
46
|
| `Bio \| Analyze \| Sequence Space...` | `sequenceSpaceTopMenu` | UMAP/tSNE 2D projection of sequences by pairwise distance |
|
|
47
|
-
| `Bio \| Analyze \| MSA...` | `multipleSequenceAlignmentDialog` | Multiple sequence alignment via kalign (WASM) or PepSeA
|
|
47
|
+
| `Bio \| Analyze \| MSA...` | `multipleSequenceAlignmentDialog` | Multiple sequence alignment via kalign (WASM) for canonical sequences, or via dynamically discovered engines (e.g. PepSeA Docker) for non-canonical sequences |
|
|
48
48
|
| `Bio \| Analyze \| Composition` | `compositionAnalysis` | Docks a WebLogo viewer for sequence composition |
|
|
49
49
|
| `Bio \| Transform \| Convert Sequence Notation...` | `convertDialog` | FASTA ↔ SEPARATOR ↔ HELM ↔ BILN conversion |
|
|
50
50
|
| `Bio \| Transform \| To Atomic Level...` | `toAtomicLevel` | Converts sequences to V3000 molfiles |
|
|
@@ -177,9 +177,38 @@ Key methods: `detectSeparator()`, `detectAlphabet()`, `getAlphabetSimilarity()`,
|
|
|
177
177
|
|
|
178
178
|
| File | Purpose |
|
|
179
179
|
|---|---|
|
|
180
|
-
| `multiple-sequence-alignment.ts` | `
|
|
181
|
-
| `multiple-sequence-alignment-ui.ts` | `multipleSequenceAlignmentUI()` — MSA dialog with column selection,
|
|
182
|
-
| `pepsea.ts` | `
|
|
180
|
+
| `multiple-sequence-alignment.ts` | `runKalign()` — core MSA via **kalign** (WebAssembly/Aioli). Supports per-cluster alignment, gap penalties, selected-rows-only mode. Used for canonical sequences (DNA/RNA/PT). |
|
|
181
|
+
| `multiple-sequence-alignment-ui.ts` | `multipleSequenceAlignmentUI()` — MSA dialog with column selection, mode switching (kalign for canonical, dynamically discovered engines for non-canonical), per-cluster alignment, selected-rows-only. |
|
|
182
|
+
| `pepsea.ts` | `alignWithPepsea()` / `runPepsea()` — MSA for HELM peptides via **PepSeA Docker container** (mafft/linsi/ginsi methods). Registered as a `sequenceMSA` engine via `pepseaMsa()` in `package.ts`. |
|
|
183
|
+
|
|
184
|
+
##### Adding a New MSA Engine
|
|
185
|
+
|
|
186
|
+
Non-canonical MSA engines are discovered dynamically via `DG.Func.find({meta: {role: 'sequenceMSA'}})`.
|
|
187
|
+
To add a new alignment engine (in this or any other package):
|
|
188
|
+
|
|
189
|
+
1. Register a function with `meta.role: 'sequenceMSA'` and `outputs: [{name: 'result', type: 'column'}]`.
|
|
190
|
+
2. The **first parameter** must be a `column` input with `semType: 'Macromolecule'` — the sequences to align.
|
|
191
|
+
3. All **remaining parameters** are engine-specific configuration (method, gap penalties, etc.) and will be rendered automatically in the MSA dialog under "Alignment parameters".
|
|
192
|
+
4. The function must **create and return** the aligned output column with appropriate metadata tags (`meta.units`, `semType`, `aligned`, `alphabet`, `separator`, etc.). Different engines can produce different output notations (e.g. PepSeA produces separator notation with `.` delimiter).
|
|
193
|
+
5. The MSA dialog handles clustering and row selection — the engine function receives a single column to align.
|
|
194
|
+
|
|
195
|
+
Example (decorator style):
|
|
196
|
+
```typescript
|
|
197
|
+
@grok.decorators.func({
|
|
198
|
+
name: 'My Aligner',
|
|
199
|
+
description: 'Custom MSA engine',
|
|
200
|
+
meta: {role: 'sequenceMSA'},
|
|
201
|
+
outputs: [{name: 'result', type: 'column'}],
|
|
202
|
+
})
|
|
203
|
+
static async myAligner(
|
|
204
|
+
@grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) sequenceCol: DG.Column<string>,
|
|
205
|
+
@grok.decorators.param({type: 'double'}) gapOpen: number = 1.0,
|
|
206
|
+
): Promise<DG.Column<string>> {
|
|
207
|
+
// Align sequences, create result column with metadata tags, return it.
|
|
208
|
+
}
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
See `pepseaMsa()` in `package.ts` and `alignWithPepsea()` in `pepsea.ts` for a complete reference implementation.
|
|
183
212
|
|
|
184
213
|
#### Seq Helper — `src/utils/seq-helper/`
|
|
185
214
|
|
|
@@ -351,7 +380,8 @@ Test entry point: `src/package-test.ts` — imports all test files, exports `tes
|
|
|
351
380
|
| Sequence space (UMAP/tSNE) | `src/analysis/sequence-space.ts` |
|
|
352
381
|
| Similarity/diversity viewers | `src/analysis/sequence-similarity-viewer.ts` / `sequence-diversity-viewer.ts` |
|
|
353
382
|
| MSA (kalign) | `src/utils/multiple-sequence-alignment.ts` |
|
|
354
|
-
| MSA (PepSeA Docker) | `src/utils/pepsea.ts` |
|
|
383
|
+
| MSA (PepSeA Docker) | `src/utils/pepsea.ts` + `pepseaMsa()` in `src/package.ts` |
|
|
384
|
+
| Adding MSA engines | See "Adding a New MSA Engine" in the Multiple Sequence Alignment section |
|
|
355
385
|
| Notation conversion | `src/utils/convert.ts` |
|
|
356
386
|
| Seq → molfile conversion | `src/utils/sequence-to-mol.ts` |
|
|
357
387
|
| HELM → molfile pipeline | `src/utils/helm-to-molfile/converter/` |
|
package/detectors.js
CHANGED
|
@@ -333,7 +333,8 @@ class BioPackageDetectors extends DG.Package {
|
|
|
333
333
|
// const forbidden = this.checkForbiddenWoSeparator(stats.freq);
|
|
334
334
|
col.meta.units = units;
|
|
335
335
|
if (separator) col.setTag(SeqHandler.TAGS.separator, separator);
|
|
336
|
-
col.
|
|
336
|
+
if (!col.getTag(SeqHandler.TAGS.aligned))
|
|
337
|
+
col.setTag(SeqHandler.TAGS.aligned, aligned);
|
|
337
338
|
col.setTag(SeqHandler.TAGS.alphabet, alphabet);
|
|
338
339
|
if (alphabet === ALPHABET.UN) {
|
|
339
340
|
// alphabetSize calculated on (sub)sample of data is incorrect
|
|
@@ -706,7 +707,8 @@ class BioPackageDetectors extends DG.Package {
|
|
|
706
707
|
const isPotentiallyMSA = averageLength > 1 && std < 2; // if the average length is more than 1 and the std is less than 2, then it is potentially MSA
|
|
707
708
|
column.setTag('units', notationInput.value);
|
|
708
709
|
separatorInput.value && column.setTag('separator', separatorInput.value);
|
|
709
|
-
column.
|
|
710
|
+
if (!column.getTag('aligned'))
|
|
711
|
+
column.setTag('aligned', isPotentiallyMSA ? 'SEQ.MSA' : 'SEQ');
|
|
710
712
|
column.setTag('alphabet', defaultAlphabet);
|
|
711
713
|
isMultichar && column.setTag('.alphabetIsMultichar', 'true');
|
|
712
714
|
column.semType = 'Macromolecule';
|
package/dist/287.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use strict";(self.webpackChunkbio=self.webpackChunkbio||[]).push([[287],{9287(e,o,t){t.d(o,{showAnnotationManagerDialog:()=>d});var n=t(4328),i=t(7389),l=t(6082),a=t(4517),r=t(3736);const s={[a.eI.Structure]:"Structure (FR/CDR)",[a.eI.Liability]:"Liability",[a.eI.PTM]:"Post-translational Modification",[a.eI.Custom]:"Custom"},c={[a.Hq.High]:"High",[a.Hq.Medium]:"Medium",[a.Hq.Low]:"Low",[a.Hq.Info]:"Info"};function d(){const e=n.shell.tv?.dataFrame;if(!e)return void n.shell.warning("No table open");const o=e.columns.bySemTypeAll(l.SEMTYPE.MACROMOLECULE);if(0===o.length)return void n.shell.warning("No macromolecule columns found");let t=o[0];const a=i.input.column("Sequence Column",{table:e,value:t,filter:e=>e.semType===l.SEMTYPE.MACROMOLECULE,onValueChanged:e=>{t=e,u()}}),d=i.divV([],{style:{maxHeight:"380px",overflowY:"auto",paddingRight:"8px"}});function u(){d.innerHTML="";const o=(0,r.Ln)(t);if(0!==o.length)for(const n of o){const o=s[n.category]??n.category,a=n.severity?` [${c[n.severity]??n.severity}]`:"",p=n.start&&n.end?` (${n.start}-${n.end})`:"",g=n.sourceScheme?` ${n.sourceScheme}`:"",h=i.iconFA("trash",()=>{const o=(0,r.Ln)(t).filter(e=>e.id!==n.id);(0,r.fh)(t,o),e.fireValuesChanged(),u()});h.style.cursor="pointer",h.style.color="#999",h.style.marginLeft="8px";const f=n.color??"#ccc";let m=f;const y=i.div([],{style:{width:"12px",height:"12px",borderRadius:"2px",backgroundColor:m,display:"inline-block",marginRight:"6px",flexShrink:"0",cursor:"pointer"}});i.colorPicker(l.Color.fromHtml(n.color??"#ccc"),e=>{m=l.Color.toHtml(e)},y,()=>{const o=(0,r.Ln)(t).map(e=>e.id===n.id?{...e,color:m}:e);(0,r.fh)(t,o),e.fireValuesChanged(),u()},()=>{m=f,y.style.backgroundColor=m});const x=i.divH([y,i.divText(`${n.name}${p}${g}${a}`,{style:{flex:"1",fontSize:"12px",padding:"4px"}}),i.divText(o,{style:{color:"#888",fontSize:"11px",marginRight:"8px"}}),h],{style:{alignItems:"center",padding:"4px 0",borderBottom:"1px solid #eee"}});d.append(x)}else d.append(i.divText("No 
annotations on this column.",{style:{color:"#888",padding:"8px"}}))}u();const p=i.button("Clear All",()=>{(0,r.OW)(e,t),e.fireValuesChanged(),u(),n.shell.info("All annotations cleared")});i.dialog({title:"Manage Annotations"}).add(i.inputs([a])).add(i.h3("Annotations")).add(d).add(i.divH([p],{style:{marginTop:"8px"}})).onOK(()=>{}).show()}}}]);
|
|
1
|
+
"use strict";(self.webpackChunkbio=self.webpackChunkbio||[]).push([[287],{9287(e,o,t){t.d(o,{showAnnotationManagerDialog:()=>d});var n=t(4328),i=t(7389),l=t(6082),a=t(4517),r=t(3736);const s={[a.eI.Structure]:"Structure (FR/CDR)",[a.eI.Liability]:"Liability",[a.eI.PTM]:"Post-translational Modification",[a.eI.Custom]:"Custom"},c={[a.Hq.High]:"High",[a.Hq.Medium]:"Medium",[a.Hq.Low]:"Low",[a.Hq.Info]:"Info"};function d(){const e=n.shell.tv?.dataFrame;if(!e)return void n.shell.warning("No table open");const o=e.columns.bySemTypeAll(l.SEMTYPE.MACROMOLECULE);if(0===o.length)return void n.shell.warning("No macromolecule columns found");let t=o[0];const a=i.input.column("Sequence Column",{table:e,value:t,filter:e=>e.semType===l.SEMTYPE.MACROMOLECULE,onValueChanged:e=>{t=e,u()}}),d=i.divV([],{style:{maxHeight:"380px",overflowY:"auto",paddingRight:"8px"}});function u(){d.innerHTML="";const o=(0,r.Ln)(t);if(0!==o.length)for(const n of o){const o=s[n.category]??n.category,a=n.severity?` [${c[n.severity]??n.severity}]`:"",p=n.start&&n.end?` (${n.start}-${n.end})`:"",g=n.sourceScheme?` ${n.sourceScheme}`:"",h=i.iconFA("trash",()=>{const o=(0,r.Ln)(t).filter(e=>e.id!==n.id);(0,r.fh)(t,o),e.fireValuesChanged(),u()},"Delete");h.style.cursor="pointer",h.style.color="#999",h.style.marginLeft="8px";const f=n.color??"#ccc";let m=f;const y=i.div([],{style:{width:"12px",height:"12px",borderRadius:"2px",backgroundColor:m,display:"inline-block",marginRight:"6px",flexShrink:"0",cursor:"pointer"}});i.colorPicker(l.Color.fromHtml(n.color??"#ccc"),e=>{m=l.Color.toHtml(e)},y,()=>{const o=(0,r.Ln)(t).map(e=>e.id===n.id?{...e,color:m}:e);(0,r.fh)(t,o),e.fireValuesChanged(),u()},()=>{m=f,y.style.backgroundColor=m});const x=i.divH([y,i.divText(`${n.name}${p}${g}${a}`,{style:{flex:"1",fontSize:"12px",padding:"4px"}}),i.divText(o,{style:{color:"#888",fontSize:"11px",marginRight:"8px"}}),h],{style:{alignItems:"center",padding:"4px 0",borderBottom:"1px solid #eee"}});d.append(x)}else 
d.append(i.divText("No annotations on this column.",{style:{color:"#888",padding:"8px"}}))}u();const p=i.button("Clear All",()=>{(0,r.OW)(e,t),e.fireValuesChanged(),u(),n.shell.info("All annotations cleared")});i.dialog({title:"Manage Annotations"}).add(i.inputs([a])).add(i.h3("Annotations")).add(d).add(i.divH([p],{style:{marginTop:"8px"}})).onOK(()=>{}).show()}}}]);
|
|
2
2
|
//# sourceMappingURL=287.js.map
|
package/dist/287.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"287.js","mappings":"uLAUA,MAAMA,EAAyC,CAC7C,CAAC,KAAmBC,WAAY,qBAChC,CAAC,KAAmBC,WAAY,YAChC,CAAC,KAAmBC,KAAM,kCAC1B,CAAC,KAAmBC,QAAS,UAGzBC,EAAyC,CAC7C,CAAC,KAAkBC,MAAO,OAC1B,CAAC,KAAkBC,QAAS,SAC5B,CAAC,KAAkBC,KAAM,MACzB,CAAC,KAAkBC,MAAO,QAGrB,SAASC,IACd,MAAMC,EAAK,QAAWC,IAAIC,UAC1B,IAAKF,EAEH,YADA,QAAWG,QAAQ,iBAIrB,MAAMC,EAAUJ,EAAGK,QAAQC,aAAa,UAAWC,eACnD,GAAuB,IAAnBH,EAAQI,OAEV,YADA,QAAWL,QAAQ,kCAIrB,IAAIM,EAAcL,EAAQ,GAC1B,MAAMM,EAAW,QAASC,OAAO,kBAAmB,CAClDC,MAAOZ,EAAIa,MAAOJ,EAClBK,OAASC,GAAmBA,EAAIC,UAAY,UAAWT,cACvDU,eAAiBF,IAAUN,EAAcM,EAAMG,OAG3CC,EAAU,OAAQ,GAAI,CAACC,MAAO,CAACC,UAAW,QAASC,UAAW,OAAQC,aAAc,SAE1F,SAASL,IACPC,EAAQK,UAAY,GACpB,MAAMC,GAAc,QAAqBhB,GACzC,GAA2B,IAAvBgB,EAAYjB,OAKhB,IAAK,MAAMkB,KAASD,EAAa,CAC/B,MAAME,EAAWtC,EAAeqC,EAAME,WAAaF,EAAME,SACnDC,EAAWH,EAAMI,SAAW,KAAKpC,EAAegC,EAAMI,WAAaJ,EAAMI,YAAc,GACvFC,EAAaL,EAAMM,OAASN,EAAMO,IAAM,KAAKP,EAAMM,SAASN,EAAMO,OAAS,GAC3EC,EAAcR,EAAMS,aAAe,IAAIT,EAAMS,eAAiB,GAE9DC,EAAY,SAAU,QAAS,KACnC,MAAMC,GAAU,QAAqB5B,GAAaK,OAAQwB,GAAMA,EAAEC,KAAOb,EAAMa,KAC/E,QAAqB9B,EAAa4B,GAClCrC,EAAGwC,oBACHtB,
|
|
1
|
+
{"version":3,"file":"287.js","mappings":"uLAUA,MAAMA,EAAyC,CAC7C,CAAC,KAAmBC,WAAY,qBAChC,CAAC,KAAmBC,WAAY,YAChC,CAAC,KAAmBC,KAAM,kCAC1B,CAAC,KAAmBC,QAAS,UAGzBC,EAAyC,CAC7C,CAAC,KAAkBC,MAAO,OAC1B,CAAC,KAAkBC,QAAS,SAC5B,CAAC,KAAkBC,KAAM,MACzB,CAAC,KAAkBC,MAAO,QAGrB,SAASC,IACd,MAAMC,EAAK,QAAWC,IAAIC,UAC1B,IAAKF,EAEH,YADA,QAAWG,QAAQ,iBAIrB,MAAMC,EAAUJ,EAAGK,QAAQC,aAAa,UAAWC,eACnD,GAAuB,IAAnBH,EAAQI,OAEV,YADA,QAAWL,QAAQ,kCAIrB,IAAIM,EAAcL,EAAQ,GAC1B,MAAMM,EAAW,QAASC,OAAO,kBAAmB,CAClDC,MAAOZ,EAAIa,MAAOJ,EAClBK,OAASC,GAAmBA,EAAIC,UAAY,UAAWT,cACvDU,eAAiBF,IAAUN,EAAcM,EAAMG,OAG3CC,EAAU,OAAQ,GAAI,CAACC,MAAO,CAACC,UAAW,QAASC,UAAW,OAAQC,aAAc,SAE1F,SAASL,IACPC,EAAQK,UAAY,GACpB,MAAMC,GAAc,QAAqBhB,GACzC,GAA2B,IAAvBgB,EAAYjB,OAKhB,IAAK,MAAMkB,KAASD,EAAa,CAC/B,MAAME,EAAWtC,EAAeqC,EAAME,WAAaF,EAAME,SACnDC,EAAWH,EAAMI,SAAW,KAAKpC,EAAegC,EAAMI,WAAaJ,EAAMI,YAAc,GACvFC,EAAaL,EAAMM,OAASN,EAAMO,IAAM,KAAKP,EAAMM,SAASN,EAAMO,OAAS,GAC3EC,EAAcR,EAAMS,aAAe,IAAIT,EAAMS,eAAiB,GAE9DC,EAAY,SAAU,QAAS,KACnC,MAAMC,GAAU,QAAqB5B,GAAaK,OAAQwB,GAAMA,EAAEC,KAAOb,EAAMa,KAC/E,QAAqB9B,EAAa4B,GAClCrC,EAAGwC,oBACHtB,KACC,UACHkB,EAAUhB,MAAMqB,OAAS,UACzBL,EAAUhB,MAAMsB,MAAQ,OACxBN,EAAUhB,MAAMuB,WAAa,MAC7B,MAAMC,EAAgBlB,EAAMgB,OAAS,OACrC,IAAIG,EAAeD,EACnB,MAAME,EAAc,MAAO,GAAI,CAAC1B,MAAO,CACrC2B,MAAO,OAAQC,OAAQ,OAAQC,aAAc,MAC7CC,gBAAiBL,EAAcM,QAAS,eAAgBC,YAAa,MACrEC,WAAY,IAAKZ,OAAQ,aAG3B,cAAe,QAASa,SAAS5B,EAAMgB,OAAS,QAAUa,IACxDV,EAAe,QAASW,OAAOD,IAC9BT,EAAa,KACd,MAAMT,GAAU,QAAqB5B,GAAagD,IAAKnB,GAAMA,EAAEC,KAAOb,EAAMa,GAAK,IAAID,EAAGI,MAAOG,GAAgBP,IAC/G,QAAqB7B,EAAa4B,GAClCrC,EAAGwC,oBACHtB,KACC,KACD2B,EAAeD,EACfE,EAAY1B,MAAM8B,gBAAkBL,IAGtC,MAAMa,EAAM,OAAQ,CAClBZ,EACA,UAAW,GAAGpB,EAAMiC,OAAO5B,IAAaG,IAAcL,IAAY,CAACT,MAAO,CAACwC,KAAM,IAAKC,SAAU,OAAQC,QAAS,SACjH,UAAWnC,EAAU,CAACP,MAAO,CAACsB,MAAO,OAAQmB,SAAU,OAAQT,YAAa,SAC5EhB,GACC,CAAChB,MAAO,CAAC2C,WAAY,SAAUD,QAAS,QAASE,aAAc,oBAElE7C,EAAQ8C,OAAOP,EACjB,MA/CEvC,EAAQ8C,OAAO,UAAW,iCAAkC,CAAC7C,MAAO,CAACsB,MAAO,OAAQoB,QAAS,SAgDjG,CAEA5C,IAEA,MAAMgD,EAAW,SAAU,YAAa,MA
CtC,QAAiBlE,EAAIS,GACrBT,EAAGwC,oBACHtB,IACA,QAAWiD,KAAK,6BAGH,SAAU,CAACC,MAAO,uBAC9BC,IAAI,SAAU,CAAC3D,KACf2D,IAAI,KAAM,gBACVA,IAAIlD,GACJkD,IAAI,OAAQ,CAACH,GAAW,CAAC9C,MAAO,CAACkD,UAAW,UAC5CC,KAAK,QAEDC,MACT,C","sources":["webpack://bio/./src/utils/annotations/annotation-manager-ui.ts"],"sourcesContent":["/* eslint-disable max-len */\nimport * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {\n SeqAnnotation, AnnotationCategory, LiabilitySeverity,\n} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';\nimport {getColumnAnnotations, setColumnAnnotations, clearAnnotations} from './annotation-manager';\n\nconst categoryLabels: Record<string, string> = {\n [AnnotationCategory.Structure]: 'Structure (FR/CDR)',\n [AnnotationCategory.Liability]: 'Liability',\n [AnnotationCategory.PTM]: 'Post-translational Modification',\n [AnnotationCategory.Custom]: 'Custom',\n};\n\nconst severityLabels: Record<string, string> = {\n [LiabilitySeverity.High]: 'High',\n [LiabilitySeverity.Medium]: 'Medium',\n [LiabilitySeverity.Low]: 'Low',\n [LiabilitySeverity.Info]: 'Info',\n};\n\nexport function showAnnotationManagerDialog(): void {\n const df = grok.shell.tv?.dataFrame;\n if (!df) {\n grok.shell.warning('No table open');\n return;\n }\n\n const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n if (seqCols.length === 0) {\n grok.shell.warning('No macromolecule columns found');\n return;\n }\n\n let selectedCol = seqCols[0];\n const colInput = ui.input.column('Sequence Column', {\n table: df, value: selectedCol,\n filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,\n onValueChanged: (col) => { selectedCol = col!; refreshList(); },\n });\n\n const listDiv = ui.divV([], {style: {maxHeight: '380px', overflowY: 'auto', paddingRight: '8px'}});\n\n function refreshList(): void {\n listDiv.innerHTML = '';\n const annotations = getColumnAnnotations(selectedCol);\n if 
(annotations.length === 0) {\n listDiv.append(ui.divText('No annotations on this column.', {style: {color: '#888', padding: '8px'}}));\n return;\n }\n\n for (const annot of annotations) {\n const catLabel = categoryLabels[annot.category] ?? annot.category;\n const sevLabel = annot.severity ? ` [${severityLabels[annot.severity] ?? annot.severity}]` : '';\n const rangeLabel = annot.start && annot.end ? ` (${annot.start}-${annot.end})` : '';\n const schemeLabel = annot.sourceScheme ? ` ${annot.sourceScheme}` : '';\n\n const removeBtn = ui.iconFA('trash', () => {\n const updated = getColumnAnnotations(selectedCol).filter((a) => a.id !== annot.id);\n setColumnAnnotations(selectedCol, updated);\n df.fireValuesChanged();\n refreshList();\n }, 'Delete');\n removeBtn.style.cursor = 'pointer';\n removeBtn.style.color = '#999';\n removeBtn.style.marginLeft = '8px';\n const originalColor = annot.color ?? '#ccc';\n let currentColor = originalColor;\n const colorSwatch = ui.div([], {style: {\n width: '12px', height: '12px', borderRadius: '2px',\n backgroundColor: currentColor, display: 'inline-block', marginRight: '6px',\n flexShrink: '0', cursor: 'pointer',\n }});\n\n ui.colorPicker(DG.Color.fromHtml(annot.color ?? '#ccc'), (newColor) => {\n currentColor = DG.Color.toHtml(newColor);\n }, colorSwatch, () => {\n const updated = getColumnAnnotations(selectedCol).map((a) => a.id === annot.id ? 
{...a, color: currentColor} : a);\n setColumnAnnotations(selectedCol, updated);\n df.fireValuesChanged();\n refreshList();\n }, () => {\n currentColor = originalColor;\n colorSwatch.style.backgroundColor = currentColor;\n });\n\n const row = ui.divH([\n colorSwatch,\n ui.divText(`${annot.name}${rangeLabel}${schemeLabel}${sevLabel}`, {style: {flex: '1', fontSize: '12px', padding: '4px'}}),\n ui.divText(catLabel, {style: {color: '#888', fontSize: '11px', marginRight: '8px'}}),\n removeBtn,\n ], {style: {alignItems: 'center', padding: '4px 0', borderBottom: '1px solid #eee'}});\n\n listDiv.append(row);\n }\n }\n\n refreshList();\n\n const clearBtn = ui.button('Clear All', () => {\n clearAnnotations(df, selectedCol);\n df.fireValuesChanged();\n refreshList();\n grok.shell.info('All annotations cleared');\n });\n\n const dialog = ui.dialog({title: 'Manage Annotations'})\n .add(ui.inputs([colInput]))\n .add(ui.h3('Annotations'))\n .add(listDiv)\n .add(ui.divH([clearBtn], {style: {marginTop: '8px'}}))\n .onOK(() => {});\n\n dialog.show();\n}\n"],"names":["categoryLabels","Structure","Liability","PTM","Custom","severityLabels","High","Medium","Low","Info","showAnnotationManagerDialog","df","tv","dataFrame","warning","seqCols","columns","bySemTypeAll","MACROMOLECULE","length","selectedCol","colInput","column","table","value","filter","col","semType","onValueChanged","refreshList","listDiv","style","maxHeight","overflowY","paddingRight","innerHTML","annotations","annot","catLabel","category","sevLabel","severity","rangeLabel","start","end","schemeLabel","sourceScheme","removeBtn","updated","a","id","fireValuesChanged","cursor","color","marginLeft","originalColor","currentColor","colorSwatch","width","height","borderRadius","backgroundColor","display","marginRight","flexShrink","fromHtml","newColor","toHtml","map","row","name","flex","fontSize","padding","alignItems","borderBottom","append","clearBtn","info","title","add","marginTop","onOK","show"],"sourceRoot":""}
|
package/dist/422.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use strict";(self.webpackChunkbio=self.webpackChunkbio||[]).push([[422],{9422(e,t,i){i.d(t,{showLiabilityScannerDialog:()=>c});var n=i(4328),a=i(7389),o=i(6082),l=i(
|
|
1
|
+
"use strict";(self.webpackChunkbio=self.webpackChunkbio||[]).push([[422],{9422(e,t,i){i.d(t,{showLiabilityScannerDialog:()=>c});var n=i(4328),a=i(7389),o=i(6082),l=i(8012),r=i(4517),d=i(3736);const s=[{id:"deamid-ng",name:"Deamidation (NG)",pattern:/NG/g,length:2,severity:r.Hq.High,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-ns",name:"Deamidation (NS)",pattern:/NS/g,length:2,severity:r.Hq.Medium,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-na",name:"Deamidation (NA)",pattern:/NA/g,length:2,severity:r.Hq.Low,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-nd",name:"Deamidation (ND)",pattern:/ND/g,length:2,severity:r.Hq.Low,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-nt",name:"Deamidation (NT)",pattern:/NT/g,length:2,severity:r.Hq.Low,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"isom-dg",name:"Isomerization (DG)",pattern:/DG/g,length:2,severity:r.Hq.High,ruleCategory:"isomerization",color:r.D5.liability.isomerization,enabled:!0},{id:"isom-ds",name:"Isomerization (DS)",pattern:/DS/g,length:2,severity:r.Hq.Medium,ruleCategory:"isomerization",color:r.D5.liability.isomerization,enabled:!0},{id:"oxid-m",name:"Oxidation (Met)",pattern:/M/g,length:1,severity:r.Hq.Medium,ruleCategory:"oxidation",color:r.D5.liability.oxidation,enabled:!0},{id:"oxid-w",name:"Oxidation (Trp)",pattern:/W/g,length:1,severity:r.Hq.Low,ruleCategory:"oxidation",color:r.D5.liability.oxidation,enabled:!0},{id:"glyco-nxst",name:"N-glycosylation",pattern:/N[^P][ST]/g,length:3,severity:r.Hq.High,ruleCategory:"glycosylation",color:r.D5.liability.glycosylation,enabled:!0},{id:"free-cys",name:"Free Cysteine",pattern:/C/g,length:1,severity:r.Hq.Info,ruleCategory:"freeCysteine",color:r.D5.liability.freeCysteine,enabled:!1}];function u(e,t){const i=e.getSplitted(t),n=new Array(i.length);for(let 
e=0;e<i.length;e++)n[e]=i.getOriginal(e);return n.join("")}const g={[r.Hq.High]:"High",[r.Hq.Medium]:"Medium",[r.Hq.Low]:"Low",[r.Hq.Info]:"Info"};function c(){const e=n.shell.tv?.dataFrame;if(!e)return void n.shell.warning("No table open");const t=e.columns.bySemTypeAll(o.SEMTYPE.MACROMOLECULE);if(0===t.length)return void n.shell.warning("No macromolecule columns found");const i=s.map(e=>({...e,pattern:new RegExp(e.pattern.source,"g")})),c=a.input.table("Table",{value:e}),m=a.input.column("Sequence",{table:e,value:t[0],filter:e=>e.semType===o.SEMTYPE.MACROMOLECULE}),y=[],p=a.divV([]);for(const e of i){const t=a.input.bool(e.name,{value:e.enabled,tooltipText:`Severity: ${g[e.severity]??e.severity}`});y.push({rule:e,input:t}),p.append(t.root)}const h=a.input.bool("Highlight in cell renderer",{value:!0}),b=a.input.bool("Create annotation column",{value:!0}),f=a.input.bool("Create summary count column",{value:!1}),v=a.dialog({title:"Scan Sequence Liabilities"}).add(a.inputs([c,m])).add(a.h3("Rules")).add(p).add(a.h3("Output")).add(a.inputs([h,b,f])).onOK(()=>{try{const t=m.value,a=l._package.seqHelper.getSeqHandler(t);for(const{rule:e,input:t}of y)e.enabled=t.value;const o=function(e,t,i){const n=i.filter(e=>e.enabled),a=t.posList,o=new Map,l=new Array(e.length);let d=0;for(let i=0;i<e.length;i++){const e=u(t,i),r=[];for(const t of n){let i;for(t.pattern.lastIndex=0;null!==(i=t.pattern.exec(e));)r.push({annotationId:t.id,positionIndex:i.index,positionName:i.index<a.length?a[i.index]:void 0,matchedMonomers:i[0]}),o.set(t.id,(o.get(t.id)??0)+1),d++}l[i]=r}return{annotations:n.filter(e=>o.has(e.id)).map(e=>({id:e.id,name:e.name,description:`${e.ruleCategory} liability pattern (${o.get(e.id)} hits)`,start:null,end:null,visualType:1===e.length?r.ao.Point:r.ao.Motif,category:r.eI.Liability,color:e.color,severity:e.severity,motifPattern:e.pattern.source,autoGenerated:!0})),rowData:l,totalHits:d}}(t,a,i);(b.value||h.value)&&function(e,t,i){const 
n=(0,d.Ln)(t).filter(e=>e.category!==r.eI.Liability);(0,d.fh)(t,[...n,...i.annotations]);const a=(0,d.Lz)(e,t);for(let e=0;e<i.rowData.length;e++){const t=(0,d.JG)(a,e)??[];(0,d.z5)(a,e,(0,d.bq)(t,i.rowData[e],!1,!0))}}(e,t,o),f.value&&function(e,t,i){const n=`${t.name}_liability_count`,a=i.rowData.map(e=>e.length),o=e.columns.addNewInt(n);for(let e=0;e<a.length;e++)o.set(e,a[e])}(e,t,o),n.shell.info(`Liability scan: ${o.totalHits} hits found across ${o.annotations.length} rules`),e.fireValuesChanged()}catch(e){n.shell.error(`Liability scan failed: ${e.message??e}`),console.error(e)}});v.show()}}}]);
|
|
2
2
|
//# sourceMappingURL=422.js.map
|
package/dist/422.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"422.js","mappings":"+LA4BO,MAAMA,EAA2C,CACtD,CAACC,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC/L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GACjM,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,UAAWC,KAAM,qBAAsBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUK,cAAeH,SAAS,GACnM,CAACV,GAAI,UAAWC,KAAM,qBAAsBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUK,cAAeH,SAAS,GACrM,CAACV,GAAI,SAAUC,KAAM,kBAAmBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,YAAaC,MAAO,KAAkBC,UAAUM,UAAWJ,SAAS,GACxL,CAACV,GAAI,SAAUC,KAAM,kBAAmBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,YAAaC,MAAO,KAAkBC,UAAUM,UAAWJ,SAAS,GACrL,CAACV,GAAI,aAAcC,KAAM,kBAAmBC,QAAS,aAAcC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUO,cAAeL,SAAS,GAC1M,CAACV,GAAI,WAAYC,KAAM,gBAAiBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBY,KAAMV,aAAc,eAAgBC,MAAO,KAAkBC,UAAUS,aAAcP,SAAS,IAI9L,SAASQ,EAAmBC,EAAiBC,GAC3C,MAAMC,EAAWF,EAAGG,YAAYF,GAC1BG,EAAkB,IAAIC,MAAMH,EAASlB,QAC3C,IAAK,IAAIsB,EAAI,EAAGA,EAAIJ,EAASlB,OAAQsB,IACnCF,EAAME,GAAKJ,EAASK,YAAYD,GAClC,OAAOF,EAAMI,KAAK,GACpB,CCtCA,MAAMC,EAAyC,CAC7C,CAAC,KAAkBvB,MAAO,OAC1B,CAAC,KAAkBM,QAAS,SAC5B,CAAC,KAAkBC,KAAM,MACzB,CAAC,KAAkBI,MAAO,QAGrB,SAASa,IACd,MAAMC,EAAK,QAAWC,IAAIC,UAC1B,IAAKF,EAEH,YADA,QAAWG,QAAQ,iBAIrB,MAAMC,EAAUJ,EAAGK,QAAQC,aAAa,UAAWC,eACnD,GAAuB,IAAnBH,EAAQ/B,OAEV,YADA,QAAW8B,QAAQ,kCAIrB,MAAMK,EAAQvC,EAAwBwC,IAAKC,IAAM,IAAKA,EAAGtC,QAAS,IAAIuC,OAAOD,EAAEtC,QAAQwC,OAAQ,QAEzFC,EAAa,QAASC,MAAM,QAAS,CAACC,MAAOf,IAC7CgB,EAAW,QAASC,OAAO,WAAY,CAC3CH,MAAOd,EAAIe,MAAOX,EAA
Q,GAC1Bc,OAASC,GAAmBA,EAAIC,UAAY,UAAWb,gBAInDc,EAAoE,GACpEC,EAAW,OAAQ,IACzB,IAAK,MAAMC,KAAQf,EAAO,CACxB,MAAMgB,EAAQ,QAASC,KAAKF,EAAKpD,KAAM,CACrC4C,MAAOQ,EAAK3C,QACZ8C,YAAa,aAAa5B,EAAeyB,EAAKjD,WAAaiD,EAAKjD,aAElE+C,EAAWM,KAAK,CAACJ,OAAMK,MAAOJ,IAC9BF,EAASO,OAAOL,EAAMM,KACxB,CAEA,MAAMC,EAAiB,QAASN,KAAK,6BAA8B,CAACV,OAAO,IACrEiB,EAAgB,QAASP,KAAK,2BAA4B,CAACV,OAAO,IAClEkB,EAAe,QAASR,KAAK,8BAA+B,CAACV,OAAO,IAEpEmB,EAAS,SAAU,CAACC,MAAO,8BAC9BC,IAAI,SAAU,CAACvB,EAAYG,KAC3BoB,IAAI,KAAM,UACVA,IAAId,GACJc,IAAI,KAAM,WACVA,IAAI,SAAU,CAACL,EAAgBC,EAAeC,KAC9CI,KAAK,KACJ,IACE,MAAMC,EAAStB,EAASD,MAClB1B,EAAK,EAAAkD,SAASC,UAAUC,cAAcH,GAG5C,IAAK,MAAM,KAACf,EAAI,MAAEK,KAAUP,EAC1BE,EAAK3C,QAAUgD,EAAMb,MAEvB,MAAM2B,EDXP,SACLvB,EACA9B,EACAmB,GAEA,MAAMmC,EAAenC,EAAMU,OAAQR,GAAMA,EAAE9B,SACrCgE,EAAUvD,EAAGuD,QAGbC,EAAgB,IAAIC,IAEpBC,EAA+B,IAAIrD,MAAMyB,EAAI9C,QACnD,IAAI2E,EAAY,EAEhB,IAAK,IAAI1D,EAAS,EAAGA,EAAS6B,EAAI9C,OAAQiB,IAAU,CAClD,MAAM2D,EAAM7D,EAAmBC,EAAIC,GAC7B4D,EAA2B,GAEjC,IAAK,MAAM3B,KAAQoB,EAAc,CAG/B,IAAIQ,EACJ,IAFA5B,EAAKnD,QAAQgF,UAAY,EAEmB,QAApCD,EAAQ5B,EAAKnD,QAAQiF,KAAKJ,KAChCC,EAAKvB,KAAK,CACR2B,aAAc/B,EAAKrD,GACnBqF,cAAeJ,EAAMK,MACrBC,aAAcN,EAAMK,MAAQZ,EAAQvE,OAASuE,EAAQO,EAAMK,YAASE,EACpEC,gBAAiBR,EAAM,KAEzBN,EAAce,IAAIrC,EAAKrD,IAAK2E,EAAcgB,IAAItC,EAAKrD,KAAO,GAAK,GAC/D8E,GAEJ,CACAD,EAAQzD,GAAU4D,CACpB,CAmBA,MAAO,CAACY,YAhB6BnB,EAClCzB,OAAQR,GAAMmC,EAAckB,IAAIrD,EAAExC,KAClCuC,IAAKC,IAAM,CACVxC,GAAIwC,EAAExC,GACNC,KAAMuC,EAAEvC,KACR6F,YAAa,GAAGtD,EAAElC,mCAAmCqE,EAAcgB,IAAInD,EAAExC,YACzE+F,MAAO,KACPC,IAAK,KACLC,WAAyB,IAAbzD,EAAErC,OAAe,KAAqB+F,MAAQ,KAAqBC,MAC/EC,SAAU,KAAmBC,UAC7B9F,MAAOiC,EAAEjC,MACTH,SAAUoC,EAAEpC,SACZkG,aAAc9D,EAAEtC,QAAQwC,OACxB6D,eAAe,KAGE1B,UAASC,YAChC,CC3CuB0B,CAAgBpC,EAAQjD,EAAImB,IAEvCwB,EAAcjB,OAASgB,EAAehB,QD4C3C,SACLf,EACAsC,EACAI,GAGA,MAAMiC,GAAW,QAAqBrC,GACnCpB,OAAQ0D,GAAMA,EAAEN,WAAa,KAAmBC,YACnD,QAAqBjC,EAAQ,IAAIqC,KAAajC,EAAOoB,cAGrD,MAAMe,GAAW,QAA4B7E,EAAIsC,GACjD,IAAK,IAAI3C,EAAI,EAAGA,EAAI+C,EAAOK,QAAQ1E,OAAQsB,IAAK,CAC9C,MAAMmF,
GAAe,QAAkBD,EAAUlF,IAAM,IACvD,QAAkBkF,EAAUlF,GAAG,QAAamF,EAAcpC,EAAOK,QAAQpD,IAAI,GAAO,GACtF,CACF,CC3DUoF,CAA0B/E,EAAIsC,EAAQI,GAEpCT,EAAalB,OD4DlB,SACLf,EACAsC,EACAI,GAEA,MAAMsC,EAAU,GAAG1C,EAAOnE,uBACpB8G,EAASvC,EAAOK,QAAQtC,IAAKyC,GAASA,EAAK7E,QAC3C8C,EAAMnB,EAAGK,QAAQ6E,UAAUF,GACjC,IAAK,IAAIrF,EAAI,EAAGA,EAAIsF,EAAO5G,OAAQsB,IACjCwB,EAAIyC,IAAIjE,EAAGsF,EAAOtF,GAEtB,CCtEUwF,CAA6BnF,EAAIsC,EAAQI,GAE3C,QAAW0C,KAAK,mBAAmB1C,EAAOM,+BAA+BN,EAAOoB,YAAYzF,gBAC5F2B,EAAGqF,mBACL,CAAE,MAAOC,GACP,QAAWC,MAAM,0BAA0BD,EAAIE,SAAWF,KAC1DG,QAAQF,MAAMD,EAChB,IAGJpD,EAAOwD,MACT,C","sources":["webpack://bio/./src/utils/annotations/liability-scanner.ts","webpack://bio/./src/utils/annotations/liability-scanner-ui.ts"],"sourcesContent":["/* eslint-disable max-len */\nimport * as DG from 'datagrok-api/dg';\n\nimport {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';\nimport {\n SeqAnnotation, SeqAnnotationHit, RowAnnotationData,\n AnnotationVisualType, AnnotationCategory, LiabilitySeverity,\n ANNOTATION_COLORS,\n} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';\nimport {\n getOrCreateAnnotationColumn, setColumnAnnotations, setRowAnnotations,\n getColumnAnnotations, getRowAnnotations, mergeRowHits,\n} from './annotation-manager';\n\n/** A single liability scanning rule. */\nexport interface LiabilityRule {\n id: string;\n name: string;\n pattern: RegExp;\n length: number;\n severity: LiabilitySeverity;\n /** Sub-category for grouping (e.g. \"deamidation\", \"oxidation\") */\n ruleCategory: string;\n color: string;\n enabled: boolean;\n}\n\n/** Built-in liability rules for antibody engineering. 
*/\nexport const BUILTIN_LIABILITY_RULES: LiabilityRule[] = [\n {id: 'deamid-ng', name: 'Deamidation (NG)', pattern: /NG/g, length: 2, severity: LiabilitySeverity.High, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-ns', name: 'Deamidation (NS)', pattern: /NS/g, length: 2, severity: LiabilitySeverity.Medium, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-na', name: 'Deamidation (NA)', pattern: /NA/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-nd', name: 'Deamidation (ND)', pattern: /ND/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-nt', name: 'Deamidation (NT)', pattern: /NT/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'isom-dg', name: 'Isomerization (DG)', pattern: /DG/g, length: 2, severity: LiabilitySeverity.High, ruleCategory: 'isomerization', color: ANNOTATION_COLORS.liability.isomerization, enabled: true},\n {id: 'isom-ds', name: 'Isomerization (DS)', pattern: /DS/g, length: 2, severity: LiabilitySeverity.Medium, ruleCategory: 'isomerization', color: ANNOTATION_COLORS.liability.isomerization, enabled: true},\n {id: 'oxid-m', name: 'Oxidation (Met)', pattern: /M/g, length: 1, severity: LiabilitySeverity.Medium, ruleCategory: 'oxidation', color: ANNOTATION_COLORS.liability.oxidation, enabled: true},\n {id: 'oxid-w', name: 'Oxidation (Trp)', pattern: /W/g, length: 1, severity: LiabilitySeverity.Low, ruleCategory: 'oxidation', color: ANNOTATION_COLORS.liability.oxidation, enabled: true},\n {id: 'glyco-nxst', name: 'N-glycosylation', pattern: /N[^P][ST]/g, length: 3, severity: LiabilitySeverity.High, ruleCategory: 'glycosylation', 
color: ANNOTATION_COLORS.liability.glycosylation, enabled: true},\n {id: 'free-cys', name: 'Free Cysteine', pattern: /C/g, length: 1, severity: LiabilitySeverity.Info, ruleCategory: 'freeCysteine', color: ANNOTATION_COLORS.liability.freeCysteine, enabled: false},\n];\n\n/** Extracts a canonical single-letter string from a sequence handler for a given row. */\nfunction getCanonicalString(sh: ISeqHandler, rowIdx: number): string {\n const splitted = sh.getSplitted(rowIdx);\n const chars: string[] = new Array(splitted.length);\n for (let i = 0; i < splitted.length; i++)\n chars[i] = splitted.getOriginal(i);\n return chars.join('');\n}\n\nexport interface ScanLiabilitiesResult {\n annotations: SeqAnnotation[];\n rowData: RowAnnotationData[];\n totalHits: number;\n}\n\n/** Scans all rows of a macromolecule column for liability motifs.\n * Returns column-level SeqAnnotation entries + per-row SeqAnnotationHit arrays. */\nexport function scanLiabilities(\n col: DG.Column<string>,\n sh: ISeqHandler,\n rules: LiabilityRule[],\n): ScanLiabilitiesResult {\n const enabledRules = rules.filter((r) => r.enabled);\n const posList = sh.posList;\n\n // Track which rules had hits\n const ruleHitCounts = new Map<string, number>();\n\n const rowData: RowAnnotationData[] = new Array(col.length);\n let totalHits = 0;\n\n for (let rowIdx = 0; rowIdx < col.length; rowIdx++) {\n const seq = getCanonicalString(sh, rowIdx);\n const hits: SeqAnnotationHit[] = [];\n\n for (const rule of enabledRules) {\n // Reset regex lastIndex for global patterns\n rule.pattern.lastIndex = 0;\n let match: RegExpExecArray | null;\n while ((match = rule.pattern.exec(seq)) !== null) {\n hits.push({\n annotationId: rule.id,\n positionIndex: match.index,\n positionName: match.index < posList.length ? posList[match.index] : undefined,\n matchedMonomers: match[0],\n });\n ruleHitCounts.set(rule.id, (ruleHitCounts.get(rule.id) ?? 
0) + 1);\n totalHits++;\n }\n }\n rowData[rowIdx] = hits;\n }\n\n // Build column-level annotations only for rules that had hits\n const annotations: SeqAnnotation[] = enabledRules\n .filter((r) => ruleHitCounts.has(r.id))\n .map((r) => ({\n id: r.id,\n name: r.name,\n description: `${r.ruleCategory} liability pattern (${ruleHitCounts.get(r.id)} hits)`,\n start: null,\n end: null,\n visualType: r.length === 1 ? AnnotationVisualType.Point : AnnotationVisualType.Motif,\n category: AnnotationCategory.Liability,\n color: r.color,\n severity: r.severity,\n motifPattern: r.pattern.source,\n autoGenerated: true,\n }));\n\n return {annotations, rowData, totalHits};\n}\n\n/** Applies liability scan results to the DataFrame (writes tags + companion column). */\nexport function applyLiabilityScanResults(\n df: DG.DataFrame,\n seqCol: DG.Column<string>,\n result: ScanLiabilitiesResult,\n): void {\n // Merge with existing annotations, removing old liability entries\n const existing = getColumnAnnotations(seqCol)\n .filter((a) => a.category !== AnnotationCategory.Liability);\n setColumnAnnotations(seqCol, [...existing, ...result.annotations]);\n\n // Write per-row data to hidden companion column, preserving region hits from numbering\n const annotCol = getOrCreateAnnotationColumn(df, seqCol);\n for (let i = 0; i < result.rowData.length; i++) {\n const existingHits = getRowAnnotations(annotCol, i) ?? [];\n setRowAnnotations(annotCol, i, mergeRowHits(existingHits, result.rowData[i], false, true));\n }\n}\n\n/** Creates a liability summary count column (total hits per row). 
*/\nexport function createLiabilitySummaryColumn(\n df: DG.DataFrame,\n seqCol: DG.Column<string>,\n result: ScanLiabilitiesResult,\n): DG.Column<number> {\n const colName = `${seqCol.name}_liability_count`;\n const counts = result.rowData.map((hits) => hits.length);\n const col = df.columns.addNewInt(colName);\n for (let i = 0; i < counts.length; i++)\n col.set(i, counts[i]);\n return col;\n}\n","import * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {_package} from '../../package';\nimport {\n BUILTIN_LIABILITY_RULES, LiabilityRule, scanLiabilities,\n applyLiabilityScanResults, createLiabilitySummaryColumn,\n} from './liability-scanner';\nimport {LiabilitySeverity} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';\n\nconst severityLabels: Record<string, string> = {\n [LiabilitySeverity.High]: 'High',\n [LiabilitySeverity.Medium]: 'Medium',\n [LiabilitySeverity.Low]: 'Low',\n [LiabilitySeverity.Info]: 'Info',\n};\n\nexport function showLiabilityScannerDialog(): void {\n const df = grok.shell.tv?.dataFrame;\n if (!df) {\n grok.shell.warning('No table open');\n return;\n }\n\n const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n if (seqCols.length === 0) {\n grok.shell.warning('No macromolecule columns found');\n return;\n }\n\n const rules = BUILTIN_LIABILITY_RULES.map((r) => ({...r, pattern: new RegExp(r.pattern.source, 'g')}));\n\n const tableInput = ui.input.table('Table', {value: df});\n const seqInput = ui.input.column('Sequence', {\n table: df, value: seqCols[0],\n filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,\n });\n\n // Rule checkboxes\n const ruleChecks: {rule: LiabilityRule; input: DG.InputBase<boolean>}[] = [];\n const rulesDiv = ui.divV([]);\n for (const rule of rules) {\n const check = ui.input.bool(rule.name, {\n value: rule.enabled,\n tooltipText: `Severity: ${severityLabels[rule.severity] ?? 
rule.severity}`,\n });\n ruleChecks.push({rule, input: check});\n rulesDiv.append(check.root);\n }\n\n const highlightInput = ui.input.bool('Highlight in cell renderer', {value: true});\n const annotColInput = ui.input.bool('Create annotation column', {value: true});\n const summaryInput = ui.input.bool('Create summary count column', {value: false});\n\n const dialog = ui.dialog({title: 'Scan Sequence Liabilities'})\n .add(ui.inputs([tableInput, seqInput]))\n .add(ui.h3('Rules'))\n .add(rulesDiv)\n .add(ui.h3('Output'))\n .add(ui.inputs([highlightInput, annotColInput, summaryInput]))\n .onOK(() => {\n try {\n const seqCol = seqInput.value!;\n const sh = _package.seqHelper.getSeqHandler(seqCol);\n\n // Apply checkbox state\n for (const {rule, input} of ruleChecks)\n rule.enabled = input.value;\n\n const result = scanLiabilities(seqCol, sh, rules);\n\n if (annotColInput.value || highlightInput.value)\n applyLiabilityScanResults(df, seqCol, result);\n\n if (summaryInput.value)\n createLiabilitySummaryColumn(df, seqCol, result);\n\n grok.shell.info(`Liability scan: ${result.totalHits} hits found across ${result.annotations.length} rules`);\n df.fireValuesChanged();\n } catch (err: any) {\n grok.shell.error(`Liability scan failed: ${err.message ?? 
err}`);\n console.error(err);\n }\n });\n\n dialog.show();\n}\n"],"names":["BUILTIN_LIABILITY_RULES","id","name","pattern","length","severity","High","ruleCategory","color","liability","deamidation","enabled","Medium","Low","isomerization","oxidation","glycosylation","Info","freeCysteine","getCanonicalString","sh","rowIdx","splitted","getSplitted","chars","Array","i","getOriginal","join","severityLabels","showLiabilityScannerDialog","df","tv","dataFrame","warning","seqCols","columns","bySemTypeAll","MACROMOLECULE","rules","map","r","RegExp","source","tableInput","table","value","seqInput","column","filter","col","semType","ruleChecks","rulesDiv","rule","check","bool","tooltipText","push","input","append","root","highlightInput","annotColInput","summaryInput","dialog","title","add","onOK","seqCol","_package","seqHelper","getSeqHandler","result","enabledRules","posList","ruleHitCounts","Map","rowData","totalHits","seq","hits","match","lastIndex","exec","annotationId","positionIndex","index","positionName","undefined","matchedMonomers","set","get","annotations","has","description","start","end","visualType","Point","Motif","category","Liability","motifPattern","autoGenerated","scanLiabilities","existing","a","annotCol","existingHits","applyLiabilityScanResults","colName","counts","addNewInt","createLiabilitySummaryColumn","info","fireValuesChanged","err","error","message","console","show"],"sourceRoot":""}
|
|
1
|
+
{"version":3,"file":"422.js","mappings":"gMA4BO,MAAMA,EAA2C,CACtD,CAACC,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC/L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GACjM,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,UAAWC,KAAM,qBAAsBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUK,cAAeH,SAAS,GACnM,CAACV,GAAI,UAAWC,KAAM,qBAAsBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUK,cAAeH,SAAS,GACrM,CAACV,GAAI,SAAUC,KAAM,kBAAmBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,YAAaC,MAAO,KAAkBC,UAAUM,UAAWJ,SAAS,GACxL,CAACV,GAAI,SAAUC,KAAM,kBAAmBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,YAAaC,MAAO,KAAkBC,UAAUM,UAAWJ,SAAS,GACrL,CAACV,GAAI,aAAcC,KAAM,kBAAmBC,QAAS,aAAcC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUO,cAAeL,SAAS,GAC1M,CAACV,GAAI,WAAYC,KAAM,gBAAiBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBY,KAAMV,aAAc,eAAgBC,MAAO,KAAkBC,UAAUS,aAAcP,SAAS,IAI9L,SAASQ,EAAmBC,EAAiBC,GAC3C,MAAMC,EAAWF,EAAGG,YAAYF,GAC1BG,EAAkB,IAAIC,MAAMH,EAASlB,QAC3C,IAAK,IAAIsB,EAAI,EAAGA,EAAIJ,EAASlB,OAAQsB,IACnCF,EAAME,GAAKJ,EAASK,YAAYD,GAClC,OAAOF,EAAMI,KAAK,GACpB,CCtCA,MAAMC,EAAyC,CAC7C,CAAC,KAAkBvB,MAAO,OAC1B,CAAC,KAAkBM,QAAS,SAC5B,CAAC,KAAkBC,KAAM,MACzB,CAAC,KAAkBI,MAAO,QAGrB,SAASa,IACd,MAAMC,EAAK,QAAWC,IAAIC,UAC1B,IAAKF,EAEH,YADA,QAAWG,QAAQ,iBAIrB,MAAMC,EAAUJ,EAAGK,QAAQC,aAAa,UAAWC,eACnD,GAAuB,IAAnBH,EAAQ/B,OAEV,YADA,QAAW8B,QAAQ,kCAIrB,MAAMK,EAAQvC,EAAwBwC,IAAKC,IAAM,IAAKA,EAAGtC,QAAS,IAAIuC,OAAOD,EAAEtC,QAAQwC,OAAQ,QAEzFC,EAAa,QAASC,MAAM,QAAS,CAACC,MAAOf,IAC7CgB,EAAW,QAASC,OAAO,WAAY,CAC3CH,MAAOd,EAAIe,MAAOX,EAA
Q,GAC1Bc,OAASC,GAAmBA,EAAIC,UAAY,UAAWb,gBAInDc,EAAoE,GACpEC,EAAW,OAAQ,IACzB,IAAK,MAAMC,KAAQf,EAAO,CACxB,MAAMgB,EAAQ,QAASC,KAAKF,EAAKpD,KAAM,CACrC4C,MAAOQ,EAAK3C,QACZ8C,YAAa,aAAa5B,EAAeyB,EAAKjD,WAAaiD,EAAKjD,aAElE+C,EAAWM,KAAK,CAACJ,OAAMK,MAAOJ,IAC9BF,EAASO,OAAOL,EAAMM,KACxB,CAEA,MAAMC,EAAiB,QAASN,KAAK,6BAA8B,CAACV,OAAO,IACrEiB,EAAgB,QAASP,KAAK,2BAA4B,CAACV,OAAO,IAClEkB,EAAe,QAASR,KAAK,8BAA+B,CAACV,OAAO,IAEpEmB,EAAS,SAAU,CAACC,MAAO,8BAC9BC,IAAI,SAAU,CAACvB,EAAYG,KAC3BoB,IAAI,KAAM,UACVA,IAAId,GACJc,IAAI,KAAM,WACVA,IAAI,SAAU,CAACL,EAAgBC,EAAeC,KAC9CI,KAAK,KACJ,IACE,MAAMC,EAAStB,EAASD,MAClB1B,EAAK,EAAAkD,SAASC,UAAUC,cAAcH,GAG5C,IAAK,MAAM,KAACf,EAAI,MAAEK,KAAUP,EAC1BE,EAAK3C,QAAUgD,EAAMb,MAEvB,MAAM2B,EDXP,SACLvB,EACA9B,EACAmB,GAEA,MAAMmC,EAAenC,EAAMU,OAAQR,GAAMA,EAAE9B,SACrCgE,EAAUvD,EAAGuD,QAGbC,EAAgB,IAAIC,IAEpBC,EAA+B,IAAIrD,MAAMyB,EAAI9C,QACnD,IAAI2E,EAAY,EAEhB,IAAK,IAAI1D,EAAS,EAAGA,EAAS6B,EAAI9C,OAAQiB,IAAU,CAClD,MAAM2D,EAAM7D,EAAmBC,EAAIC,GAC7B4D,EAA2B,GAEjC,IAAK,MAAM3B,KAAQoB,EAAc,CAG/B,IAAIQ,EACJ,IAFA5B,EAAKnD,QAAQgF,UAAY,EAEmB,QAApCD,EAAQ5B,EAAKnD,QAAQiF,KAAKJ,KAChCC,EAAKvB,KAAK,CACR2B,aAAc/B,EAAKrD,GACnBqF,cAAeJ,EAAMK,MACrBC,aAAcN,EAAMK,MAAQZ,EAAQvE,OAASuE,EAAQO,EAAMK,YAASE,EACpEC,gBAAiBR,EAAM,KAEzBN,EAAce,IAAIrC,EAAKrD,IAAK2E,EAAcgB,IAAItC,EAAKrD,KAAO,GAAK,GAC/D8E,GAEJ,CACAD,EAAQzD,GAAU4D,CACpB,CAmBA,MAAO,CAACY,YAhB6BnB,EAClCzB,OAAQR,GAAMmC,EAAckB,IAAIrD,EAAExC,KAClCuC,IAAKC,IAAM,CACVxC,GAAIwC,EAAExC,GACNC,KAAMuC,EAAEvC,KACR6F,YAAa,GAAGtD,EAAElC,mCAAmCqE,EAAcgB,IAAInD,EAAExC,YACzE+F,MAAO,KACPC,IAAK,KACLC,WAAyB,IAAbzD,EAAErC,OAAe,KAAqB+F,MAAQ,KAAqBC,MAC/EC,SAAU,KAAmBC,UAC7B9F,MAAOiC,EAAEjC,MACTH,SAAUoC,EAAEpC,SACZkG,aAAc9D,EAAEtC,QAAQwC,OACxB6D,eAAe,KAGE1B,UAASC,YAChC,CC3CuB0B,CAAgBpC,EAAQjD,EAAImB,IAEvCwB,EAAcjB,OAASgB,EAAehB,QD4C3C,SACLf,EACAsC,EACAI,GAGA,MAAMiC,GAAW,QAAqBrC,GACnCpB,OAAQ0D,GAAMA,EAAEN,WAAa,KAAmBC,YACnD,QAAqBjC,EAAQ,IAAIqC,KAAajC,EAAOoB,cAGrD,MAAMe,GAAW,QAA4B7E,EAAIsC,GACjD,IAAK,IAAI3C,EAAI,EAAGA,EAAI+C,EAAOK,QAAQ1E,OAAQsB,IAAK,CAC9C,MAAMmF,
GAAe,QAAkBD,EAAUlF,IAAM,IACvD,QAAkBkF,EAAUlF,GAAG,QAAamF,EAAcpC,EAAOK,QAAQpD,IAAI,GAAO,GACtF,CACF,CC3DUoF,CAA0B/E,EAAIsC,EAAQI,GAEpCT,EAAalB,OD4DlB,SACLf,EACAsC,EACAI,GAEA,MAAMsC,EAAU,GAAG1C,EAAOnE,uBACpB8G,EAASvC,EAAOK,QAAQtC,IAAKyC,GAASA,EAAK7E,QAC3C8C,EAAMnB,EAAGK,QAAQ6E,UAAUF,GACjC,IAAK,IAAIrF,EAAI,EAAGA,EAAIsF,EAAO5G,OAAQsB,IACjCwB,EAAIyC,IAAIjE,EAAGsF,EAAOtF,GAEtB,CCtEUwF,CAA6BnF,EAAIsC,EAAQI,GAE3C,QAAW0C,KAAK,mBAAmB1C,EAAOM,+BAA+BN,EAAOoB,YAAYzF,gBAC5F2B,EAAGqF,mBACL,CAAE,MAAOC,GACP,QAAWC,MAAM,0BAA0BD,EAAIE,SAAWF,KAC1DG,QAAQF,MAAMD,EAChB,IAGJpD,EAAOwD,MACT,C","sources":["webpack://bio/./src/utils/annotations/liability-scanner.ts","webpack://bio/./src/utils/annotations/liability-scanner-ui.ts"],"sourcesContent":["/* eslint-disable max-len */\nimport * as DG from 'datagrok-api/dg';\n\nimport {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';\nimport {\n SeqAnnotation, SeqAnnotationHit, RowAnnotationData,\n AnnotationVisualType, AnnotationCategory, LiabilitySeverity,\n ANNOTATION_COLORS,\n} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';\nimport {\n getOrCreateAnnotationColumn, setColumnAnnotations, setRowAnnotations,\n getColumnAnnotations, getRowAnnotations, mergeRowHits,\n} from './annotation-manager';\n\n/** A single liability scanning rule. */\nexport interface LiabilityRule {\n id: string;\n name: string;\n pattern: RegExp;\n length: number;\n severity: LiabilitySeverity;\n /** Sub-category for grouping (e.g. \"deamidation\", \"oxidation\") */\n ruleCategory: string;\n color: string;\n enabled: boolean;\n}\n\n/** Built-in liability rules for antibody engineering. 
*/\nexport const BUILTIN_LIABILITY_RULES: LiabilityRule[] = [\n {id: 'deamid-ng', name: 'Deamidation (NG)', pattern: /NG/g, length: 2, severity: LiabilitySeverity.High, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-ns', name: 'Deamidation (NS)', pattern: /NS/g, length: 2, severity: LiabilitySeverity.Medium, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-na', name: 'Deamidation (NA)', pattern: /NA/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-nd', name: 'Deamidation (ND)', pattern: /ND/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-nt', name: 'Deamidation (NT)', pattern: /NT/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'isom-dg', name: 'Isomerization (DG)', pattern: /DG/g, length: 2, severity: LiabilitySeverity.High, ruleCategory: 'isomerization', color: ANNOTATION_COLORS.liability.isomerization, enabled: true},\n {id: 'isom-ds', name: 'Isomerization (DS)', pattern: /DS/g, length: 2, severity: LiabilitySeverity.Medium, ruleCategory: 'isomerization', color: ANNOTATION_COLORS.liability.isomerization, enabled: true},\n {id: 'oxid-m', name: 'Oxidation (Met)', pattern: /M/g, length: 1, severity: LiabilitySeverity.Medium, ruleCategory: 'oxidation', color: ANNOTATION_COLORS.liability.oxidation, enabled: true},\n {id: 'oxid-w', name: 'Oxidation (Trp)', pattern: /W/g, length: 1, severity: LiabilitySeverity.Low, ruleCategory: 'oxidation', color: ANNOTATION_COLORS.liability.oxidation, enabled: true},\n {id: 'glyco-nxst', name: 'N-glycosylation', pattern: /N[^P][ST]/g, length: 3, severity: LiabilitySeverity.High, ruleCategory: 'glycosylation', 
color: ANNOTATION_COLORS.liability.glycosylation, enabled: true},\n {id: 'free-cys', name: 'Free Cysteine', pattern: /C/g, length: 1, severity: LiabilitySeverity.Info, ruleCategory: 'freeCysteine', color: ANNOTATION_COLORS.liability.freeCysteine, enabled: false},\n];\n\n/** Extracts a canonical single-letter string from a sequence handler for a given row. */\nfunction getCanonicalString(sh: ISeqHandler, rowIdx: number): string {\n const splitted = sh.getSplitted(rowIdx);\n const chars: string[] = new Array(splitted.length);\n for (let i = 0; i < splitted.length; i++)\n chars[i] = splitted.getOriginal(i);\n return chars.join('');\n}\n\nexport interface ScanLiabilitiesResult {\n annotations: SeqAnnotation[];\n rowData: RowAnnotationData[];\n totalHits: number;\n}\n\n/** Scans all rows of a macromolecule column for liability motifs.\n * Returns column-level SeqAnnotation entries + per-row SeqAnnotationHit arrays. */\nexport function scanLiabilities(\n col: DG.Column<string>,\n sh: ISeqHandler,\n rules: LiabilityRule[],\n): ScanLiabilitiesResult {\n const enabledRules = rules.filter((r) => r.enabled);\n const posList = sh.posList;\n\n // Track which rules had hits\n const ruleHitCounts = new Map<string, number>();\n\n const rowData: RowAnnotationData[] = new Array(col.length);\n let totalHits = 0;\n\n for (let rowIdx = 0; rowIdx < col.length; rowIdx++) {\n const seq = getCanonicalString(sh, rowIdx);\n const hits: SeqAnnotationHit[] = [];\n\n for (const rule of enabledRules) {\n // Reset regex lastIndex for global patterns\n rule.pattern.lastIndex = 0;\n let match: RegExpExecArray | null;\n while ((match = rule.pattern.exec(seq)) !== null) {\n hits.push({\n annotationId: rule.id,\n positionIndex: match.index,\n positionName: match.index < posList.length ? posList[match.index] : undefined,\n matchedMonomers: match[0],\n });\n ruleHitCounts.set(rule.id, (ruleHitCounts.get(rule.id) ?? 
0) + 1);\n totalHits++;\n }\n }\n rowData[rowIdx] = hits;\n }\n\n // Build column-level annotations only for rules that had hits\n const annotations: SeqAnnotation[] = enabledRules\n .filter((r) => ruleHitCounts.has(r.id))\n .map((r) => ({\n id: r.id,\n name: r.name,\n description: `${r.ruleCategory} liability pattern (${ruleHitCounts.get(r.id)} hits)`,\n start: null,\n end: null,\n visualType: r.length === 1 ? AnnotationVisualType.Point : AnnotationVisualType.Motif,\n category: AnnotationCategory.Liability,\n color: r.color,\n severity: r.severity,\n motifPattern: r.pattern.source,\n autoGenerated: true,\n }));\n\n return {annotations, rowData, totalHits};\n}\n\n/** Applies liability scan results to the DataFrame (writes tags + companion column). */\nexport function applyLiabilityScanResults(\n df: DG.DataFrame,\n seqCol: DG.Column<string>,\n result: ScanLiabilitiesResult,\n): void {\n // Merge with existing annotations, removing old liability entries\n const existing = getColumnAnnotations(seqCol)\n .filter((a) => a.category !== AnnotationCategory.Liability);\n setColumnAnnotations(seqCol, [...existing, ...result.annotations]);\n\n // Write per-row data to hidden companion column, preserving region hits from numbering\n const annotCol = getOrCreateAnnotationColumn(df, seqCol);\n for (let i = 0; i < result.rowData.length; i++) {\n const existingHits = getRowAnnotations(annotCol, i) ?? [];\n setRowAnnotations(annotCol, i, mergeRowHits(existingHits, result.rowData[i], false, true));\n }\n}\n\n/** Creates a liability summary count column (total hits per row). 
*/\nexport function createLiabilitySummaryColumn(\n df: DG.DataFrame,\n seqCol: DG.Column<string>,\n result: ScanLiabilitiesResult,\n): DG.Column<number> {\n const colName = `${seqCol.name}_liability_count`;\n const counts = result.rowData.map((hits) => hits.length);\n const col = df.columns.addNewInt(colName);\n for (let i = 0; i < counts.length; i++)\n col.set(i, counts[i]);\n return col;\n}\n","import * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {_package} from '../../package';\nimport {\n BUILTIN_LIABILITY_RULES, LiabilityRule, scanLiabilities,\n applyLiabilityScanResults, createLiabilitySummaryColumn,\n} from './liability-scanner';\nimport {LiabilitySeverity} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';\n\nconst severityLabels: Record<string, string> = {\n [LiabilitySeverity.High]: 'High',\n [LiabilitySeverity.Medium]: 'Medium',\n [LiabilitySeverity.Low]: 'Low',\n [LiabilitySeverity.Info]: 'Info',\n};\n\nexport function showLiabilityScannerDialog(): void {\n const df = grok.shell.tv?.dataFrame;\n if (!df) {\n grok.shell.warning('No table open');\n return;\n }\n\n const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n if (seqCols.length === 0) {\n grok.shell.warning('No macromolecule columns found');\n return;\n }\n\n const rules = BUILTIN_LIABILITY_RULES.map((r) => ({...r, pattern: new RegExp(r.pattern.source, 'g')}));\n\n const tableInput = ui.input.table('Table', {value: df});\n const seqInput = ui.input.column('Sequence', {\n table: df, value: seqCols[0],\n filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,\n });\n\n // Rule checkboxes\n const ruleChecks: {rule: LiabilityRule; input: DG.InputBase<boolean>}[] = [];\n const rulesDiv = ui.divV([]);\n for (const rule of rules) {\n const check = ui.input.bool(rule.name, {\n value: rule.enabled,\n tooltipText: `Severity: ${severityLabels[rule.severity] ?? 
rule.severity}`,\n });\n ruleChecks.push({rule, input: check});\n rulesDiv.append(check.root);\n }\n\n const highlightInput = ui.input.bool('Highlight in cell renderer', {value: true});\n const annotColInput = ui.input.bool('Create annotation column', {value: true});\n const summaryInput = ui.input.bool('Create summary count column', {value: false});\n\n const dialog = ui.dialog({title: 'Scan Sequence Liabilities'})\n .add(ui.inputs([tableInput, seqInput]))\n .add(ui.h3('Rules'))\n .add(rulesDiv)\n .add(ui.h3('Output'))\n .add(ui.inputs([highlightInput, annotColInput, summaryInput]))\n .onOK(() => {\n try {\n const seqCol = seqInput.value!;\n const sh = _package.seqHelper.getSeqHandler(seqCol);\n\n // Apply checkbox state\n for (const {rule, input} of ruleChecks)\n rule.enabled = input.value;\n\n const result = scanLiabilities(seqCol, sh, rules);\n\n if (annotColInput.value || highlightInput.value)\n applyLiabilityScanResults(df, seqCol, result);\n\n if (summaryInput.value)\n createLiabilitySummaryColumn(df, seqCol, result);\n\n grok.shell.info(`Liability scan: ${result.totalHits} hits found across ${result.annotations.length} rules`);\n df.fireValuesChanged();\n } catch (err: any) {\n grok.shell.error(`Liability scan failed: ${err.message ?? 
err}`);\n console.error(err);\n }\n });\n\n dialog.show();\n}\n"],"names":["BUILTIN_LIABILITY_RULES","id","name","pattern","length","severity","High","ruleCategory","color","liability","deamidation","enabled","Medium","Low","isomerization","oxidation","glycosylation","Info","freeCysteine","getCanonicalString","sh","rowIdx","splitted","getSplitted","chars","Array","i","getOriginal","join","severityLabels","showLiabilityScannerDialog","df","tv","dataFrame","warning","seqCols","columns","bySemTypeAll","MACROMOLECULE","rules","map","r","RegExp","source","tableInput","table","value","seqInput","column","filter","col","semType","ruleChecks","rulesDiv","rule","check","bool","tooltipText","push","input","append","root","highlightInput","annotColInput","summaryInput","dialog","title","add","onOK","seqCol","_package","seqHelper","getSeqHandler","result","enabledRules","posList","ruleHitCounts","Map","rowData","totalHits","seq","hits","match","lastIndex","exec","annotationId","positionIndex","index","positionName","undefined","matchedMonomers","set","get","annotations","has","description","start","end","visualType","Point","Motif","category","Liability","motifPattern","autoGenerated","scanLiabilities","existing","a","annotCol","existingHits","applyLiabilityScanResults","colName","counts","addNewInt","createLiabilitySummaryColumn","info","fireValuesChanged","err","error","message","console","show"],"sourceRoot":""}
|