@datagrok/bio 2.26.8 → 2.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Bio changelog
2
2
 
3
+ ## 2.27.0 (2026-04-09)
4
+
5
+ * Engine-based MSA
6
+
3
7
  ## 2.26.5 (2026-04-01)
4
8
 
5
9
  ### Features
package/CLAUDE.md CHANGED
@@ -44,7 +44,7 @@ Other packages depend on these implementations: **Helm**, **Peptides**, **Biostr
44
44
  |---|---|---|
45
45
  | `Bio \| Analyze \| Activity Cliffs...` | `activityCliffs` | Detects sequence pairs with similar structure but significant activity difference |
46
46
  | `Bio \| Analyze \| Sequence Space...` | `sequenceSpaceTopMenu` | UMAP/tSNE 2D projection of sequences by pairwise distance |
47
- | `Bio \| Analyze \| MSA...` | `multipleSequenceAlignmentDialog` | Multiple sequence alignment via kalign (WASM) or PepSeA (Docker) |
47
+ | `Bio \| Analyze \| MSA...` | `multipleSequenceAlignmentDialog` | Multiple sequence alignment via kalign (WASM) for canonical sequences, or dynamically discovered engines (e.g. PepSeA Docker) for non-canonical |
48
48
  | `Bio \| Analyze \| Composition` | `compositionAnalysis` | Docks a WebLogo viewer for sequence composition |
49
49
  | `Bio \| Transform \| Convert Sequence Notation...` | `convertDialog` | FASTA ↔ SEPARATOR ↔ HELM ↔ BILN conversion |
50
50
  | `Bio \| Transform \| To Atomic Level...` | `toAtomicLevel` | Converts sequences to V3000 molfiles |
@@ -177,9 +177,38 @@ Key methods: `detectSeparator()`, `detectAlphabet()`, `getAlphabetSimilarity()`,
177
177
 
178
178
  | File | Purpose |
179
179
  |---|---|
180
- | `multiple-sequence-alignment.ts` | `multipleSequenceAlignment()` — core MSA via **kalign** (WebAssembly/Aioli). Supports per-cluster alignment, gap penalties, selected-rows-only mode. |
181
- | `multiple-sequence-alignment-ui.ts` | `multipleSequenceAlignmentUI()` — MSA dialog with column selection, alignment method (kalign vs PepSeA), gap penalties |
182
- | `pepsea.ts` | `pepseaAlignSequences()` — MSA for HELM peptides via **PepSeA Docker container** (mafft/linsi/ginsi methods) |
180
+ | `multiple-sequence-alignment.ts` | `runKalign()` — core MSA via **kalign** (WebAssembly/Aioli). Supports per-cluster alignment, gap penalties, selected-rows-only mode. Used for canonical sequences (DNA/RNA/PT). |
181
+ | `multiple-sequence-alignment-ui.ts` | `multipleSequenceAlignmentUI()` — MSA dialog with column selection, mode switching (kalign for canonical, dynamically discovered engines for non-canonical), per-cluster alignment, selected-rows-only. |
182
+ | `pepsea.ts` | `alignWithPepsea()` / `runPepsea()` — MSA for HELM peptides via **PepSeA Docker container** (mafft/linsi/ginsi methods). Registered as a `sequenceMSA` engine via `pepseaMsa()` in `package.ts`. |
183
+
184
+ ##### Adding a New MSA Engine
185
+
186
+ Non-canonical MSA engines are discovered dynamically via `DG.Func.find({meta: {role: 'sequenceMSA'}})`.
187
+ To add a new alignment engine (in this or any other package):
188
+
189
+ 1. Register a function with `meta.role: 'sequenceMSA'` and `outputs: [{name: 'result', type: 'column'}]`.
190
+ 2. The **first parameter** must be a `column` input with `semType: 'Macromolecule'` — the sequences to align.
191
+ 3. All **remaining parameters** are engine-specific configuration (method, gap penalties, etc.) and will be rendered automatically in the MSA dialog under "Alignment parameters".
192
+ 4. The function must **create and return** the aligned output column with appropriate metadata tags (`meta.units`, `semType`, `aligned`, `alphabet`, `separator`, etc.). Different engines can produce different output notations (e.g. PepSeA produces separator notation with `.` delimiter).
193
+ 5. The MSA dialog handles clustering and row selection — the engine function receives a single column to align.
194
+
195
+ Example (decorator style):
196
+ ```typescript
197
+ @grok.decorators.func({
198
+ name: 'My Aligner',
199
+ description: 'Custom MSA engine',
200
+ meta: {role: 'sequenceMSA'},
201
+ outputs: [{name: 'result', type: 'column'}],
202
+ })
203
+ static async myAligner(
204
+ @grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) sequenceCol: DG.Column<string>,
205
+ @grok.decorators.param({type: 'double'}) gapOpen: number = 1.0,
206
+ ): Promise<DG.Column<string>> {
207
+ // Align sequences, create result column with metadata tags, return it.
208
+ }
209
+ ```
210
+
211
+ See `pepseaMsa()` in `package.ts` and `alignWithPepsea()` in `pepsea.ts` for a complete reference implementation.
183
212
 
184
213
  #### Seq Helper — `src/utils/seq-helper/`
185
214
 
@@ -351,7 +380,8 @@ Test entry point: `src/package-test.ts` — imports all test files, exports `tes
351
380
  | Sequence space (UMAP/tSNE) | `src/analysis/sequence-space.ts` |
352
381
  | Similarity/diversity viewers | `src/analysis/sequence-similarity-viewer.ts` / `sequence-diversity-viewer.ts` |
353
382
  | MSA (kalign) | `src/utils/multiple-sequence-alignment.ts` |
354
- | MSA (PepSeA Docker) | `src/utils/pepsea.ts` |
383
+ | MSA (PepSeA Docker) | `src/utils/pepsea.ts` + `pepseaMsa()` in `src/package.ts` |
384
+ | Adding MSA engines | See "Adding a New MSA Engine" in the Multiple Sequence Alignment section |
355
385
  | Notation conversion | `src/utils/convert.ts` |
356
386
  | Seq → molfile conversion | `src/utils/sequence-to-mol.ts` |
357
387
  | HELM → molfile pipeline | `src/utils/helm-to-molfile/converter/` |
package/detectors.js CHANGED
@@ -333,7 +333,8 @@ class BioPackageDetectors extends DG.Package {
333
333
  // const forbidden = this.checkForbiddenWoSeparator(stats.freq);
334
334
  col.meta.units = units;
335
335
  if (separator) col.setTag(SeqHandler.TAGS.separator, separator);
336
- col.setTag(SeqHandler.TAGS.aligned, aligned);
336
+ if (!col.getTag(SeqHandler.TAGS.aligned))
337
+ col.setTag(SeqHandler.TAGS.aligned, aligned);
337
338
  col.setTag(SeqHandler.TAGS.alphabet, alphabet);
338
339
  if (alphabet === ALPHABET.UN) {
339
340
  // alphabetSize calculated on (sub)sample of data is incorrect
@@ -706,7 +707,8 @@ class BioPackageDetectors extends DG.Package {
706
707
  const isPotentiallyMSA = averageLength > 1 && std < 2; // if the average length is more than 1 and the std is less than 2, then it is potentially MSA
707
708
  column.setTag('units', notationInput.value);
708
709
  separatorInput.value && column.setTag('separator', separatorInput.value);
709
- column.setTag('aligned', isPotentiallyMSA ? 'SEQ.MSA' : 'SEQ');
710
+ if (!column.getTag('aligned'))
711
+ column.setTag('aligned', isPotentiallyMSA ? 'SEQ.MSA' : 'SEQ');
710
712
  column.setTag('alphabet', defaultAlphabet);
711
713
  isMultichar && column.setTag('.alphabetIsMultichar', 'true');
712
714
  column.semType = 'Macromolecule';
package/dist/422.js CHANGED
@@ -1,2 +1,2 @@
1
- "use strict";(self.webpackChunkbio=self.webpackChunkbio||[]).push([[422],{9422(e,t,i){i.d(t,{showLiabilityScannerDialog:()=>c});var n=i(4328),a=i(7389),o=i(6082),l=i(980),r=i(4517),d=i(3736);const s=[{id:"deamid-ng",name:"Deamidation (NG)",pattern:/NG/g,length:2,severity:r.Hq.High,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-ns",name:"Deamidation (NS)",pattern:/NS/g,length:2,severity:r.Hq.Medium,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-na",name:"Deamidation (NA)",pattern:/NA/g,length:2,severity:r.Hq.Low,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-nd",name:"Deamidation (ND)",pattern:/ND/g,length:2,severity:r.Hq.Low,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-nt",name:"Deamidation (NT)",pattern:/NT/g,length:2,severity:r.Hq.Low,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"isom-dg",name:"Isomerization (DG)",pattern:/DG/g,length:2,severity:r.Hq.High,ruleCategory:"isomerization",color:r.D5.liability.isomerization,enabled:!0},{id:"isom-ds",name:"Isomerization (DS)",pattern:/DS/g,length:2,severity:r.Hq.Medium,ruleCategory:"isomerization",color:r.D5.liability.isomerization,enabled:!0},{id:"oxid-m",name:"Oxidation (Met)",pattern:/M/g,length:1,severity:r.Hq.Medium,ruleCategory:"oxidation",color:r.D5.liability.oxidation,enabled:!0},{id:"oxid-w",name:"Oxidation (Trp)",pattern:/W/g,length:1,severity:r.Hq.Low,ruleCategory:"oxidation",color:r.D5.liability.oxidation,enabled:!0},{id:"glyco-nxst",name:"N-glycosylation",pattern:/N[^P][ST]/g,length:3,severity:r.Hq.High,ruleCategory:"glycosylation",color:r.D5.liability.glycosylation,enabled:!0},{id:"free-cys",name:"Free Cysteine",pattern:/C/g,length:1,severity:r.Hq.Info,ruleCategory:"freeCysteine",color:r.D5.liability.freeCysteine,enabled:!1}];function u(e,t){const i=e.getSplitted(t),n=new Array(i.length);for(let 
e=0;e<i.length;e++)n[e]=i.getOriginal(e);return n.join("")}const g={[r.Hq.High]:"High",[r.Hq.Medium]:"Medium",[r.Hq.Low]:"Low",[r.Hq.Info]:"Info"};function c(){const e=n.shell.tv?.dataFrame;if(!e)return void n.shell.warning("No table open");const t=e.columns.bySemTypeAll(o.SEMTYPE.MACROMOLECULE);if(0===t.length)return void n.shell.warning("No macromolecule columns found");const i=s.map(e=>({...e,pattern:new RegExp(e.pattern.source,"g")})),c=a.input.table("Table",{value:e}),m=a.input.column("Sequence",{table:e,value:t[0],filter:e=>e.semType===o.SEMTYPE.MACROMOLECULE}),y=[],p=a.divV([]);for(const e of i){const t=a.input.bool(e.name,{value:e.enabled,tooltipText:`Severity: ${g[e.severity]??e.severity}`});y.push({rule:e,input:t}),p.append(t.root)}const h=a.input.bool("Highlight in cell renderer",{value:!0}),b=a.input.bool("Create annotation column",{value:!0}),f=a.input.bool("Create summary count column",{value:!1}),v=a.dialog({title:"Scan Sequence Liabilities"}).add(a.inputs([c,m])).add(a.h3("Rules")).add(p).add(a.h3("Output")).add(a.inputs([h,b,f])).onOK(()=>{try{const t=m.value,a=l._package.seqHelper.getSeqHandler(t);for(const{rule:e,input:t}of y)e.enabled=t.value;const o=function(e,t,i){const n=i.filter(e=>e.enabled),a=t.posList,o=new Map,l=new Array(e.length);let d=0;for(let i=0;i<e.length;i++){const e=u(t,i),r=[];for(const t of n){let i;for(t.pattern.lastIndex=0;null!==(i=t.pattern.exec(e));)r.push({annotationId:t.id,positionIndex:i.index,positionName:i.index<a.length?a[i.index]:void 0,matchedMonomers:i[0]}),o.set(t.id,(o.get(t.id)??0)+1),d++}l[i]=r}return{annotations:n.filter(e=>o.has(e.id)).map(e=>({id:e.id,name:e.name,description:`${e.ruleCategory} liability pattern (${o.get(e.id)} hits)`,start:null,end:null,visualType:1===e.length?r.ao.Point:r.ao.Motif,category:r.eI.Liability,color:e.color,severity:e.severity,motifPattern:e.pattern.source,autoGenerated:!0})),rowData:l,totalHits:d}}(t,a,i);(b.value||h.value)&&function(e,t,i){const 
n=(0,d.Ln)(t).filter(e=>e.category!==r.eI.Liability);(0,d.fh)(t,[...n,...i.annotations]);const a=(0,d.Lz)(e,t);for(let e=0;e<i.rowData.length;e++){const t=(0,d.JG)(a,e)??[];(0,d.z5)(a,e,(0,d.bq)(t,i.rowData[e],!1,!0))}}(e,t,o),f.value&&function(e,t,i){const n=`${t.name}_liability_count`,a=i.rowData.map(e=>e.length),o=e.columns.addNewInt(n);for(let e=0;e<a.length;e++)o.set(e,a[e])}(e,t,o),n.shell.info(`Liability scan: ${o.totalHits} hits found across ${o.annotations.length} rules`),e.fireValuesChanged()}catch(e){n.shell.error(`Liability scan failed: ${e.message??e}`),console.error(e)}});v.show()}}}]);
1
+ "use strict";(self.webpackChunkbio=self.webpackChunkbio||[]).push([[422],{9422(e,t,i){i.d(t,{showLiabilityScannerDialog:()=>c});var n=i(4328),a=i(7389),o=i(6082),l=i(8012),r=i(4517),d=i(3736);const s=[{id:"deamid-ng",name:"Deamidation (NG)",pattern:/NG/g,length:2,severity:r.Hq.High,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-ns",name:"Deamidation (NS)",pattern:/NS/g,length:2,severity:r.Hq.Medium,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-na",name:"Deamidation (NA)",pattern:/NA/g,length:2,severity:r.Hq.Low,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-nd",name:"Deamidation (ND)",pattern:/ND/g,length:2,severity:r.Hq.Low,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"deamid-nt",name:"Deamidation (NT)",pattern:/NT/g,length:2,severity:r.Hq.Low,ruleCategory:"deamidation",color:r.D5.liability.deamidation,enabled:!0},{id:"isom-dg",name:"Isomerization (DG)",pattern:/DG/g,length:2,severity:r.Hq.High,ruleCategory:"isomerization",color:r.D5.liability.isomerization,enabled:!0},{id:"isom-ds",name:"Isomerization (DS)",pattern:/DS/g,length:2,severity:r.Hq.Medium,ruleCategory:"isomerization",color:r.D5.liability.isomerization,enabled:!0},{id:"oxid-m",name:"Oxidation (Met)",pattern:/M/g,length:1,severity:r.Hq.Medium,ruleCategory:"oxidation",color:r.D5.liability.oxidation,enabled:!0},{id:"oxid-w",name:"Oxidation (Trp)",pattern:/W/g,length:1,severity:r.Hq.Low,ruleCategory:"oxidation",color:r.D5.liability.oxidation,enabled:!0},{id:"glyco-nxst",name:"N-glycosylation",pattern:/N[^P][ST]/g,length:3,severity:r.Hq.High,ruleCategory:"glycosylation",color:r.D5.liability.glycosylation,enabled:!0},{id:"free-cys",name:"Free Cysteine",pattern:/C/g,length:1,severity:r.Hq.Info,ruleCategory:"freeCysteine",color:r.D5.liability.freeCysteine,enabled:!1}];function u(e,t){const i=e.getSplitted(t),n=new Array(i.length);for(let 
e=0;e<i.length;e++)n[e]=i.getOriginal(e);return n.join("")}const g={[r.Hq.High]:"High",[r.Hq.Medium]:"Medium",[r.Hq.Low]:"Low",[r.Hq.Info]:"Info"};function c(){const e=n.shell.tv?.dataFrame;if(!e)return void n.shell.warning("No table open");const t=e.columns.bySemTypeAll(o.SEMTYPE.MACROMOLECULE);if(0===t.length)return void n.shell.warning("No macromolecule columns found");const i=s.map(e=>({...e,pattern:new RegExp(e.pattern.source,"g")})),c=a.input.table("Table",{value:e}),m=a.input.column("Sequence",{table:e,value:t[0],filter:e=>e.semType===o.SEMTYPE.MACROMOLECULE}),y=[],p=a.divV([]);for(const e of i){const t=a.input.bool(e.name,{value:e.enabled,tooltipText:`Severity: ${g[e.severity]??e.severity}`});y.push({rule:e,input:t}),p.append(t.root)}const h=a.input.bool("Highlight in cell renderer",{value:!0}),b=a.input.bool("Create annotation column",{value:!0}),f=a.input.bool("Create summary count column",{value:!1}),v=a.dialog({title:"Scan Sequence Liabilities"}).add(a.inputs([c,m])).add(a.h3("Rules")).add(p).add(a.h3("Output")).add(a.inputs([h,b,f])).onOK(()=>{try{const t=m.value,a=l._package.seqHelper.getSeqHandler(t);for(const{rule:e,input:t}of y)e.enabled=t.value;const o=function(e,t,i){const n=i.filter(e=>e.enabled),a=t.posList,o=new Map,l=new Array(e.length);let d=0;for(let i=0;i<e.length;i++){const e=u(t,i),r=[];for(const t of n){let i;for(t.pattern.lastIndex=0;null!==(i=t.pattern.exec(e));)r.push({annotationId:t.id,positionIndex:i.index,positionName:i.index<a.length?a[i.index]:void 0,matchedMonomers:i[0]}),o.set(t.id,(o.get(t.id)??0)+1),d++}l[i]=r}return{annotations:n.filter(e=>o.has(e.id)).map(e=>({id:e.id,name:e.name,description:`${e.ruleCategory} liability pattern (${o.get(e.id)} hits)`,start:null,end:null,visualType:1===e.length?r.ao.Point:r.ao.Motif,category:r.eI.Liability,color:e.color,severity:e.severity,motifPattern:e.pattern.source,autoGenerated:!0})),rowData:l,totalHits:d}}(t,a,i);(b.value||h.value)&&function(e,t,i){const 
n=(0,d.Ln)(t).filter(e=>e.category!==r.eI.Liability);(0,d.fh)(t,[...n,...i.annotations]);const a=(0,d.Lz)(e,t);for(let e=0;e<i.rowData.length;e++){const t=(0,d.JG)(a,e)??[];(0,d.z5)(a,e,(0,d.bq)(t,i.rowData[e],!1,!0))}}(e,t,o),f.value&&function(e,t,i){const n=`${t.name}_liability_count`,a=i.rowData.map(e=>e.length),o=e.columns.addNewInt(n);for(let e=0;e<a.length;e++)o.set(e,a[e])}(e,t,o),n.shell.info(`Liability scan: ${o.totalHits} hits found across ${o.annotations.length} rules`),e.fireValuesChanged()}catch(e){n.shell.error(`Liability scan failed: ${e.message??e}`),console.error(e)}});v.show()}}}]);
2
2
  //# sourceMappingURL=422.js.map
package/dist/422.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"422.js","mappings":"+LA4BO,MAAMA,EAA2C,CACtD,CAACC,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC/L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GACjM,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,UAAWC,KAAM,qBAAsBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUK,cAAeH,SAAS,GACnM,CAACV,GAAI,UAAWC,KAAM,qBAAsBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUK,cAAeH,SAAS,GACrM,CAACV,GAAI,SAAUC,KAAM,kBAAmBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,YAAaC,MAAO,KAAkBC,UAAUM,UAAWJ,SAAS,GACxL,CAACV,GAAI,SAAUC,KAAM,kBAAmBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,YAAaC,MAAO,KAAkBC,UAAUM,UAAWJ,SAAS,GACrL,CAACV,GAAI,aAAcC,KAAM,kBAAmBC,QAAS,aAAcC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUO,cAAeL,SAAS,GAC1M,CAACV,GAAI,WAAYC,KAAM,gBAAiBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBY,KAAMV,aAAc,eAAgBC,MAAO,KAAkBC,UAAUS,aAAcP,SAAS,IAI9L,SAASQ,EAAmBC,EAAiBC,GAC3C,MAAMC,EAAWF,EAAGG,YAAYF,GAC1BG,EAAkB,IAAIC,MAAMH,EAASlB,QAC3C,IAAK,IAAIsB,EAAI,EAAGA,EAAIJ,EAASlB,OAAQsB,IACnCF,EAAME,GAAKJ,EAASK,YAAYD,GAClC,OAAOF,EAAMI,KAAK,GACpB,CCtCA,MAAMC,EAAyC,CAC7C,CAAC,KAAkBvB,MAAO,OAC1B,CAAC,KAAkBM,QAAS,SAC5B,CAAC,KAAkBC,KAAM,MACzB,CAAC,KAAkBI,MAAO,QAGrB,SAASa,IACd,MAAMC,EAAK,QAAWC,IAAIC,UAC1B,IAAKF,EAEH,YADA,QAAWG,QAAQ,iBAIrB,MAAMC,EAAUJ,EAAGK,QAAQC,aAAa,UAAWC,eACnD,GAAuB,IAAnBH,EAAQ/B,OAEV,YADA,QAAW8B,QAAQ,kCAIrB,MAAMK,EAAQvC,EAAwBwC,IAAKC,IAAM,IAAKA,EAAGtC,QAAS,IAAIuC,OAAOD,EAAEtC,QAAQwC,OAAQ,QAEzFC,EAAa,QAASC,MAAM,QAAS,CAACC,MAAOf,IAC7CgB,EAAW,QAASC,OAAO,WAAY,CAC3CH,MAAOd,EAAIe,MAAOX,E
AAQ,GAC1Bc,OAASC,GAAmBA,EAAIC,UAAY,UAAWb,gBAInDc,EAAoE,GACpEC,EAAW,OAAQ,IACzB,IAAK,MAAMC,KAAQf,EAAO,CACxB,MAAMgB,EAAQ,QAASC,KAAKF,EAAKpD,KAAM,CACrC4C,MAAOQ,EAAK3C,QACZ8C,YAAa,aAAa5B,EAAeyB,EAAKjD,WAAaiD,EAAKjD,aAElE+C,EAAWM,KAAK,CAACJ,OAAMK,MAAOJ,IAC9BF,EAASO,OAAOL,EAAMM,KACxB,CAEA,MAAMC,EAAiB,QAASN,KAAK,6BAA8B,CAACV,OAAO,IACrEiB,EAAgB,QAASP,KAAK,2BAA4B,CAACV,OAAO,IAClEkB,EAAe,QAASR,KAAK,8BAA+B,CAACV,OAAO,IAEpEmB,EAAS,SAAU,CAACC,MAAO,8BAC9BC,IAAI,SAAU,CAACvB,EAAYG,KAC3BoB,IAAI,KAAM,UACVA,IAAId,GACJc,IAAI,KAAM,WACVA,IAAI,SAAU,CAACL,EAAgBC,EAAeC,KAC9CI,KAAK,KACJ,IACE,MAAMC,EAAStB,EAASD,MAClB1B,EAAK,EAAAkD,SAASC,UAAUC,cAAcH,GAG5C,IAAK,MAAM,KAACf,EAAI,MAAEK,KAAUP,EAC1BE,EAAK3C,QAAUgD,EAAMb,MAEvB,MAAM2B,EDXP,SACLvB,EACA9B,EACAmB,GAEA,MAAMmC,EAAenC,EAAMU,OAAQR,GAAMA,EAAE9B,SACrCgE,EAAUvD,EAAGuD,QAGbC,EAAgB,IAAIC,IAEpBC,EAA+B,IAAIrD,MAAMyB,EAAI9C,QACnD,IAAI2E,EAAY,EAEhB,IAAK,IAAI1D,EAAS,EAAGA,EAAS6B,EAAI9C,OAAQiB,IAAU,CAClD,MAAM2D,EAAM7D,EAAmBC,EAAIC,GAC7B4D,EAA2B,GAEjC,IAAK,MAAM3B,KAAQoB,EAAc,CAG/B,IAAIQ,EACJ,IAFA5B,EAAKnD,QAAQgF,UAAY,EAEmB,QAApCD,EAAQ5B,EAAKnD,QAAQiF,KAAKJ,KAChCC,EAAKvB,KAAK,CACR2B,aAAc/B,EAAKrD,GACnBqF,cAAeJ,EAAMK,MACrBC,aAAcN,EAAMK,MAAQZ,EAAQvE,OAASuE,EAAQO,EAAMK,YAASE,EACpEC,gBAAiBR,EAAM,KAEzBN,EAAce,IAAIrC,EAAKrD,IAAK2E,EAAcgB,IAAItC,EAAKrD,KAAO,GAAK,GAC/D8E,GAEJ,CACAD,EAAQzD,GAAU4D,CACpB,CAmBA,MAAO,CAACY,YAhB6BnB,EAClCzB,OAAQR,GAAMmC,EAAckB,IAAIrD,EAAExC,KAClCuC,IAAKC,IAAM,CACVxC,GAAIwC,EAAExC,GACNC,KAAMuC,EAAEvC,KACR6F,YAAa,GAAGtD,EAAElC,mCAAmCqE,EAAcgB,IAAInD,EAAExC,YACzE+F,MAAO,KACPC,IAAK,KACLC,WAAyB,IAAbzD,EAAErC,OAAe,KAAqB+F,MAAQ,KAAqBC,MAC/EC,SAAU,KAAmBC,UAC7B9F,MAAOiC,EAAEjC,MACTH,SAAUoC,EAAEpC,SACZkG,aAAc9D,EAAEtC,QAAQwC,OACxB6D,eAAe,KAGE1B,UAASC,YAChC,CC3CuB0B,CAAgBpC,EAAQjD,EAAImB,IAEvCwB,EAAcjB,OAASgB,EAAehB,QD4C3C,SACLf,EACAsC,EACAI,GAGA,MAAMiC,GAAW,QAAqBrC,GACnCpB,OAAQ0D,GAAMA,EAAEN,WAAa,KAAmBC,YACnD,QAAqBjC,EAAQ,IAAIqC,KAAajC,EAAOoB,cAGrD,MAAMe,GAAW,QAA4B7E,EAAIsC,GACjD,IAAK,IAAI3C,EAAI,EAAGA,EAAI+C,EAAOK,QAAQ1E,OAAQsB,IAAK,CAC9C,MAAMm
F,GAAe,QAAkBD,EAAUlF,IAAM,IACvD,QAAkBkF,EAAUlF,GAAG,QAAamF,EAAcpC,EAAOK,QAAQpD,IAAI,GAAO,GACtF,CACF,CC3DUoF,CAA0B/E,EAAIsC,EAAQI,GAEpCT,EAAalB,OD4DlB,SACLf,EACAsC,EACAI,GAEA,MAAMsC,EAAU,GAAG1C,EAAOnE,uBACpB8G,EAASvC,EAAOK,QAAQtC,IAAKyC,GAASA,EAAK7E,QAC3C8C,EAAMnB,EAAGK,QAAQ6E,UAAUF,GACjC,IAAK,IAAIrF,EAAI,EAAGA,EAAIsF,EAAO5G,OAAQsB,IACjCwB,EAAIyC,IAAIjE,EAAGsF,EAAOtF,GAEtB,CCtEUwF,CAA6BnF,EAAIsC,EAAQI,GAE3C,QAAW0C,KAAK,mBAAmB1C,EAAOM,+BAA+BN,EAAOoB,YAAYzF,gBAC5F2B,EAAGqF,mBACL,CAAE,MAAOC,GACP,QAAWC,MAAM,0BAA0BD,EAAIE,SAAWF,KAC1DG,QAAQF,MAAMD,EAChB,IAGJpD,EAAOwD,MACT,C","sources":["webpack://bio/./src/utils/annotations/liability-scanner.ts","webpack://bio/./src/utils/annotations/liability-scanner-ui.ts"],"sourcesContent":["/* eslint-disable max-len */\nimport * as DG from 'datagrok-api/dg';\n\nimport {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';\nimport {\n SeqAnnotation, SeqAnnotationHit, RowAnnotationData,\n AnnotationVisualType, AnnotationCategory, LiabilitySeverity,\n ANNOTATION_COLORS,\n} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';\nimport {\n getOrCreateAnnotationColumn, setColumnAnnotations, setRowAnnotations,\n getColumnAnnotations, getRowAnnotations, mergeRowHits,\n} from './annotation-manager';\n\n/** A single liability scanning rule. */\nexport interface LiabilityRule {\n id: string;\n name: string;\n pattern: RegExp;\n length: number;\n severity: LiabilitySeverity;\n /** Sub-category for grouping (e.g. \"deamidation\", \"oxidation\") */\n ruleCategory: string;\n color: string;\n enabled: boolean;\n}\n\n/** Built-in liability rules for antibody engineering. 
*/\nexport const BUILTIN_LIABILITY_RULES: LiabilityRule[] = [\n {id: 'deamid-ng', name: 'Deamidation (NG)', pattern: /NG/g, length: 2, severity: LiabilitySeverity.High, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-ns', name: 'Deamidation (NS)', pattern: /NS/g, length: 2, severity: LiabilitySeverity.Medium, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-na', name: 'Deamidation (NA)', pattern: /NA/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-nd', name: 'Deamidation (ND)', pattern: /ND/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-nt', name: 'Deamidation (NT)', pattern: /NT/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'isom-dg', name: 'Isomerization (DG)', pattern: /DG/g, length: 2, severity: LiabilitySeverity.High, ruleCategory: 'isomerization', color: ANNOTATION_COLORS.liability.isomerization, enabled: true},\n {id: 'isom-ds', name: 'Isomerization (DS)', pattern: /DS/g, length: 2, severity: LiabilitySeverity.Medium, ruleCategory: 'isomerization', color: ANNOTATION_COLORS.liability.isomerization, enabled: true},\n {id: 'oxid-m', name: 'Oxidation (Met)', pattern: /M/g, length: 1, severity: LiabilitySeverity.Medium, ruleCategory: 'oxidation', color: ANNOTATION_COLORS.liability.oxidation, enabled: true},\n {id: 'oxid-w', name: 'Oxidation (Trp)', pattern: /W/g, length: 1, severity: LiabilitySeverity.Low, ruleCategory: 'oxidation', color: ANNOTATION_COLORS.liability.oxidation, enabled: true},\n {id: 'glyco-nxst', name: 'N-glycosylation', pattern: /N[^P][ST]/g, length: 3, severity: LiabilitySeverity.High, ruleCategory: 'glycosylation', 
color: ANNOTATION_COLORS.liability.glycosylation, enabled: true},\n {id: 'free-cys', name: 'Free Cysteine', pattern: /C/g, length: 1, severity: LiabilitySeverity.Info, ruleCategory: 'freeCysteine', color: ANNOTATION_COLORS.liability.freeCysteine, enabled: false},\n];\n\n/** Extracts a canonical single-letter string from a sequence handler for a given row. */\nfunction getCanonicalString(sh: ISeqHandler, rowIdx: number): string {\n const splitted = sh.getSplitted(rowIdx);\n const chars: string[] = new Array(splitted.length);\n for (let i = 0; i < splitted.length; i++)\n chars[i] = splitted.getOriginal(i);\n return chars.join('');\n}\n\nexport interface ScanLiabilitiesResult {\n annotations: SeqAnnotation[];\n rowData: RowAnnotationData[];\n totalHits: number;\n}\n\n/** Scans all rows of a macromolecule column for liability motifs.\n * Returns column-level SeqAnnotation entries + per-row SeqAnnotationHit arrays. */\nexport function scanLiabilities(\n col: DG.Column<string>,\n sh: ISeqHandler,\n rules: LiabilityRule[],\n): ScanLiabilitiesResult {\n const enabledRules = rules.filter((r) => r.enabled);\n const posList = sh.posList;\n\n // Track which rules had hits\n const ruleHitCounts = new Map<string, number>();\n\n const rowData: RowAnnotationData[] = new Array(col.length);\n let totalHits = 0;\n\n for (let rowIdx = 0; rowIdx < col.length; rowIdx++) {\n const seq = getCanonicalString(sh, rowIdx);\n const hits: SeqAnnotationHit[] = [];\n\n for (const rule of enabledRules) {\n // Reset regex lastIndex for global patterns\n rule.pattern.lastIndex = 0;\n let match: RegExpExecArray | null;\n while ((match = rule.pattern.exec(seq)) !== null) {\n hits.push({\n annotationId: rule.id,\n positionIndex: match.index,\n positionName: match.index < posList.length ? posList[match.index] : undefined,\n matchedMonomers: match[0],\n });\n ruleHitCounts.set(rule.id, (ruleHitCounts.get(rule.id) ?? 
0) + 1);\n totalHits++;\n }\n }\n rowData[rowIdx] = hits;\n }\n\n // Build column-level annotations only for rules that had hits\n const annotations: SeqAnnotation[] = enabledRules\n .filter((r) => ruleHitCounts.has(r.id))\n .map((r) => ({\n id: r.id,\n name: r.name,\n description: `${r.ruleCategory} liability pattern (${ruleHitCounts.get(r.id)} hits)`,\n start: null,\n end: null,\n visualType: r.length === 1 ? AnnotationVisualType.Point : AnnotationVisualType.Motif,\n category: AnnotationCategory.Liability,\n color: r.color,\n severity: r.severity,\n motifPattern: r.pattern.source,\n autoGenerated: true,\n }));\n\n return {annotations, rowData, totalHits};\n}\n\n/** Applies liability scan results to the DataFrame (writes tags + companion column). */\nexport function applyLiabilityScanResults(\n df: DG.DataFrame,\n seqCol: DG.Column<string>,\n result: ScanLiabilitiesResult,\n): void {\n // Merge with existing annotations, removing old liability entries\n const existing = getColumnAnnotations(seqCol)\n .filter((a) => a.category !== AnnotationCategory.Liability);\n setColumnAnnotations(seqCol, [...existing, ...result.annotations]);\n\n // Write per-row data to hidden companion column, preserving region hits from numbering\n const annotCol = getOrCreateAnnotationColumn(df, seqCol);\n for (let i = 0; i < result.rowData.length; i++) {\n const existingHits = getRowAnnotations(annotCol, i) ?? [];\n setRowAnnotations(annotCol, i, mergeRowHits(existingHits, result.rowData[i], false, true));\n }\n}\n\n/** Creates a liability summary count column (total hits per row). 
*/\nexport function createLiabilitySummaryColumn(\n df: DG.DataFrame,\n seqCol: DG.Column<string>,\n result: ScanLiabilitiesResult,\n): DG.Column<number> {\n const colName = `${seqCol.name}_liability_count`;\n const counts = result.rowData.map((hits) => hits.length);\n const col = df.columns.addNewInt(colName);\n for (let i = 0; i < counts.length; i++)\n col.set(i, counts[i]);\n return col;\n}\n","import * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {_package} from '../../package';\nimport {\n BUILTIN_LIABILITY_RULES, LiabilityRule, scanLiabilities,\n applyLiabilityScanResults, createLiabilitySummaryColumn,\n} from './liability-scanner';\nimport {LiabilitySeverity} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';\n\nconst severityLabels: Record<string, string> = {\n [LiabilitySeverity.High]: 'High',\n [LiabilitySeverity.Medium]: 'Medium',\n [LiabilitySeverity.Low]: 'Low',\n [LiabilitySeverity.Info]: 'Info',\n};\n\nexport function showLiabilityScannerDialog(): void {\n const df = grok.shell.tv?.dataFrame;\n if (!df) {\n grok.shell.warning('No table open');\n return;\n }\n\n const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n if (seqCols.length === 0) {\n grok.shell.warning('No macromolecule columns found');\n return;\n }\n\n const rules = BUILTIN_LIABILITY_RULES.map((r) => ({...r, pattern: new RegExp(r.pattern.source, 'g')}));\n\n const tableInput = ui.input.table('Table', {value: df});\n const seqInput = ui.input.column('Sequence', {\n table: df, value: seqCols[0],\n filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,\n });\n\n // Rule checkboxes\n const ruleChecks: {rule: LiabilityRule; input: DG.InputBase<boolean>}[] = [];\n const rulesDiv = ui.divV([]);\n for (const rule of rules) {\n const check = ui.input.bool(rule.name, {\n value: rule.enabled,\n tooltipText: `Severity: ${severityLabels[rule.severity] ?? 
rule.severity}`,\n });\n ruleChecks.push({rule, input: check});\n rulesDiv.append(check.root);\n }\n\n const highlightInput = ui.input.bool('Highlight in cell renderer', {value: true});\n const annotColInput = ui.input.bool('Create annotation column', {value: true});\n const summaryInput = ui.input.bool('Create summary count column', {value: false});\n\n const dialog = ui.dialog({title: 'Scan Sequence Liabilities'})\n .add(ui.inputs([tableInput, seqInput]))\n .add(ui.h3('Rules'))\n .add(rulesDiv)\n .add(ui.h3('Output'))\n .add(ui.inputs([highlightInput, annotColInput, summaryInput]))\n .onOK(() => {\n try {\n const seqCol = seqInput.value!;\n const sh = _package.seqHelper.getSeqHandler(seqCol);\n\n // Apply checkbox state\n for (const {rule, input} of ruleChecks)\n rule.enabled = input.value;\n\n const result = scanLiabilities(seqCol, sh, rules);\n\n if (annotColInput.value || highlightInput.value)\n applyLiabilityScanResults(df, seqCol, result);\n\n if (summaryInput.value)\n createLiabilitySummaryColumn(df, seqCol, result);\n\n grok.shell.info(`Liability scan: ${result.totalHits} hits found across ${result.annotations.length} rules`);\n df.fireValuesChanged();\n } catch (err: any) {\n grok.shell.error(`Liability scan failed: ${err.message ?? 
err}`);\n console.error(err);\n }\n });\n\n dialog.show();\n}\n"],"names":["BUILTIN_LIABILITY_RULES","id","name","pattern","length","severity","High","ruleCategory","color","liability","deamidation","enabled","Medium","Low","isomerization","oxidation","glycosylation","Info","freeCysteine","getCanonicalString","sh","rowIdx","splitted","getSplitted","chars","Array","i","getOriginal","join","severityLabels","showLiabilityScannerDialog","df","tv","dataFrame","warning","seqCols","columns","bySemTypeAll","MACROMOLECULE","rules","map","r","RegExp","source","tableInput","table","value","seqInput","column","filter","col","semType","ruleChecks","rulesDiv","rule","check","bool","tooltipText","push","input","append","root","highlightInput","annotColInput","summaryInput","dialog","title","add","onOK","seqCol","_package","seqHelper","getSeqHandler","result","enabledRules","posList","ruleHitCounts","Map","rowData","totalHits","seq","hits","match","lastIndex","exec","annotationId","positionIndex","index","positionName","undefined","matchedMonomers","set","get","annotations","has","description","start","end","visualType","Point","Motif","category","Liability","motifPattern","autoGenerated","scanLiabilities","existing","a","annotCol","existingHits","applyLiabilityScanResults","colName","counts","addNewInt","createLiabilitySummaryColumn","info","fireValuesChanged","err","error","message","console","show"],"sourceRoot":""}
1
+ {"version":3,"file":"422.js","mappings":"gMA4BO,MAAMA,EAA2C,CACtD,CAACC,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC/L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GACjM,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,YAAaC,KAAM,mBAAoBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,cAAeC,MAAO,KAAkBC,UAAUC,YAAaC,SAAS,GAC9L,CAACV,GAAI,UAAWC,KAAM,qBAAsBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUK,cAAeH,SAAS,GACnM,CAACV,GAAI,UAAWC,KAAM,qBAAsBC,QAAS,MAAOC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUK,cAAeH,SAAS,GACrM,CAACV,GAAI,SAAUC,KAAM,kBAAmBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBO,OAAQL,aAAc,YAAaC,MAAO,KAAkBC,UAAUM,UAAWJ,SAAS,GACxL,CAACV,GAAI,SAAUC,KAAM,kBAAmBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBQ,IAAKN,aAAc,YAAaC,MAAO,KAAkBC,UAAUM,UAAWJ,SAAS,GACrL,CAACV,GAAI,aAAcC,KAAM,kBAAmBC,QAAS,aAAcC,OAAQ,EAAGC,SAAU,KAAkBC,KAAMC,aAAc,gBAAiBC,MAAO,KAAkBC,UAAUO,cAAeL,SAAS,GAC1M,CAACV,GAAI,WAAYC,KAAM,gBAAiBC,QAAS,KAAMC,OAAQ,EAAGC,SAAU,KAAkBY,KAAMV,aAAc,eAAgBC,MAAO,KAAkBC,UAAUS,aAAcP,SAAS,IAI9L,SAASQ,EAAmBC,EAAiBC,GAC3C,MAAMC,EAAWF,EAAGG,YAAYF,GAC1BG,EAAkB,IAAIC,MAAMH,EAASlB,QAC3C,IAAK,IAAIsB,EAAI,EAAGA,EAAIJ,EAASlB,OAAQsB,IACnCF,EAAME,GAAKJ,EAASK,YAAYD,GAClC,OAAOF,EAAMI,KAAK,GACpB,CCtCA,MAAMC,EAAyC,CAC7C,CAAC,KAAkBvB,MAAO,OAC1B,CAAC,KAAkBM,QAAS,SAC5B,CAAC,KAAkBC,KAAM,MACzB,CAAC,KAAkBI,MAAO,QAGrB,SAASa,IACd,MAAMC,EAAK,QAAWC,IAAIC,UAC1B,IAAKF,EAEH,YADA,QAAWG,QAAQ,iBAIrB,MAAMC,EAAUJ,EAAGK,QAAQC,aAAa,UAAWC,eACnD,GAAuB,IAAnBH,EAAQ/B,OAEV,YADA,QAAW8B,QAAQ,kCAIrB,MAAMK,EAAQvC,EAAwBwC,IAAKC,IAAM,IAAKA,EAAGtC,QAAS,IAAIuC,OAAOD,EAAEtC,QAAQwC,OAAQ,QAEzFC,EAAa,QAASC,MAAM,QAAS,CAACC,MAAOf,IAC7CgB,EAAW,QAASC,OAAO,WAAY,CAC3CH,MAAOd,EAAIe,MAAOX,E
AAQ,GAC1Bc,OAASC,GAAmBA,EAAIC,UAAY,UAAWb,gBAInDc,EAAoE,GACpEC,EAAW,OAAQ,IACzB,IAAK,MAAMC,KAAQf,EAAO,CACxB,MAAMgB,EAAQ,QAASC,KAAKF,EAAKpD,KAAM,CACrC4C,MAAOQ,EAAK3C,QACZ8C,YAAa,aAAa5B,EAAeyB,EAAKjD,WAAaiD,EAAKjD,aAElE+C,EAAWM,KAAK,CAACJ,OAAMK,MAAOJ,IAC9BF,EAASO,OAAOL,EAAMM,KACxB,CAEA,MAAMC,EAAiB,QAASN,KAAK,6BAA8B,CAACV,OAAO,IACrEiB,EAAgB,QAASP,KAAK,2BAA4B,CAACV,OAAO,IAClEkB,EAAe,QAASR,KAAK,8BAA+B,CAACV,OAAO,IAEpEmB,EAAS,SAAU,CAACC,MAAO,8BAC9BC,IAAI,SAAU,CAACvB,EAAYG,KAC3BoB,IAAI,KAAM,UACVA,IAAId,GACJc,IAAI,KAAM,WACVA,IAAI,SAAU,CAACL,EAAgBC,EAAeC,KAC9CI,KAAK,KACJ,IACE,MAAMC,EAAStB,EAASD,MAClB1B,EAAK,EAAAkD,SAASC,UAAUC,cAAcH,GAG5C,IAAK,MAAM,KAACf,EAAI,MAAEK,KAAUP,EAC1BE,EAAK3C,QAAUgD,EAAMb,MAEvB,MAAM2B,EDXP,SACLvB,EACA9B,EACAmB,GAEA,MAAMmC,EAAenC,EAAMU,OAAQR,GAAMA,EAAE9B,SACrCgE,EAAUvD,EAAGuD,QAGbC,EAAgB,IAAIC,IAEpBC,EAA+B,IAAIrD,MAAMyB,EAAI9C,QACnD,IAAI2E,EAAY,EAEhB,IAAK,IAAI1D,EAAS,EAAGA,EAAS6B,EAAI9C,OAAQiB,IAAU,CAClD,MAAM2D,EAAM7D,EAAmBC,EAAIC,GAC7B4D,EAA2B,GAEjC,IAAK,MAAM3B,KAAQoB,EAAc,CAG/B,IAAIQ,EACJ,IAFA5B,EAAKnD,QAAQgF,UAAY,EAEmB,QAApCD,EAAQ5B,EAAKnD,QAAQiF,KAAKJ,KAChCC,EAAKvB,KAAK,CACR2B,aAAc/B,EAAKrD,GACnBqF,cAAeJ,EAAMK,MACrBC,aAAcN,EAAMK,MAAQZ,EAAQvE,OAASuE,EAAQO,EAAMK,YAASE,EACpEC,gBAAiBR,EAAM,KAEzBN,EAAce,IAAIrC,EAAKrD,IAAK2E,EAAcgB,IAAItC,EAAKrD,KAAO,GAAK,GAC/D8E,GAEJ,CACAD,EAAQzD,GAAU4D,CACpB,CAmBA,MAAO,CAACY,YAhB6BnB,EAClCzB,OAAQR,GAAMmC,EAAckB,IAAIrD,EAAExC,KAClCuC,IAAKC,IAAM,CACVxC,GAAIwC,EAAExC,GACNC,KAAMuC,EAAEvC,KACR6F,YAAa,GAAGtD,EAAElC,mCAAmCqE,EAAcgB,IAAInD,EAAExC,YACzE+F,MAAO,KACPC,IAAK,KACLC,WAAyB,IAAbzD,EAAErC,OAAe,KAAqB+F,MAAQ,KAAqBC,MAC/EC,SAAU,KAAmBC,UAC7B9F,MAAOiC,EAAEjC,MACTH,SAAUoC,EAAEpC,SACZkG,aAAc9D,EAAEtC,QAAQwC,OACxB6D,eAAe,KAGE1B,UAASC,YAChC,CC3CuB0B,CAAgBpC,EAAQjD,EAAImB,IAEvCwB,EAAcjB,OAASgB,EAAehB,QD4C3C,SACLf,EACAsC,EACAI,GAGA,MAAMiC,GAAW,QAAqBrC,GACnCpB,OAAQ0D,GAAMA,EAAEN,WAAa,KAAmBC,YACnD,QAAqBjC,EAAQ,IAAIqC,KAAajC,EAAOoB,cAGrD,MAAMe,GAAW,QAA4B7E,EAAIsC,GACjD,IAAK,IAAI3C,EAAI,EAAGA,EAAI+C,EAAOK,QAAQ1E,OAAQsB,IAAK,CAC9C,MAAMm
F,GAAe,QAAkBD,EAAUlF,IAAM,IACvD,QAAkBkF,EAAUlF,GAAG,QAAamF,EAAcpC,EAAOK,QAAQpD,IAAI,GAAO,GACtF,CACF,CC3DUoF,CAA0B/E,EAAIsC,EAAQI,GAEpCT,EAAalB,OD4DlB,SACLf,EACAsC,EACAI,GAEA,MAAMsC,EAAU,GAAG1C,EAAOnE,uBACpB8G,EAASvC,EAAOK,QAAQtC,IAAKyC,GAASA,EAAK7E,QAC3C8C,EAAMnB,EAAGK,QAAQ6E,UAAUF,GACjC,IAAK,IAAIrF,EAAI,EAAGA,EAAIsF,EAAO5G,OAAQsB,IACjCwB,EAAIyC,IAAIjE,EAAGsF,EAAOtF,GAEtB,CCtEUwF,CAA6BnF,EAAIsC,EAAQI,GAE3C,QAAW0C,KAAK,mBAAmB1C,EAAOM,+BAA+BN,EAAOoB,YAAYzF,gBAC5F2B,EAAGqF,mBACL,CAAE,MAAOC,GACP,QAAWC,MAAM,0BAA0BD,EAAIE,SAAWF,KAC1DG,QAAQF,MAAMD,EAChB,IAGJpD,EAAOwD,MACT,C","sources":["webpack://bio/./src/utils/annotations/liability-scanner.ts","webpack://bio/./src/utils/annotations/liability-scanner-ui.ts"],"sourcesContent":["/* eslint-disable max-len */\nimport * as DG from 'datagrok-api/dg';\n\nimport {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';\nimport {\n SeqAnnotation, SeqAnnotationHit, RowAnnotationData,\n AnnotationVisualType, AnnotationCategory, LiabilitySeverity,\n ANNOTATION_COLORS,\n} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';\nimport {\n getOrCreateAnnotationColumn, setColumnAnnotations, setRowAnnotations,\n getColumnAnnotations, getRowAnnotations, mergeRowHits,\n} from './annotation-manager';\n\n/** A single liability scanning rule. */\nexport interface LiabilityRule {\n id: string;\n name: string;\n pattern: RegExp;\n length: number;\n severity: LiabilitySeverity;\n /** Sub-category for grouping (e.g. \"deamidation\", \"oxidation\") */\n ruleCategory: string;\n color: string;\n enabled: boolean;\n}\n\n/** Built-in liability rules for antibody engineering. 
*/\nexport const BUILTIN_LIABILITY_RULES: LiabilityRule[] = [\n {id: 'deamid-ng', name: 'Deamidation (NG)', pattern: /NG/g, length: 2, severity: LiabilitySeverity.High, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-ns', name: 'Deamidation (NS)', pattern: /NS/g, length: 2, severity: LiabilitySeverity.Medium, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-na', name: 'Deamidation (NA)', pattern: /NA/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-nd', name: 'Deamidation (ND)', pattern: /ND/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'deamid-nt', name: 'Deamidation (NT)', pattern: /NT/g, length: 2, severity: LiabilitySeverity.Low, ruleCategory: 'deamidation', color: ANNOTATION_COLORS.liability.deamidation, enabled: true},\n {id: 'isom-dg', name: 'Isomerization (DG)', pattern: /DG/g, length: 2, severity: LiabilitySeverity.High, ruleCategory: 'isomerization', color: ANNOTATION_COLORS.liability.isomerization, enabled: true},\n {id: 'isom-ds', name: 'Isomerization (DS)', pattern: /DS/g, length: 2, severity: LiabilitySeverity.Medium, ruleCategory: 'isomerization', color: ANNOTATION_COLORS.liability.isomerization, enabled: true},\n {id: 'oxid-m', name: 'Oxidation (Met)', pattern: /M/g, length: 1, severity: LiabilitySeverity.Medium, ruleCategory: 'oxidation', color: ANNOTATION_COLORS.liability.oxidation, enabled: true},\n {id: 'oxid-w', name: 'Oxidation (Trp)', pattern: /W/g, length: 1, severity: LiabilitySeverity.Low, ruleCategory: 'oxidation', color: ANNOTATION_COLORS.liability.oxidation, enabled: true},\n {id: 'glyco-nxst', name: 'N-glycosylation', pattern: /N[^P][ST]/g, length: 3, severity: LiabilitySeverity.High, ruleCategory: 'glycosylation', 
color: ANNOTATION_COLORS.liability.glycosylation, enabled: true},\n {id: 'free-cys', name: 'Free Cysteine', pattern: /C/g, length: 1, severity: LiabilitySeverity.Info, ruleCategory: 'freeCysteine', color: ANNOTATION_COLORS.liability.freeCysteine, enabled: false},\n];\n\n/** Extracts a canonical single-letter string from a sequence handler for a given row. */\nfunction getCanonicalString(sh: ISeqHandler, rowIdx: number): string {\n const splitted = sh.getSplitted(rowIdx);\n const chars: string[] = new Array(splitted.length);\n for (let i = 0; i < splitted.length; i++)\n chars[i] = splitted.getOriginal(i);\n return chars.join('');\n}\n\nexport interface ScanLiabilitiesResult {\n annotations: SeqAnnotation[];\n rowData: RowAnnotationData[];\n totalHits: number;\n}\n\n/** Scans all rows of a macromolecule column for liability motifs.\n * Returns column-level SeqAnnotation entries + per-row SeqAnnotationHit arrays. */\nexport function scanLiabilities(\n col: DG.Column<string>,\n sh: ISeqHandler,\n rules: LiabilityRule[],\n): ScanLiabilitiesResult {\n const enabledRules = rules.filter((r) => r.enabled);\n const posList = sh.posList;\n\n // Track which rules had hits\n const ruleHitCounts = new Map<string, number>();\n\n const rowData: RowAnnotationData[] = new Array(col.length);\n let totalHits = 0;\n\n for (let rowIdx = 0; rowIdx < col.length; rowIdx++) {\n const seq = getCanonicalString(sh, rowIdx);\n const hits: SeqAnnotationHit[] = [];\n\n for (const rule of enabledRules) {\n // Reset regex lastIndex for global patterns\n rule.pattern.lastIndex = 0;\n let match: RegExpExecArray | null;\n while ((match = rule.pattern.exec(seq)) !== null) {\n hits.push({\n annotationId: rule.id,\n positionIndex: match.index,\n positionName: match.index < posList.length ? posList[match.index] : undefined,\n matchedMonomers: match[0],\n });\n ruleHitCounts.set(rule.id, (ruleHitCounts.get(rule.id) ?? 
0) + 1);\n totalHits++;\n }\n }\n rowData[rowIdx] = hits;\n }\n\n // Build column-level annotations only for rules that had hits\n const annotations: SeqAnnotation[] = enabledRules\n .filter((r) => ruleHitCounts.has(r.id))\n .map((r) => ({\n id: r.id,\n name: r.name,\n description: `${r.ruleCategory} liability pattern (${ruleHitCounts.get(r.id)} hits)`,\n start: null,\n end: null,\n visualType: r.length === 1 ? AnnotationVisualType.Point : AnnotationVisualType.Motif,\n category: AnnotationCategory.Liability,\n color: r.color,\n severity: r.severity,\n motifPattern: r.pattern.source,\n autoGenerated: true,\n }));\n\n return {annotations, rowData, totalHits};\n}\n\n/** Applies liability scan results to the DataFrame (writes tags + companion column). */\nexport function applyLiabilityScanResults(\n df: DG.DataFrame,\n seqCol: DG.Column<string>,\n result: ScanLiabilitiesResult,\n): void {\n // Merge with existing annotations, removing old liability entries\n const existing = getColumnAnnotations(seqCol)\n .filter((a) => a.category !== AnnotationCategory.Liability);\n setColumnAnnotations(seqCol, [...existing, ...result.annotations]);\n\n // Write per-row data to hidden companion column, preserving region hits from numbering\n const annotCol = getOrCreateAnnotationColumn(df, seqCol);\n for (let i = 0; i < result.rowData.length; i++) {\n const existingHits = getRowAnnotations(annotCol, i) ?? [];\n setRowAnnotations(annotCol, i, mergeRowHits(existingHits, result.rowData[i], false, true));\n }\n}\n\n/** Creates a liability summary count column (total hits per row). 
*/\nexport function createLiabilitySummaryColumn(\n df: DG.DataFrame,\n seqCol: DG.Column<string>,\n result: ScanLiabilitiesResult,\n): DG.Column<number> {\n const colName = `${seqCol.name}_liability_count`;\n const counts = result.rowData.map((hits) => hits.length);\n const col = df.columns.addNewInt(colName);\n for (let i = 0; i < counts.length; i++)\n col.set(i, counts[i]);\n return col;\n}\n","import * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {_package} from '../../package';\nimport {\n BUILTIN_LIABILITY_RULES, LiabilityRule, scanLiabilities,\n applyLiabilityScanResults, createLiabilitySummaryColumn,\n} from './liability-scanner';\nimport {LiabilitySeverity} from '@datagrok-libraries/bio/src/utils/macromolecule/annotations';\n\nconst severityLabels: Record<string, string> = {\n [LiabilitySeverity.High]: 'High',\n [LiabilitySeverity.Medium]: 'Medium',\n [LiabilitySeverity.Low]: 'Low',\n [LiabilitySeverity.Info]: 'Info',\n};\n\nexport function showLiabilityScannerDialog(): void {\n const df = grok.shell.tv?.dataFrame;\n if (!df) {\n grok.shell.warning('No table open');\n return;\n }\n\n const seqCols = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n if (seqCols.length === 0) {\n grok.shell.warning('No macromolecule columns found');\n return;\n }\n\n const rules = BUILTIN_LIABILITY_RULES.map((r) => ({...r, pattern: new RegExp(r.pattern.source, 'g')}));\n\n const tableInput = ui.input.table('Table', {value: df});\n const seqInput = ui.input.column('Sequence', {\n table: df, value: seqCols[0],\n filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,\n });\n\n // Rule checkboxes\n const ruleChecks: {rule: LiabilityRule; input: DG.InputBase<boolean>}[] = [];\n const rulesDiv = ui.divV([]);\n for (const rule of rules) {\n const check = ui.input.bool(rule.name, {\n value: rule.enabled,\n tooltipText: `Severity: ${severityLabels[rule.severity] ?? 
rule.severity}`,\n });\n ruleChecks.push({rule, input: check});\n rulesDiv.append(check.root);\n }\n\n const highlightInput = ui.input.bool('Highlight in cell renderer', {value: true});\n const annotColInput = ui.input.bool('Create annotation column', {value: true});\n const summaryInput = ui.input.bool('Create summary count column', {value: false});\n\n const dialog = ui.dialog({title: 'Scan Sequence Liabilities'})\n .add(ui.inputs([tableInput, seqInput]))\n .add(ui.h3('Rules'))\n .add(rulesDiv)\n .add(ui.h3('Output'))\n .add(ui.inputs([highlightInput, annotColInput, summaryInput]))\n .onOK(() => {\n try {\n const seqCol = seqInput.value!;\n const sh = _package.seqHelper.getSeqHandler(seqCol);\n\n // Apply checkbox state\n for (const {rule, input} of ruleChecks)\n rule.enabled = input.value;\n\n const result = scanLiabilities(seqCol, sh, rules);\n\n if (annotColInput.value || highlightInput.value)\n applyLiabilityScanResults(df, seqCol, result);\n\n if (summaryInput.value)\n createLiabilitySummaryColumn(df, seqCol, result);\n\n grok.shell.info(`Liability scan: ${result.totalHits} hits found across ${result.annotations.length} rules`);\n df.fireValuesChanged();\n } catch (err: any) {\n grok.shell.error(`Liability scan failed: ${err.message ?? 
err}`);\n console.error(err);\n }\n });\n\n dialog.show();\n}\n"],"names":["BUILTIN_LIABILITY_RULES","id","name","pattern","length","severity","High","ruleCategory","color","liability","deamidation","enabled","Medium","Low","isomerization","oxidation","glycosylation","Info","freeCysteine","getCanonicalString","sh","rowIdx","splitted","getSplitted","chars","Array","i","getOriginal","join","severityLabels","showLiabilityScannerDialog","df","tv","dataFrame","warning","seqCols","columns","bySemTypeAll","MACROMOLECULE","rules","map","r","RegExp","source","tableInput","table","value","seqInput","column","filter","col","semType","ruleChecks","rulesDiv","rule","check","bool","tooltipText","push","input","append","root","highlightInput","annotColInput","summaryInput","dialog","title","add","onOK","seqCol","_package","seqHelper","getSeqHandler","result","enabledRules","posList","ruleHitCounts","Map","rowData","totalHits","seq","hits","match","lastIndex","exec","annotationId","positionIndex","index","positionName","undefined","matchedMonomers","set","get","annotations","has","description","start","end","visualType","Point","Motif","category","Liability","motifPattern","autoGenerated","scanLiabilities","existing","a","annotCol","existingHits","applyLiabilityScanResults","colName","counts","addNewInt","createLiabilitySummaryColumn","info","fireValuesChanged","err","error","message","console","show"],"sourceRoot":""}