@datagrok/bio 2.26.8 → 2.27.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.26.8",
8
+ "version": "2.27.0",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -50,7 +50,7 @@
50
50
  "@datagrok-libraries/ml": "^6.10.11",
51
51
  "@datagrok-libraries/test": "^1.1.0",
52
52
  "@datagrok-libraries/tutorials": "^1.7.4",
53
- "@datagrok-libraries/utils": "^4.6.9",
53
+ "@datagrok-libraries/utils": "^4.7.2",
54
54
  "@webgpu/types": "^0.1.40",
55
55
  "ajv": "^8.12.0",
56
56
  "ajv-errors": "^3.0.0",
@@ -277,6 +277,13 @@ export namespace funcs {
277
277
  return await grok.functions.call('Bio:AlignSequences', { sequenceCol, clustersCol, options });
278
278
  }
279
279
 
280
+ /**
281
+ Aligns non-canonical peptide sequences using PepSeA Docker container (MAFFT)
282
+ */
283
+ export async function pepseaMsa(sequenceCol: DG.Column , method: string , gapOpen: number , gapExtend: number ): Promise<DG.Column> {
284
+ return await grok.functions.call('Bio:PepseaMsa', { sequenceCol, method, gapOpen, gapExtend });
285
+ }
286
+
280
287
  /**
281
288
  Visualizes sequence composition on a WebLogo plot
282
289
  */
package/src/package.g.ts CHANGED
@@ -444,6 +444,18 @@ export async function alignSequences(sequenceCol: any, clustersCol: any, options
444
444
  return await PackageFunctions.alignSequences(sequenceCol, clustersCol, options);
445
445
  }
446
446
 
447
+ //name: PepSeA
448
+ //description: Aligns non-canonical peptide sequences using PepSeA Docker container (MAFFT)
449
+ //input: column sequenceCol { semType: Macromolecule }
450
+ //input: string method = 'mafft --auto' { choices: ["mafft --auto","mafft","linsi","ginsi","einsi","fftns","fftnsi","nwns","nwnsi"] }
451
+ //input: double gapOpen = 1.53
452
+ //input: double gapExtend = 0
453
+ //output: column result
454
+ //meta.role: sequenceMSA
455
+ export async function pepseaMsa(sequenceCol: DG.Column<any>, method: string, gapOpen: number, gapExtend: number) : Promise<any> {
456
+ return await PackageFunctions.pepseaMsa(sequenceCol, method, gapOpen, gapExtend);
457
+ }
458
+
447
459
  //name: Composition Analysis
448
460
  //description: Visualizes sequence composition on a WebLogo plot
449
461
  //output: viewer result
package/src/package.ts CHANGED
@@ -33,6 +33,7 @@ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
33
33
  import {ISeqHandler, SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
34
34
  import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
35
35
 
36
+ import {checkCurrentView} from '@datagrok-libraries/utils/src/view-utils';
36
37
  import {getMacromoleculeColumns} from './utils/ui-utils';
37
38
  import {MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer,} from './utils/cell-renderer';
38
39
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
@@ -58,6 +59,7 @@ import {demoToAtomicLevel} from './demo/bio03-atomic-level';
58
59
  import {checkInputColumnUI} from './utils/check-input-column';
59
60
  import {MsaWarning} from './utils/multiple-sequence-alignment';
60
61
  import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
62
+ import {alignWithPepsea, pepseaMethods} from './utils/pepsea';
61
63
  import {WebLogoApp} from './apps/web-logo-app';
62
64
  import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
63
65
  import {splitToMonomersUI} from './utils/split-to-monomers';
@@ -527,10 +529,7 @@ export class PackageFunctions {
527
529
  @grok.decorators.param({type: 'object', options: {optional: true}}) options?: (IUMAPOptions | ITSNEOptions) & Options,
528
530
  @grok.decorators.param({options: {optional: true}}) demo?: boolean): Promise<DG.Viewer | undefined> {
529
531
  //workaround for functions which add viewers to tableView (can be run only on active table view)
530
- if (table.name !== grok.shell.tv.dataFrame.name) {
531
- grok.shell.error(`Table ${table.name} is not a current table view`);
532
- return;
533
- }
532
+ checkCurrentView(table);
534
533
  if (!checkInputColumnUI(molecules, 'Activity Cliffs'))
535
534
  return;
536
535
 
@@ -556,6 +555,7 @@ export class PackageFunctions {
556
555
  axesNames: axesNames,
557
556
  }).call(undefined, undefined, {processed: false});
558
557
 
558
+ checkCurrentView(table);
559
559
  const view = grok.shell.tv;
560
560
 
561
561
  const description = `Molecules: ${molecules.name}, activities: ${activities.name}, method: ${methodName}, ${options ? `options: ${JSON.stringify(options)},` : ``} similarity: ${similarityMetric}, similarity cutoff: ${similarity}`;
@@ -733,10 +733,7 @@ export class PackageFunctions {
733
733
  @grok.decorators.param({options: {optional: true}}) isDemo?: boolean
734
734
  ): Promise<DG.ScatterPlotViewer | undefined> {
735
735
  //workaround for functions which add viewers to tableView (can be run only on active table view)
736
- if (table.name !== grok.shell.tv.dataFrame.name) {
737
- grok.shell.error(`Table ${table.name} is not a current table view`);
738
- return;
739
- }
736
+ checkCurrentView(table);
740
737
  if (!checkInputColumnUI(molecules, 'Sequence Space'))
741
738
  return;
742
739
  const clusterColName = table.columns.getUnusedName('Cluster (DBSCAN)');
@@ -755,6 +752,7 @@ export class PackageFunctions {
755
752
 
756
753
  let res: DG.ScatterPlotViewer | undefined;
757
754
  if (plotEmbeddings) {
755
+ checkCurrentView(table);
758
756
  const tv = grok.shell.tv;
759
757
  res = tv.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
760
758
  const description = `Molecules column: ${molecules.name}, method: ${methodName}, ${options ? `options: ${JSON.stringify(options)},` : ``} similarity: ${similarityMetric}`;
@@ -978,6 +976,21 @@ export class PackageFunctions {
978
976
  return multipleSequenceAlignmentUI({col: sequenceCol, clustersCol: clustersCol, ...options}, _package.seqHelper);
979
977
  }
980
978
 
979
+ @grok.decorators.func({
980
+ name: 'PepSeA',
981
+ description: 'Aligns non-canonical peptide sequences using PepSeA Docker container (MAFFT)',
982
+ meta: {role: 'sequenceMSA'},
983
+ outputs: [{name: 'result', type: 'column'}],
984
+ })
985
+ static async pepseaMsa(
986
+ @grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) sequenceCol: DG.Column<string>,
987
+ @grok.decorators.param({type: 'string', options: {choices: ['mafft --auto', 'mafft', 'linsi', 'ginsi', 'einsi', 'fftns', 'fftnsi', 'nwns', 'nwnsi'], initialValue: 'mafft --auto'}}) method: string = 'mafft --auto',
988
+ @grok.decorators.param({type: 'double', options: {initialValue: '1.53'}}) gapOpen: number = 1.53,
989
+ @grok.decorators.param({type: 'double', options: {initialValue: '0'}}) gapExtend: number = 0,
990
+ ): Promise<DG.Column<string>> {
991
+ return alignWithPepsea(sequenceCol, method, gapOpen, gapExtend);
992
+ }
993
+
981
994
  @grok.decorators.func({
982
995
  name: 'Composition Analysis',
983
996
  description: 'Visualizes sequence composition on a WebLogo plot',
@@ -108,7 +108,7 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
108
108
 
109
109
  async function _testMSAOnColumn(
110
110
  srcCsv: string, tgtCsv: string,
111
- srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string,
111
+ srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, engineMethod?: string,
112
112
  ): Promise<void> {
113
113
  const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
114
114
  await grok.data.detectSemanticTypes(srcDf);
@@ -121,7 +121,11 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
121
121
  if (alphabet)
122
122
  expect(srcCol.getTag(bioTAGS.alphabet), alphabet);
123
123
 
124
- const msaSeqCol = await multipleSequenceAlignmentUI({col: srcCol, pepsea: {method: pepseaMethod}}, seqHelper);
124
+ const msaSeqCol = await multipleSequenceAlignmentUI({
125
+ col: srcCol,
126
+ engine: engineMethod ? 'PepSeA' : undefined,
127
+ engineParams: engineMethod ? {method: engineMethod} : undefined,
128
+ }, seqHelper);
125
129
  expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
126
130
  expect(msaSeqCol.meta.units, tgtNotation);
127
131
  expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
@@ -1,5 +1,3 @@
1
- import {pepseaMethods} from './pepsea';
2
-
3
1
  export enum COLUMNS_NAMES {
4
2
  SPLIT_COL = '~split',
5
3
  ACTIVITY = '~activity',
@@ -64,14 +62,12 @@ export namespace PEPSEA {
64
62
  export const kalignVersion = '3.3.1';
65
63
 
66
64
  export const msaDefaultOptions = {
67
- pepsea: {
68
- gapOpen: 1.53,
69
- gapExtend: 0,
70
- method: pepseaMethods[0],
71
- },
72
65
  kalign: {
73
66
  gapOpen: -1.0,
74
67
  gapExtend: -1.0,
75
68
  terminalGap: -1.0,
76
69
  },
77
70
  } as const;
71
+
72
+ /** meta.role value for dynamically discovered MSA engine functions */
73
+ export const MSA_ENGINE_ROLE = 'sequenceMSA';