@datagrok/bio 2.26.8 → 2.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/CLAUDE.md +35 -5
- package/detectors.js +4 -2
- package/dist/422.js +1 -1
- package/dist/422.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/src/package-api.ts +7 -0
- package/src/package.g.ts +12 -0
- package/src/package.ts +21 -8
- package/src/tests/msa-tests.ts +6 -2
- package/src/utils/constants.ts +3 -7
- package/src/utils/multiple-sequence-alignment-ui.ts +403 -194
- package/src/utils/pepsea.ts +138 -116
- package/src/utils/types.ts +7 -4
- package/test-console-output-1.log +564 -603
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.26.8",
|
|
8
|
+
"version": "2.27.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.11",
|
|
51
51
|
"@datagrok-libraries/test": "^1.1.0",
|
|
52
52
|
"@datagrok-libraries/tutorials": "^1.7.4",
|
|
53
|
-
"@datagrok-libraries/utils": "^4.
|
|
53
|
+
"@datagrok-libraries/utils": "^4.7.2",
|
|
54
54
|
"@webgpu/types": "^0.1.40",
|
|
55
55
|
"ajv": "^8.12.0",
|
|
56
56
|
"ajv-errors": "^3.0.0",
|
package/src/package-api.ts
CHANGED
|
@@ -277,6 +277,13 @@ export namespace funcs {
|
|
|
277
277
|
return await grok.functions.call('Bio:AlignSequences', { sequenceCol, clustersCol, options });
|
|
278
278
|
}
|
|
279
279
|
|
|
280
|
+
/**
|
|
281
|
+
Aligns non-canonical peptide sequences using PepSeA Docker container (MAFFT)
|
|
282
|
+
*/
|
|
283
|
+
export async function pepseaMsa(sequenceCol: DG.Column , method: string , gapOpen: number , gapExtend: number ): Promise<DG.Column> {
|
|
284
|
+
return await grok.functions.call('Bio:PepseaMsa', { sequenceCol, method, gapOpen, gapExtend });
|
|
285
|
+
}
|
|
286
|
+
|
|
280
287
|
/**
|
|
281
288
|
Visualizes sequence composition on a WebLogo plot
|
|
282
289
|
*/
|
package/src/package.g.ts
CHANGED
|
@@ -444,6 +444,18 @@ export async function alignSequences(sequenceCol: any, clustersCol: any, options
|
|
|
444
444
|
return await PackageFunctions.alignSequences(sequenceCol, clustersCol, options);
|
|
445
445
|
}
|
|
446
446
|
|
|
447
|
+
//name: PepSeA
|
|
448
|
+
//description: Aligns non-canonical peptide sequences using PepSeA Docker container (MAFFT)
|
|
449
|
+
//input: column sequenceCol { semType: Macromolecule }
|
|
450
|
+
//input: string method = 'mafft --auto' { choices: ["mafft --auto","mafft","linsi","ginsi","einsi","fftns","fftnsi","nwns","nwnsi"] }
|
|
451
|
+
//input: double gapOpen = 1.53
|
|
452
|
+
//input: double gapExtend = 0
|
|
453
|
+
//output: column result
|
|
454
|
+
//meta.role: sequenceMSA
|
|
455
|
+
export async function pepseaMsa(sequenceCol: DG.Column<any>, method: string, gapOpen: number, gapExtend: number) : Promise<any> {
|
|
456
|
+
return await PackageFunctions.pepseaMsa(sequenceCol, method, gapOpen, gapExtend);
|
|
457
|
+
}
|
|
458
|
+
|
|
447
459
|
//name: Composition Analysis
|
|
448
460
|
//description: Visualizes sequence composition on a WebLogo plot
|
|
449
461
|
//output: viewer result
|
package/src/package.ts
CHANGED
|
@@ -33,6 +33,7 @@ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
|
|
|
33
33
|
import {ISeqHandler, SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
34
34
|
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
35
35
|
|
|
36
|
+
import {checkCurrentView} from '@datagrok-libraries/utils/src/view-utils';
|
|
36
37
|
import {getMacromoleculeColumns} from './utils/ui-utils';
|
|
37
38
|
import {MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer,} from './utils/cell-renderer';
|
|
38
39
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
@@ -58,6 +59,7 @@ import {demoToAtomicLevel} from './demo/bio03-atomic-level';
|
|
|
58
59
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
59
60
|
import {MsaWarning} from './utils/multiple-sequence-alignment';
|
|
60
61
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
62
|
+
import {alignWithPepsea, pepseaMethods} from './utils/pepsea';
|
|
61
63
|
import {WebLogoApp} from './apps/web-logo-app';
|
|
62
64
|
import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
|
|
63
65
|
import {splitToMonomersUI} from './utils/split-to-monomers';
|
|
@@ -527,10 +529,7 @@ export class PackageFunctions {
|
|
|
527
529
|
@grok.decorators.param({type: 'object', options: {optional: true}}) options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
528
530
|
@grok.decorators.param({options: {optional: true}}) demo?: boolean): Promise<DG.Viewer | undefined> {
|
|
529
531
|
//workaround for functions which add viewers to tableView (can be run only on active table view)
|
|
530
|
-
|
|
531
|
-
grok.shell.error(`Table ${table.name} is not a current table view`);
|
|
532
|
-
return;
|
|
533
|
-
}
|
|
532
|
+
checkCurrentView(table);
|
|
534
533
|
if (!checkInputColumnUI(molecules, 'Activity Cliffs'))
|
|
535
534
|
return;
|
|
536
535
|
|
|
@@ -556,6 +555,7 @@ export class PackageFunctions {
|
|
|
556
555
|
axesNames: axesNames,
|
|
557
556
|
}).call(undefined, undefined, {processed: false});
|
|
558
557
|
|
|
558
|
+
checkCurrentView(table);
|
|
559
559
|
const view = grok.shell.tv;
|
|
560
560
|
|
|
561
561
|
const description = `Molecules: ${molecules.name}, activities: ${activities.name}, method: ${methodName}, ${options ? `options: ${JSON.stringify(options)},` : ``} similarity: ${similarityMetric}, similarity cutoff: ${similarity}`;
|
|
@@ -733,10 +733,7 @@ export class PackageFunctions {
|
|
|
733
733
|
@grok.decorators.param({options: {optional: true}}) isDemo?: boolean
|
|
734
734
|
): Promise<DG.ScatterPlotViewer | undefined> {
|
|
735
735
|
//workaround for functions which add viewers to tableView (can be run only on active table view)
|
|
736
|
-
|
|
737
|
-
grok.shell.error(`Table ${table.name} is not a current table view`);
|
|
738
|
-
return;
|
|
739
|
-
}
|
|
736
|
+
checkCurrentView(table);
|
|
740
737
|
if (!checkInputColumnUI(molecules, 'Sequence Space'))
|
|
741
738
|
return;
|
|
742
739
|
const clusterColName = table.columns.getUnusedName('Cluster (DBSCAN)');
|
|
@@ -755,6 +752,7 @@ export class PackageFunctions {
|
|
|
755
752
|
|
|
756
753
|
let res: DG.ScatterPlotViewer | undefined;
|
|
757
754
|
if (plotEmbeddings) {
|
|
755
|
+
checkCurrentView(table);
|
|
758
756
|
const tv = grok.shell.tv;
|
|
759
757
|
res = tv.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
760
758
|
const description = `Molecules column: ${molecules.name}, method: ${methodName}, ${options ? `options: ${JSON.stringify(options)},` : ``} similarity: ${similarityMetric}`;
|
|
@@ -978,6 +976,21 @@ export class PackageFunctions {
|
|
|
978
976
|
return multipleSequenceAlignmentUI({col: sequenceCol, clustersCol: clustersCol, ...options}, _package.seqHelper);
|
|
979
977
|
}
|
|
980
978
|
|
|
979
|
+
@grok.decorators.func({
|
|
980
|
+
name: 'PepSeA',
|
|
981
|
+
description: 'Aligns non-canonical peptide sequences using PepSeA Docker container (MAFFT)',
|
|
982
|
+
meta: {role: 'sequenceMSA'},
|
|
983
|
+
outputs: [{name: 'result', type: 'column'}],
|
|
984
|
+
})
|
|
985
|
+
static async pepseaMsa(
|
|
986
|
+
@grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) sequenceCol: DG.Column<string>,
|
|
987
|
+
@grok.decorators.param({type: 'string', options: {choices: ['mafft --auto', 'mafft', 'linsi', 'ginsi', 'einsi', 'fftns', 'fftnsi', 'nwns', 'nwnsi'], initialValue: 'mafft --auto'}}) method: string = 'mafft --auto',
|
|
988
|
+
@grok.decorators.param({type: 'double', options: {initialValue: '1.53'}}) gapOpen: number = 1.53,
|
|
989
|
+
@grok.decorators.param({type: 'double', options: {initialValue: '0'}}) gapExtend: number = 0,
|
|
990
|
+
): Promise<DG.Column<string>> {
|
|
991
|
+
return alignWithPepsea(sequenceCol, method, gapOpen, gapExtend);
|
|
992
|
+
}
|
|
993
|
+
|
|
981
994
|
@grok.decorators.func({
|
|
982
995
|
name: 'Composition Analysis',
|
|
983
996
|
description: 'Visualizes sequence composition on a WebLogo plot',
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -108,7 +108,7 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
108
108
|
|
|
109
109
|
async function _testMSAOnColumn(
|
|
110
110
|
srcCsv: string, tgtCsv: string,
|
|
111
|
-
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET,
|
|
111
|
+
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, engineMethod?: string,
|
|
112
112
|
): Promise<void> {
|
|
113
113
|
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
114
114
|
await grok.data.detectSemanticTypes(srcDf);
|
|
@@ -121,7 +121,11 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
121
121
|
if (alphabet)
|
|
122
122
|
expect(srcCol.getTag(bioTAGS.alphabet), alphabet);
|
|
123
123
|
|
|
124
|
-
const msaSeqCol = await multipleSequenceAlignmentUI({
|
|
124
|
+
const msaSeqCol = await multipleSequenceAlignmentUI({
|
|
125
|
+
col: srcCol,
|
|
126
|
+
engine: engineMethod ? 'PepSeA' : undefined,
|
|
127
|
+
engineParams: engineMethod ? {method: engineMethod} : undefined,
|
|
128
|
+
}, seqHelper);
|
|
125
129
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
126
130
|
expect(msaSeqCol.meta.units, tgtNotation);
|
|
127
131
|
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
package/src/utils/constants.ts
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import {pepseaMethods} from './pepsea';
|
|
2
|
-
|
|
3
1
|
export enum COLUMNS_NAMES {
|
|
4
2
|
SPLIT_COL = '~split',
|
|
5
3
|
ACTIVITY = '~activity',
|
|
@@ -64,14 +62,12 @@ export namespace PEPSEA {
|
|
|
64
62
|
export const kalignVersion = '3.3.1';
|
|
65
63
|
|
|
66
64
|
export const msaDefaultOptions = {
|
|
67
|
-
pepsea: {
|
|
68
|
-
gapOpen: 1.53,
|
|
69
|
-
gapExtend: 0,
|
|
70
|
-
method: pepseaMethods[0],
|
|
71
|
-
},
|
|
72
65
|
kalign: {
|
|
73
66
|
gapOpen: -1.0,
|
|
74
67
|
gapExtend: -1.0,
|
|
75
68
|
terminalGap: -1.0,
|
|
76
69
|
},
|
|
77
70
|
} as const;
|
|
71
|
+
|
|
72
|
+
/** meta.role value for dynamically discovered MSA engine functions */
|
|
73
|
+
export const MSA_ENGINE_ROLE = 'sequenceMSA';
|