@datagrok/bio 2.26.8 → 2.27.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/CLAUDE.md +35 -5
- package/detectors.js +4 -2
- package/dist/287.js +1 -1
- package/dist/287.js.map +1 -1
- package/dist/422.js +1 -1
- package/dist/422.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/scripts/mol-to-helm.py +642 -170
- package/src/analysis/sequence-activity-cliffs.ts +8 -6
- package/src/package-api.ts +9 -2
- package/src/package.g.ts +12 -0
- package/src/package.ts +23 -9
- package/src/tests/msa-tests.ts +6 -2
- package/src/utils/annotations/annotation-manager-ui.ts +1 -1
- package/src/utils/constants.ts +3 -7
- package/src/utils/monomer-lib/library-file-manager/ui.ts +1 -1
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +1 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +403 -194
- package/src/utils/pepsea.ts +138 -116
- package/src/utils/types.ts +7 -4
- package/test-console-output-1.log +584 -615
- package/test-record-1.mp4 +0 -0
|
@@ -181,12 +181,14 @@ export function createDifferencesWithPositions(
|
|
|
181
181
|
}
|
|
182
182
|
|
|
183
183
|
export function createLinesGrid(df: DG.DataFrame, colNames: string[]): DG.Grid {
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
184
|
+
if (!df.col('seq_diff')) {
|
|
185
|
+
const seqDiffCol = DG.Column.string('seq_diff', df.rowCount)
|
|
186
|
+
.init((i) => `${df.get(colNames[0], i)}#${df.get(colNames[1], i)}`);
|
|
187
|
+
seqDiffCol.semType = 'MacromoleculeDifference';
|
|
188
|
+
seqDiffCol.meta.units = df.col(colNames[0])!.meta.units;
|
|
189
|
+
seqDiffCol.setTag(bioTAGS.separator, df.col(colNames[0])!.getTag(bioTAGS.separator));
|
|
190
|
+
df.columns.add(seqDiffCol);
|
|
191
|
+
}
|
|
190
192
|
const grid = df.plot.grid();
|
|
191
193
|
grid.col(colNames[0])!.visible = false;
|
|
192
194
|
grid.col(colNames[1])!.visible = false;
|
package/src/package-api.ts
CHANGED
|
@@ -15,8 +15,8 @@ export namespace scripts {
|
|
|
15
15
|
/**
|
|
16
16
|
Converts molecules to HELM notation based on monomer library
|
|
17
17
|
*/
|
|
18
|
-
export async function molToHelmConverterPy(moleculesDataframe: DG.DataFrame , moleculesColumn: DG.Column ,
|
|
19
|
-
return await grok.functions.call('Bio:MolToHelmConverterPy', { moleculesDataframe, moleculesColumn,
|
|
18
|
+
export async function molToHelmConverterPy(moleculesDataframe: DG.DataFrame , moleculesColumn: DG.Column , libraryFile: DG.FileInfo ): Promise<DG.DataFrame> {
|
|
19
|
+
return await grok.functions.call('Bio:MolToHelmConverterPy', { moleculesDataframe, moleculesColumn, libraryFile });
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
/**
|
|
@@ -277,6 +277,13 @@ export namespace funcs {
|
|
|
277
277
|
return await grok.functions.call('Bio:AlignSequences', { sequenceCol, clustersCol, options });
|
|
278
278
|
}
|
|
279
279
|
|
|
280
|
+
/**
|
|
281
|
+
Aligns non-canonical peptide sequences using PepSeA Docker container (MAFFT)
|
|
282
|
+
*/
|
|
283
|
+
export async function pepseaMsa(sequenceCol: DG.Column , method: string , gapOpen: number , gapExtend: number ): Promise<DG.Column> {
|
|
284
|
+
return await grok.functions.call('Bio:PepseaMsa', { sequenceCol, method, gapOpen, gapExtend });
|
|
285
|
+
}
|
|
286
|
+
|
|
280
287
|
/**
|
|
281
288
|
Visualizes sequence composition on a WebLogo plot
|
|
282
289
|
*/
|
package/src/package.g.ts
CHANGED
|
@@ -444,6 +444,18 @@ export async function alignSequences(sequenceCol: any, clustersCol: any, options
|
|
|
444
444
|
return await PackageFunctions.alignSequences(sequenceCol, clustersCol, options);
|
|
445
445
|
}
|
|
446
446
|
|
|
447
|
+
//name: PepSeA
|
|
448
|
+
//description: Aligns non-canonical peptide sequences using PepSeA Docker container (MAFFT)
|
|
449
|
+
//input: column sequenceCol { semType: Macromolecule }
|
|
450
|
+
//input: string method = 'mafft --auto' { choices: ["mafft --auto","mafft","linsi","ginsi","einsi","fftns","fftnsi","nwns","nwnsi"] }
|
|
451
|
+
//input: double gapOpen = 1.53
|
|
452
|
+
//input: double gapExtend = 0
|
|
453
|
+
//output: column result
|
|
454
|
+
//meta.role: sequenceMSA
|
|
455
|
+
export async function pepseaMsa(sequenceCol: DG.Column<any>, method: string, gapOpen: number, gapExtend: number) : Promise<any> {
|
|
456
|
+
return await PackageFunctions.pepseaMsa(sequenceCol, method, gapOpen, gapExtend);
|
|
457
|
+
}
|
|
458
|
+
|
|
447
459
|
//name: Composition Analysis
|
|
448
460
|
//description: Visualizes sequence composition on a WebLogo plot
|
|
449
461
|
//output: viewer result
|
package/src/package.ts
CHANGED
|
@@ -33,6 +33,7 @@ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
|
|
|
33
33
|
import {ISeqHandler, SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
34
34
|
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
35
35
|
|
|
36
|
+
import {checkCurrentView} from '@datagrok-libraries/utils/src/view-utils';
|
|
36
37
|
import {getMacromoleculeColumns} from './utils/ui-utils';
|
|
37
38
|
import {MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer,} from './utils/cell-renderer';
|
|
38
39
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
@@ -58,6 +59,7 @@ import {demoToAtomicLevel} from './demo/bio03-atomic-level';
|
|
|
58
59
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
59
60
|
import {MsaWarning} from './utils/multiple-sequence-alignment';
|
|
60
61
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
62
|
+
import {alignWithPepsea, pepseaMethods} from './utils/pepsea';
|
|
61
63
|
import {WebLogoApp} from './apps/web-logo-app';
|
|
62
64
|
import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
|
|
63
65
|
import {splitToMonomersUI} from './utils/split-to-monomers';
|
|
@@ -527,10 +529,7 @@ export class PackageFunctions {
|
|
|
527
529
|
@grok.decorators.param({type: 'object', options: {optional: true}}) options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
528
530
|
@grok.decorators.param({options: {optional: true}}) demo?: boolean): Promise<DG.Viewer | undefined> {
|
|
529
531
|
//workaround for functions which add viewers to tableView (can be run only on active table view)
|
|
530
|
-
|
|
531
|
-
grok.shell.error(`Table ${table.name} is not a current table view`);
|
|
532
|
-
return;
|
|
533
|
-
}
|
|
532
|
+
checkCurrentView(table);
|
|
534
533
|
if (!checkInputColumnUI(molecules, 'Activity Cliffs'))
|
|
535
534
|
return;
|
|
536
535
|
|
|
@@ -556,6 +555,7 @@ export class PackageFunctions {
|
|
|
556
555
|
axesNames: axesNames,
|
|
557
556
|
}).call(undefined, undefined, {processed: false});
|
|
558
557
|
|
|
558
|
+
checkCurrentView(table);
|
|
559
559
|
const view = grok.shell.tv;
|
|
560
560
|
|
|
561
561
|
const description = `Molecules: ${molecules.name}, activities: ${activities.name}, method: ${methodName}, ${options ? `options: ${JSON.stringify(options)},` : ``} similarity: ${similarityMetric}, similarity cutoff: ${similarity}`;
|
|
@@ -733,10 +733,7 @@ export class PackageFunctions {
|
|
|
733
733
|
@grok.decorators.param({options: {optional: true}}) isDemo?: boolean
|
|
734
734
|
): Promise<DG.ScatterPlotViewer | undefined> {
|
|
735
735
|
//workaround for functions which add viewers to tableView (can be run only on active table view)
|
|
736
|
-
|
|
737
|
-
grok.shell.error(`Table ${table.name} is not a current table view`);
|
|
738
|
-
return;
|
|
739
|
-
}
|
|
736
|
+
checkCurrentView(table);
|
|
740
737
|
if (!checkInputColumnUI(molecules, 'Sequence Space'))
|
|
741
738
|
return;
|
|
742
739
|
const clusterColName = table.columns.getUnusedName('Cluster (DBSCAN)');
|
|
@@ -755,6 +752,7 @@ export class PackageFunctions {
|
|
|
755
752
|
|
|
756
753
|
let res: DG.ScatterPlotViewer | undefined;
|
|
757
754
|
if (plotEmbeddings) {
|
|
755
|
+
checkCurrentView(table);
|
|
758
756
|
const tv = grok.shell.tv;
|
|
759
757
|
res = tv.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
760
758
|
const description = `Molecules column: ${molecules.name}, method: ${methodName}, ${options ? `options: ${JSON.stringify(options)},` : ``} similarity: ${similarityMetric}`;
|
|
@@ -812,7 +810,8 @@ export class PackageFunctions {
|
|
|
812
810
|
// collect current monomer library
|
|
813
811
|
const monomerLib = _package.monomerLib;
|
|
814
812
|
const libJSON = JSON.stringify(monomerLib.toJSON());
|
|
815
|
-
|
|
813
|
+
const fileInfo = DG.FileInfo.fromString('monomerLib.json', libJSON);
|
|
814
|
+
await api.scripts.molToHelmConverterPy(table, molecules, fileInfo);
|
|
816
815
|
|
|
817
816
|
// semtype is not automatically set, so we set it manually
|
|
818
817
|
const newCol = table.columns.toList().find((c) => c.name.toLowerCase().includes('regenerated sequence') && c.semType !== DG.SEMTYPE.MACROMOLECULE);
|
|
@@ -978,6 +977,21 @@ export class PackageFunctions {
|
|
|
978
977
|
return multipleSequenceAlignmentUI({col: sequenceCol, clustersCol: clustersCol, ...options}, _package.seqHelper);
|
|
979
978
|
}
|
|
980
979
|
|
|
980
|
+
@grok.decorators.func({
|
|
981
|
+
name: 'PepSeA',
|
|
982
|
+
description: 'Aligns non-canonical peptide sequences using PepSeA Docker container (MAFFT)',
|
|
983
|
+
meta: {role: 'sequenceMSA'},
|
|
984
|
+
outputs: [{name: 'result', type: 'column'}],
|
|
985
|
+
})
|
|
986
|
+
static async pepseaMsa(
|
|
987
|
+
@grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) sequenceCol: DG.Column<string>,
|
|
988
|
+
@grok.decorators.param({type: 'string', options: {choices: ['mafft --auto', 'mafft', 'linsi', 'ginsi', 'einsi', 'fftns', 'fftnsi', 'nwns', 'nwnsi'], initialValue: 'mafft --auto'}}) method: string = 'mafft --auto',
|
|
989
|
+
@grok.decorators.param({type: 'double', options: {initialValue: '1.53'}}) gapOpen: number = 1.53,
|
|
990
|
+
@grok.decorators.param({type: 'double', options: {initialValue: '0'}}) gapExtend: number = 0,
|
|
991
|
+
): Promise<DG.Column<string>> {
|
|
992
|
+
return alignWithPepsea(sequenceCol, method, gapOpen, gapExtend);
|
|
993
|
+
}
|
|
994
|
+
|
|
981
995
|
@grok.decorators.func({
|
|
982
996
|
name: 'Composition Analysis',
|
|
983
997
|
description: 'Visualizes sequence composition on a WebLogo plot',
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -108,7 +108,7 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
108
108
|
|
|
109
109
|
async function _testMSAOnColumn(
|
|
110
110
|
srcCsv: string, tgtCsv: string,
|
|
111
|
-
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET,
|
|
111
|
+
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, engineMethod?: string,
|
|
112
112
|
): Promise<void> {
|
|
113
113
|
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
114
114
|
await grok.data.detectSemanticTypes(srcDf);
|
|
@@ -121,7 +121,11 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
121
121
|
if (alphabet)
|
|
122
122
|
expect(srcCol.getTag(bioTAGS.alphabet), alphabet);
|
|
123
123
|
|
|
124
|
-
const msaSeqCol = await multipleSequenceAlignmentUI({
|
|
124
|
+
const msaSeqCol = await multipleSequenceAlignmentUI({
|
|
125
|
+
col: srcCol,
|
|
126
|
+
engine: engineMethod ? 'PepSeA' : undefined,
|
|
127
|
+
engineParams: engineMethod ? {method: engineMethod} : undefined,
|
|
128
|
+
}, seqHelper);
|
|
125
129
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
126
130
|
expect(msaSeqCol.meta.units, tgtNotation);
|
|
127
131
|
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
@@ -63,7 +63,7 @@ export function showAnnotationManagerDialog(): void {
|
|
|
63
63
|
setColumnAnnotations(selectedCol, updated);
|
|
64
64
|
df.fireValuesChanged();
|
|
65
65
|
refreshList();
|
|
66
|
-
});
|
|
66
|
+
}, 'Delete');
|
|
67
67
|
removeBtn.style.cursor = 'pointer';
|
|
68
68
|
removeBtn.style.color = '#999';
|
|
69
69
|
removeBtn.style.marginLeft = '8px';
|
package/src/utils/constants.ts
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import {pepseaMethods} from './pepsea';
|
|
2
|
-
|
|
3
1
|
export enum COLUMNS_NAMES {
|
|
4
2
|
SPLIT_COL = '~split',
|
|
5
3
|
ACTIVITY = '~activity',
|
|
@@ -64,14 +62,12 @@ export namespace PEPSEA {
|
|
|
64
62
|
export const kalignVersion = '3.3.1';
|
|
65
63
|
|
|
66
64
|
export const msaDefaultOptions = {
|
|
67
|
-
pepsea: {
|
|
68
|
-
gapOpen: 1.53,
|
|
69
|
-
gapExtend: 0,
|
|
70
|
-
method: pepseaMethods[0],
|
|
71
|
-
},
|
|
72
65
|
kalign: {
|
|
73
66
|
gapOpen: -1.0,
|
|
74
67
|
gapExtend: -1.0,
|
|
75
68
|
terminalGap: -1.0,
|
|
76
69
|
},
|
|
77
70
|
} as const;
|
|
71
|
+
|
|
72
|
+
/** meta.role value for dynamically discovered MSA engine functions */
|
|
73
|
+
export const MSA_ENGINE_ROLE = 'sequenceMSA';
|
|
@@ -198,7 +198,7 @@ class LibraryControlsManager {
|
|
|
198
198
|
updateLibrarySelectionStatus(libInput.value, libFileName);
|
|
199
199
|
}});
|
|
200
200
|
ui.tooltip.bind(libInput.root, `Include monomers from ${libFileName}`);
|
|
201
|
-
const deleteIcon = ui.iconFA('trash-alt', () => this.promptForLibraryDeletion(libFileName));
|
|
201
|
+
const deleteIcon = ui.iconFA('trash-alt', () => this.promptForLibraryDeletion(libFileName), 'Delete');
|
|
202
202
|
const editIcon = ui.icons.edit(async () => {
|
|
203
203
|
grok.shell.v = await (await MonomerManager.getInstance()).getViewRoot(libFileName);
|
|
204
204
|
}, 'Edit monomer library');
|
|
@@ -486,7 +486,7 @@ export class MonomerManager implements IMonomerManager {
|
|
|
486
486
|
}
|
|
487
487
|
const monomer = await monomerFromDfRow(this.tv!.dataFrame.rows.get(currentRowIdx));
|
|
488
488
|
await this._newMonomerForm.removeMonomers([monomer], this.libInput.value!);
|
|
489
|
-
});
|
|
489
|
+
}, 'Delete');
|
|
490
490
|
|
|
491
491
|
ui.tooltip.bind(deleteButton, () =>
|
|
492
492
|
`${(this.tv?.dataFrame?.selection?.trueCount ?? 0) > 0 ? 'Delete selected monomers' : 'Delete monomer'}`);
|