@datagrok/bio 2.1.11 → 2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -12
- package/css/helm.css +10 -0
- package/detectors.js +83 -59
- package/dist/package-test.js +2 -68651
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -66040
- package/dist/package.js.map +1 -0
- package/dockerfiles/Dockerfile +86 -0
- package/files/icons/composition-analysis.svg +17 -0
- package/files/icons/sequence-diversity-viewer.svg +4 -0
- package/files/icons/sequence-similarity-viewer.svg +4 -0
- package/files/icons/vdregions-viewer.svg +22 -0
- package/files/icons/weblogo-viewer.svg +7 -0
- package/files/tests/testUrl.csv +11 -0
- package/files/tests/toAtomicLevelTest.csv +4 -0
- package/package.json +29 -32
- package/src/analysis/sequence-activity-cliffs.ts +15 -13
- package/src/analysis/sequence-diversity-viewer.ts +3 -2
- package/src/analysis/sequence-search-base-viewer.ts +4 -2
- package/src/analysis/sequence-similarity-viewer.ts +4 -4
- package/src/analysis/sequence-space.ts +2 -1
- package/src/calculations/monomerLevelMols.ts +6 -6
- package/src/package-test.ts +9 -2
- package/src/package.ts +230 -145
- package/src/substructure-search/substructure-search.ts +25 -22
- package/src/tests/Palettes-test.ts +9 -9
- package/src/tests/WebLogo-positions-test.ts +131 -68
- package/src/tests/_first-tests.ts +9 -0
- package/src/tests/activity-cliffs-tests.ts +8 -7
- package/src/tests/activity-cliffs-utils.ts +17 -9
- package/src/tests/bio-tests.ts +30 -21
- package/src/tests/checkInputColumn-tests.ts +17 -17
- package/src/tests/converters-test.ts +81 -46
- package/src/tests/detectors-benchmark-tests.ts +17 -17
- package/src/tests/detectors-tests.ts +190 -178
- package/src/tests/fasta-export-tests.ts +2 -3
- package/src/tests/monomer-libraries-tests.ts +34 -0
- package/src/tests/pepsea-tests.ts +21 -0
- package/src/tests/renderers-test.ts +33 -29
- package/src/tests/sequence-space-test.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +4 -4
- package/src/tests/splitters-test.ts +6 -7
- package/src/tests/substructure-filters-tests.ts +23 -1
- package/src/tests/utils/sequences-generators.ts +7 -7
- package/src/tests/utils.ts +2 -1
- package/src/tests/viewers.ts +16 -0
- package/src/utils/cell-renderer.ts +116 -54
- package/src/utils/constants.ts +7 -6
- package/src/utils/convert.ts +17 -11
- package/src/utils/monomer-lib.ts +174 -0
- package/src/utils/multiple-sequence-alignment.ts +49 -26
- package/src/utils/pepsea.ts +78 -0
- package/src/utils/save-as-fasta.ts +9 -8
- package/src/utils/ui-utils.ts +15 -3
- package/src/viewers/vd-regions-viewer.ts +125 -83
- package/src/viewers/web-logo-viewer.ts +1031 -0
- package/src/widgets/bio-substructure-filter.ts +38 -24
- package/tsconfig.json +71 -72
- package/webpack.config.js +4 -11
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -8988
- package/jest.config.js +0 -33
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -98
- package/test-Bio-91c83d8913ff-bb573307.html +0 -392
package/src/package.ts
CHANGED
|
@@ -7,24 +7,14 @@ export const _package = new DG.Package();
|
|
|
7
7
|
|
|
8
8
|
import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/cell-renderer';
|
|
9
9
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
10
|
-
import {runKalign
|
|
11
|
-
import {SequenceAlignment
|
|
12
|
-
import {getEmbeddingColsNames,
|
|
10
|
+
import {runKalign} from './utils/multiple-sequence-alignment';
|
|
11
|
+
import {SequenceAlignment} from './seq_align';
|
|
12
|
+
import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
13
13
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
14
|
import {
|
|
15
|
-
createLinesGrid,
|
|
16
|
-
createPropPanelElement,
|
|
17
|
-
createTooltipElement,
|
|
18
|
-
getChemSimilaritiesMarix,
|
|
19
|
-
getSimilaritiesMarix
|
|
15
|
+
createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
|
|
20
16
|
} from './analysis/sequence-activity-cliffs';
|
|
21
|
-
import {
|
|
22
|
-
createJsonMonomerLibFromSdf,
|
|
23
|
-
encodeMonomers,
|
|
24
|
-
getMolfilesFromSeq
|
|
25
|
-
} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
26
17
|
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
27
|
-
import {getMacroMol} from './utils/atomic-works';
|
|
28
18
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
29
19
|
import {convert} from './utils/convert';
|
|
30
20
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
@@ -36,23 +26,39 @@ import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter'
|
|
|
36
26
|
import * as C from './utils/constants';
|
|
37
27
|
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
38
28
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
39
|
-
import {
|
|
29
|
+
import {substructureSearchDialog} from './substructure-search/substructure-search';
|
|
40
30
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
41
31
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
42
|
-
import {getMonomericMols} from './calculations/monomerLevelMols';
|
|
43
32
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
44
|
-
import {from, Observable, Subject} from 'rxjs';
|
|
45
33
|
import {
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
34
|
+
getStats,
|
|
35
|
+
NOTATION,
|
|
36
|
+
splitterAsHelm,
|
|
37
|
+
TAGS as bioTAGS,
|
|
38
|
+
ALPHABET
|
|
39
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
40
|
+
import {pepseaMethods, runPepsea} from './utils/pepsea';
|
|
41
|
+
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
42
|
+
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
43
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
44
|
+
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
45
|
+
import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
46
|
+
import {LIB_PATH, LIB_STORAGE_NAME, MonomerLibHelper} from './utils/monomer-lib';
|
|
47
|
+
import {getMacromoleculeColumn} from './utils/ui-utils';
|
|
48
|
+
import {IUMAPOptions, ITSNEOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
49
|
+
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
50
|
+
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
51
|
+
|
|
52
|
+
// /** Avoid reassinging {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
53
|
+
// let monomerLib: MonomerLib | null = null;
|
|
54
|
+
|
|
55
|
+
//name: getMonomerLibHelper
|
|
56
|
+
//description:
|
|
57
|
+
//output: object result
|
|
58
|
+
export function getMonomerLibHelper(): IMonomerLibHelper {
|
|
59
|
+
return MonomerLibHelper.instance;
|
|
60
|
+
}
|
|
54
61
|
|
|
55
|
-
let monomerLib: IMonomerLib | null = null;
|
|
56
62
|
export let hydrophobPalette: SeqPaletteCustom | null = null;
|
|
57
63
|
|
|
58
64
|
export class SeqPaletteCustom implements SeqPalette {
|
|
@@ -67,16 +73,21 @@ export class SeqPaletteCustom implements SeqPalette {
|
|
|
67
73
|
}
|
|
68
74
|
}
|
|
69
75
|
|
|
76
|
+
// let loadLibrariesPromise: Promise<void> = Promise.resolve();
|
|
77
|
+
|
|
70
78
|
//tags: init
|
|
71
79
|
export async function initBio() {
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
80
|
+
// loadLibrariesPromise = loadLibrariesPromise.then(() => {
|
|
81
|
+
await MonomerLibHelper.instance.loadLibraries(); // from initBio()
|
|
82
|
+
// });
|
|
83
|
+
// await loadLibrariesPromise;
|
|
84
|
+
const monomerLib = MonomerLibHelper.instance.getBioLib();
|
|
85
|
+
const monomers: string[] = [];
|
|
86
|
+
const logPs: number[] = [];
|
|
75
87
|
const module = await grok.functions.call('Chem:getRdKitModule');
|
|
76
88
|
|
|
77
|
-
|
|
78
89
|
const series = monomerLib!.getMonomerMolsByType('PEPTIDE')!;
|
|
79
|
-
Object.keys(series).forEach(symbol => {
|
|
90
|
+
Object.keys(series).forEach((symbol) => {
|
|
80
91
|
monomers.push(symbol);
|
|
81
92
|
const block = series[symbol].replaceAll('#R', 'O ');
|
|
82
93
|
const mol = module.get_mol(block);
|
|
@@ -88,38 +99,34 @@ export async function initBio() {
|
|
|
88
99
|
const sum = logPs.reduce((a, b) => a + b, 0);
|
|
89
100
|
const avg = (sum / logPs.length) || 0;
|
|
90
101
|
|
|
91
|
-
|
|
92
|
-
for (let i = 0; i < monomers.length; i++)
|
|
102
|
+
const palette: { [monomer: string]: string } = {};
|
|
103
|
+
for (let i = 0; i < monomers.length; i++)
|
|
93
104
|
palette[monomers[i]] = logPs[i] < avg ? '#4682B4' : '#DC143C';
|
|
94
|
-
}
|
|
95
105
|
|
|
96
106
|
hydrophobPalette = new SeqPaletteCustom(palette);
|
|
97
107
|
}
|
|
98
108
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
else {
|
|
115
|
-
monomerLib!.update(await readLibrary(LIB_PATH, value));
|
|
116
|
-
}
|
|
109
|
+
//name: sequenceTooltip
|
|
110
|
+
//tags: tooltip
|
|
111
|
+
//input: column col {semType: Macromolecule}
|
|
112
|
+
//output: widget result
|
|
113
|
+
export async function sequenceTooltip(col: DG.Column): Promise<DG.Widget<any>> {
|
|
114
|
+
const tv = grok.shell.tv;
|
|
115
|
+
const viewer = await tv.dataFrame.plot.fromType('WebLogo', {
|
|
116
|
+
sequenceColumnName: col.name,
|
|
117
|
+
backgroundColor: 0xFFfdffe5,
|
|
118
|
+
fitArea: false,
|
|
119
|
+
positionHeight: 'Entropy',
|
|
120
|
+
fixWidth: true
|
|
121
|
+
});
|
|
122
|
+
viewer.root.style.height = '50px';
|
|
123
|
+
return viewer;
|
|
117
124
|
}
|
|
118
125
|
|
|
119
126
|
//name: getBioLib
|
|
120
127
|
//output: object monomerLib
|
|
121
|
-
export function getBioLib(): IMonomerLib
|
|
122
|
-
return
|
|
128
|
+
export function getBioLib(): IMonomerLib {
|
|
129
|
+
return MonomerLibHelper.instance.getBioLib();
|
|
123
130
|
}
|
|
124
131
|
|
|
125
132
|
//name: manageFiles
|
|
@@ -132,30 +139,38 @@ export async function manageFiles() {
|
|
|
132
139
|
}
|
|
133
140
|
|
|
134
141
|
//name: Manage Libraries
|
|
135
|
-
//tags: panel, widgets
|
|
136
142
|
//input: column seqColumn {semType: Macromolecule}
|
|
143
|
+
//tags: panel, exclude-actions-panel
|
|
137
144
|
//output: widget result
|
|
138
145
|
export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
|
|
139
146
|
//@ts-ignore
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
147
|
+
const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
|
|
148
|
+
const divInputs: HTMLDivElement = ui.div();
|
|
149
|
+
const libFileNameList: string[] = (await grok.dapi.files.list(`${LIB_PATH}`, false, ''))
|
|
150
|
+
.map((it) => it.fileName);
|
|
151
|
+
const librariesUserSettingsSet: Set<string> = new Set<string>(Object.keys(
|
|
152
|
+
await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true)));
|
|
153
|
+
|
|
154
|
+
let userStoragePromise: Promise<void> = Promise.resolve();
|
|
155
|
+
for (const libFileName of libFileNameList) {
|
|
156
|
+
const libInput: DG.InputBase<boolean | null> = ui.boolInput(libFileName, librariesUserSettingsSet.has(libFileName),
|
|
157
|
+
() => {
|
|
158
|
+
userStoragePromise = userStoragePromise.then(async () => {
|
|
159
|
+
if (libInput.value == true) {
|
|
160
|
+
// Save checked library to user settings 'Libraries'
|
|
161
|
+
await grok.dapi.userDataStorage.postValue(LIB_STORAGE_NAME, libFileName, libFileName, true);
|
|
162
|
+
await MonomerLibHelper.instance.loadLibraries(); // from libraryPanel()
|
|
163
|
+
} else {
|
|
164
|
+
// Remove unchecked library from user settings 'Libraries'
|
|
165
|
+
await grok.dapi.userDataStorage.remove(LIB_STORAGE_NAME, libFileName, true);
|
|
166
|
+
await MonomerLibHelper.instance.loadLibraries(true); // from libraryPanel()
|
|
167
|
+
}
|
|
168
|
+
grok.shell.info('Monomer library user settings saved.');
|
|
169
|
+
});
|
|
170
|
+
});
|
|
171
|
+
divInputs.append(libInput.root);
|
|
158
172
|
}
|
|
173
|
+
|
|
159
174
|
return new DG.Widget(ui.splitV([
|
|
160
175
|
divInputs,
|
|
161
176
|
ui.divV([filesButton])
|
|
@@ -198,11 +213,10 @@ export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCe
|
|
|
198
213
|
}
|
|
199
214
|
|
|
200
215
|
|
|
201
|
-
function checkInputColumnUi(
|
|
202
|
-
|
|
203
|
-
): boolean {
|
|
216
|
+
function checkInputColumnUi(col: DG.Column, name: string, allowedNotations: string[] = [],
|
|
217
|
+
allowedAlphabets: string[] = [], notify: boolean = true): boolean {
|
|
204
218
|
const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
|
|
205
|
-
if (!res)
|
|
219
|
+
if (notify && !res)
|
|
206
220
|
grok.shell.warning(msg);
|
|
207
221
|
return res;
|
|
208
222
|
}
|
|
@@ -259,9 +273,10 @@ export function sequenceAlignment(alignType: string, alignTable: string, gap: nu
|
|
|
259
273
|
}
|
|
260
274
|
|
|
261
275
|
//name: WebLogo
|
|
262
|
-
//description: WebLogo
|
|
276
|
+
//description: WebLogo
|
|
263
277
|
//tags: viewer, panel
|
|
264
278
|
//output: viewer result
|
|
279
|
+
//meta.icon: files/icons/weblogo-viewer.svg
|
|
265
280
|
export function webLogoViewer() {
|
|
266
281
|
return new WebLogoViewer();
|
|
267
282
|
}
|
|
@@ -269,32 +284,47 @@ export function webLogoViewer() {
|
|
|
269
284
|
//name: VdRegions
|
|
270
285
|
//description: V-Domain regions viewer
|
|
271
286
|
//tags: viewer, panel
|
|
287
|
+
//meta.icon: files/icons/vdregions-viewer.svg
|
|
272
288
|
//output: viewer result
|
|
273
289
|
export function vdRegionViewer() {
|
|
274
290
|
return new VdRegionsViewer();
|
|
275
291
|
}
|
|
276
292
|
|
|
293
|
+
//name: SeqActivityCliffsEditor
|
|
294
|
+
//tags: editor
|
|
295
|
+
//input: funccall call
|
|
296
|
+
export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
297
|
+
const funcEditor = new ActivityCliffsFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
298
|
+
ui.dialog({title: 'Activity Cliffs'})
|
|
299
|
+
.add(funcEditor.paramsUI)
|
|
300
|
+
.onOK(async () => {
|
|
301
|
+
call.func.prepare(funcEditor.funcParams).call(true);
|
|
302
|
+
})
|
|
303
|
+
.show();
|
|
304
|
+
}
|
|
305
|
+
|
|
277
306
|
//top-menu: Bio | Sequence Activity Cliffs...
|
|
278
307
|
//name: Sequence Activity Cliffs
|
|
279
308
|
//description: detect activity cliffs
|
|
280
309
|
//input: dataframe table [Input data table]
|
|
281
|
-
//input: column
|
|
310
|
+
//input: column molecules {semType: Macromolecule}
|
|
282
311
|
//input: column activities
|
|
283
312
|
//input: double similarity = 80 [Similarity cutoff]
|
|
284
|
-
//input: string methodName { choices:["UMAP", "t-SNE"
|
|
313
|
+
//input: string methodName { choices:["UMAP", "t-SNE"] }
|
|
314
|
+
//input: object options {optional: true}
|
|
315
|
+
//output: viewer result
|
|
316
|
+
//editor: Bio:SeqActivityCliffsEditor
|
|
285
317
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
286
|
-
similarity: number, methodName: string
|
|
318
|
+
similarity: number, methodName: string, options?: IUMAPOptions | ITSNEOptions
|
|
319
|
+
): Promise<DG.Viewer | undefined> {
|
|
287
320
|
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
288
321
|
return;
|
|
289
322
|
const axesNames = getEmbeddingColsNames(df);
|
|
290
|
-
const options = {
|
|
291
|
-
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
292
|
-
};
|
|
293
323
|
const tags = {
|
|
294
324
|
'units': macroMolecule.getTag(DG.TAGS.UNITS),
|
|
295
|
-
'aligned': macroMolecule.getTag(
|
|
296
|
-
'separator': macroMolecule.getTag(
|
|
297
|
-
'alphabet': macroMolecule.getTag(
|
|
325
|
+
'aligned': macroMolecule.getTag(bioTAGS.aligned),
|
|
326
|
+
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
327
|
+
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
298
328
|
};
|
|
299
329
|
const sp = await getActivityCliffs(
|
|
300
330
|
df,
|
|
@@ -309,24 +339,40 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
309
339
|
DG.SEMTYPE.MACROMOLECULE,
|
|
310
340
|
tags,
|
|
311
341
|
sequenceSpaceByFingerprints,
|
|
312
|
-
|
|
342
|
+
getChemSimilaritiesMatrix,
|
|
313
343
|
createTooltipElement,
|
|
314
344
|
createPropPanelElement,
|
|
315
345
|
createLinesGrid,
|
|
316
|
-
|
|
346
|
+
options);
|
|
317
347
|
return sp;
|
|
318
348
|
}
|
|
319
349
|
|
|
350
|
+
//name: SequenceSpaceEditor
|
|
351
|
+
//tags: editor
|
|
352
|
+
//input: funccall call
|
|
353
|
+
export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
354
|
+
const funcEditor = new SequenceSpaceFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
355
|
+
ui.dialog({title: 'Sequence Space'})
|
|
356
|
+
.add(funcEditor.paramsUI)
|
|
357
|
+
.onOK(async () => {
|
|
358
|
+
call.func.prepare(funcEditor.funcParams).call(true);
|
|
359
|
+
})
|
|
360
|
+
.show();
|
|
361
|
+
}
|
|
362
|
+
|
|
320
363
|
//top-menu: Bio | Sequence Space...
|
|
321
364
|
//name: Sequence Space
|
|
322
365
|
//input: dataframe table
|
|
323
|
-
//input: column
|
|
324
|
-
//input: string methodName { choices:["UMAP", "t-SNE"
|
|
366
|
+
//input: column molecules { semType: Macromolecule }
|
|
367
|
+
//input: string methodName { choices:["UMAP", "t-SNE"] }
|
|
325
368
|
//input: string similarityMetric { choices:["Tanimoto", "Asymmetric", "Cosine", "Sokal"] }
|
|
326
369
|
//input: bool plotEmbeddings = true
|
|
370
|
+
//input: object options {optional: true}
|
|
371
|
+
//editor: Bio:SequenceSpaceEditor
|
|
327
372
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
328
|
-
similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
|
|
329
|
-
//
|
|
373
|
+
similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions): Promise<DG.Viewer | undefined> {
|
|
374
|
+
// Delay is required for initial function dialog to close before starting invalidating of molfiles.
|
|
375
|
+
// Otherwise, dialog is freezing
|
|
330
376
|
await delay(10);
|
|
331
377
|
if (!checkInputColumnUi(macroMolecule, 'Sequence space'))
|
|
332
378
|
return;
|
|
@@ -339,21 +385,21 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
339
385
|
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
340
386
|
methodName: methodName,
|
|
341
387
|
similarityMetric: similarityMetric,
|
|
342
|
-
embedAxesNames: embedColsNames
|
|
388
|
+
embedAxesNames: embedColsNames,
|
|
389
|
+
options: options
|
|
343
390
|
};
|
|
344
391
|
const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
|
|
345
392
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
346
393
|
for (const col of embeddings) {
|
|
347
394
|
const listValues = col.toList();
|
|
348
395
|
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
349
|
-
table.columns.add(DG.Column.
|
|
396
|
+
table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
|
|
350
397
|
}
|
|
351
398
|
if (plotEmbeddings) {
|
|
352
399
|
return grok.shell
|
|
353
400
|
.tableView(table.name)
|
|
354
401
|
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
355
402
|
}
|
|
356
|
-
;
|
|
357
403
|
|
|
358
404
|
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
359
405
|
if (!encodedCol)
|
|
@@ -402,38 +448,76 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
402
448
|
}
|
|
403
449
|
|
|
404
450
|
//top-menu: Bio | MSA...
|
|
405
|
-
//name: MSA
|
|
406
|
-
//input: dataframe table
|
|
407
|
-
//input: column sequence { semType: Macromolecule, units: ['fasta'], alphabet: ['DNA', 'RNA', 'PT'] }
|
|
408
|
-
//output: column result
|
|
409
|
-
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, sequence: DG.Column): Promise<DG.Column | null> {
|
|
410
|
-
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
411
|
-
|
|
412
|
-
if (!checkInputColumnUi(sequence, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
413
|
-
return null;
|
|
414
|
-
|
|
415
|
-
const unUsedName = table.columns.getUnusedName(`msa(${sequence.name})`);
|
|
416
|
-
const msaCol = await runKalign(sequence, false, unUsedName);
|
|
417
|
-
table.columns.add(msaCol);
|
|
418
|
-
|
|
419
|
-
// This call is required to enable cell renderer activation
|
|
420
|
-
await grok.data.detectSemanticTypes(table);
|
|
421
|
-
|
|
422
|
-
// const tv: DG.TableView = grok.shell.tv;
|
|
423
|
-
// tv.grid.invalidate();
|
|
424
|
-
return msaCol;
|
|
425
|
-
}
|
|
426
|
-
|
|
427
|
-
//name: Bio | MSA
|
|
451
|
+
//name: MSA...
|
|
428
452
|
//tags: bio, panel
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
453
|
+
export function multipleSequenceAlignmentAny(col: DG.Column<string> | null = null): void {
|
|
454
|
+
const table = col?.dataFrame ?? grok.shell.t;
|
|
455
|
+
const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
456
|
+
if (seqCol == null) {
|
|
457
|
+
grok.shell.warning(`MSAError: dataset doesn't conain any Macromolecule column`);
|
|
458
|
+
return;
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
let performAlignment: () => Promise<DG.Column<string> | null> = async () => null;
|
|
462
|
+
const methodInput = ui.choiceInput('Method', pepseaMethods[0], pepseaMethods);
|
|
463
|
+
methodInput.setTooltip('Alignment method');
|
|
464
|
+
const gapOpenInput = ui.floatInput('Gap open', 1.53);
|
|
465
|
+
gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
|
|
466
|
+
const gapExtendInput = ui.floatInput('Gap extend', 0);
|
|
467
|
+
gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
|
|
468
|
+
const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
|
|
469
|
+
|
|
470
|
+
const colInput = ui.columnInput('Sequence', table, seqCol, () => {
|
|
471
|
+
const potentialCol = colInput.value;
|
|
472
|
+
const unusedName = table.columns.getUnusedName(`msa(${potentialCol.name})`);
|
|
473
|
+
|
|
474
|
+
if (checkInputColumnUi(
|
|
475
|
+
potentialCol, potentialCol.name, [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)) {
|
|
476
|
+
for (const inputRootStyle of inputRootStyles)
|
|
477
|
+
inputRootStyle.display = 'none';
|
|
478
|
+
|
|
479
|
+
performAlignment = () => runKalign(potentialCol, false, unusedName, clustersColInput.value);
|
|
480
|
+
} else if (checkInputColumnUi(potentialCol, potentialCol.name, [NOTATION.HELM], [], false)) {
|
|
481
|
+
for (const inputRootStyle of inputRootStyles)
|
|
482
|
+
inputRootStyle.display = 'initial';
|
|
483
|
+
|
|
484
|
+
performAlignment = () => runPepsea(potentialCol, unusedName, methodInput.value!, gapOpenInput.value!,
|
|
485
|
+
gapExtendInput.value!, clustersColInput.value);
|
|
486
|
+
} else {
|
|
487
|
+
for (const inputRootStyle of inputRootStyles)
|
|
488
|
+
inputRootStyle.display = 'none';
|
|
489
|
+
|
|
490
|
+
performAlignment = async () => null;
|
|
491
|
+
}
|
|
492
|
+
}) as DG.InputBase<DG.Column<string>>;
|
|
493
|
+
colInput.setTooltip('Sequences column to use for alignment');
|
|
494
|
+
colInput.fireChanged();
|
|
495
|
+
|
|
496
|
+
const clustersColInput = ui.columnInput('Clusters', table, null);
|
|
497
|
+
clustersColInput.nullable = true;
|
|
498
|
+
|
|
499
|
+
let msaCol: DG.Column<string> | null = null;
|
|
500
|
+
ui.dialog('MSA')
|
|
501
|
+
.add(colInput)
|
|
502
|
+
.add(clustersColInput)
|
|
503
|
+
.add(methodInput)
|
|
504
|
+
.add(gapOpenInput)
|
|
505
|
+
.add(gapExtendInput)
|
|
506
|
+
.onOK(async () => {
|
|
507
|
+
colInput.fireChanged();
|
|
508
|
+
msaCol = await performAlignment();
|
|
509
|
+
if (msaCol == null)
|
|
510
|
+
return grok.shell.warning('Wrong column format');
|
|
511
|
+
|
|
512
|
+
table.columns.add(msaCol);
|
|
513
|
+
await grok.data.detectSemanticTypes(table);
|
|
514
|
+
})
|
|
515
|
+
.show();
|
|
433
516
|
}
|
|
434
517
|
|
|
435
518
|
//name: Composition Analysis
|
|
436
519
|
//top-menu: Bio | Composition Analysis
|
|
520
|
+
//meta.icon: files/icons/composition-analysis.svg
|
|
437
521
|
//output: viewer result
|
|
438
522
|
export async function compositionAnalysis(): Promise<void> {
|
|
439
523
|
// Higher priority for columns with MSA data to show with WebLogo.
|
|
@@ -490,8 +574,8 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
490
574
|
await handler(col);
|
|
491
575
|
}
|
|
492
576
|
|
|
493
|
-
//top-menu: Bio |
|
|
494
|
-
//name:
|
|
577
|
+
//top-menu: Bio | SDF to JSON lib...
|
|
578
|
+
//name: SDF to JSON Lib
|
|
495
579
|
//input: dataframe table
|
|
496
580
|
export async function sdfToJsonLib(table: DG.DataFrame) {
|
|
497
581
|
const jsonMonomerLibrary = createJsonMonomerLibFromSdf(table);
|
|
@@ -519,11 +603,10 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
519
603
|
return ffh.importFasta();
|
|
520
604
|
}
|
|
521
605
|
|
|
522
|
-
//
|
|
523
|
-
//
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
export function convertPanel(col: DG.Column): void {
|
|
606
|
+
//top-menu: Bio | Convert...
|
|
607
|
+
//name: convertDialog
|
|
608
|
+
export function convertDialog() {
|
|
609
|
+
const col = getMacromoleculeColumn();
|
|
527
610
|
convert(col);
|
|
528
611
|
}
|
|
529
612
|
|
|
@@ -586,17 +669,17 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
586
669
|
return resDf;
|
|
587
670
|
}
|
|
588
671
|
|
|
589
|
-
//
|
|
590
|
-
//
|
|
591
|
-
|
|
592
|
-
|
|
672
|
+
//top-menu: Bio | Split to monomers
|
|
673
|
+
//name: splitToMonomers
|
|
674
|
+
export function splitToMonomers(): void {
|
|
675
|
+
const col = getMacromoleculeColumn();
|
|
593
676
|
const tempDf = splitAlignedSequences(col);
|
|
594
677
|
const originalDf = col.dataFrame;
|
|
595
678
|
for (const tempCol of tempDf.columns) {
|
|
596
679
|
const newCol = originalDf.columns.add(tempCol);
|
|
597
680
|
newCol.semType = C.SEM_TYPES.MONOMER;
|
|
598
681
|
newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
|
|
599
|
-
newCol.setTag(
|
|
682
|
+
newCol.setTag(bioTAGS.alphabet, col.getTag(bioTAGS.alphabet));
|
|
600
683
|
}
|
|
601
684
|
grok.shell.tv.grid.invalidate();
|
|
602
685
|
}
|
|
@@ -609,44 +692,46 @@ export function getHelmMonomers(sequence: DG.Column<string>): string[] {
|
|
|
609
692
|
}
|
|
610
693
|
|
|
611
694
|
|
|
612
|
-
//name:
|
|
695
|
+
//name: Sequence Similarity Search
|
|
613
696
|
//tags: viewer
|
|
697
|
+
//meta.icon: files/icons/sequence-similarity-viewer.svg
|
|
614
698
|
//output: viewer result
|
|
615
699
|
export function similaritySearchViewer(): SequenceSimilarityViewer {
|
|
616
700
|
return new SequenceSimilarityViewer();
|
|
617
701
|
}
|
|
618
702
|
|
|
619
|
-
//top-menu: Bio | Similarity Search
|
|
703
|
+
//top-menu: Bio | Search | Similarity Search
|
|
620
704
|
//name: similaritySearch
|
|
621
705
|
//description: finds the most similar sequence
|
|
622
706
|
//output: viewer result
|
|
623
707
|
export function similaritySearchTopMenu(): void {
|
|
624
708
|
const view = (grok.shell.v as DG.TableView);
|
|
625
|
-
const viewer = view.addViewer('
|
|
709
|
+
const viewer = view.addViewer('Sequence Similarity Search');
|
|
626
710
|
view.dockManager.dock(viewer, 'down');
|
|
627
711
|
}
|
|
628
712
|
|
|
629
|
-
//name:
|
|
713
|
+
//name: Sequence Diversity Search
|
|
630
714
|
//tags: viewer
|
|
715
|
+
//meta.icon: files/icons/sequence-diversity-viewer.svg
|
|
631
716
|
//output: viewer result
|
|
632
717
|
export function diversitySearchViewer(): SequenceDiversityViewer {
|
|
633
718
|
return new SequenceDiversityViewer();
|
|
634
719
|
}
|
|
635
720
|
|
|
636
|
-
//top-menu: Bio | Diversity Search
|
|
721
|
+
//top-menu: Bio | Search | Diversity Search
|
|
637
722
|
//name: diversitySearch
|
|
638
723
|
//description: finds the most diverse molecules
|
|
639
724
|
//output: viewer result
|
|
640
725
|
export function diversitySearchTopMenu() {
|
|
641
726
|
const view = (grok.shell.v as DG.TableView);
|
|
642
|
-
const viewer = view.addViewer('
|
|
727
|
+
const viewer = view.addViewer('Sequence Diversity Search');
|
|
643
728
|
view.dockManager.dock(viewer, 'down');
|
|
644
729
|
}
|
|
645
730
|
|
|
646
|
-
//
|
|
647
|
-
//
|
|
648
|
-
|
|
649
|
-
|
|
731
|
+
//top-menu: Bio | Substructure Search ...
|
|
732
|
+
//name: bioSubstructureSearch
|
|
733
|
+
export function bioSubstructureSearch(): void {
|
|
734
|
+
const col = getMacromoleculeColumn();
|
|
650
735
|
substructureSearchDialog(col);
|
|
651
736
|
}
|
|
652
737
|
|