@datagrok/bio 2.1.12 → 2.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -1
- package/README.md +11 -12
- package/css/helm.css +10 -0
- package/detectors.js +97 -69
- package/dist/package-test.js +2 -13168
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -10560
- package/dist/package.js.map +1 -0
- package/dockerfiles/Dockerfile +86 -0
- package/files/icons/composition-analysis.svg +17 -0
- package/files/icons/sequence-diversity-viewer.svg +4 -0
- package/files/icons/sequence-similarity-viewer.svg +4 -0
- package/files/icons/vdregions-viewer.svg +22 -0
- package/files/icons/weblogo-viewer.svg +7 -0
- package/files/tests/testUrl.csv +11 -0
- package/files/tests/toAtomicLevelTest.csv +4 -0
- package/package.json +24 -25
- package/src/analysis/sequence-activity-cliffs.ts +11 -9
- package/src/analysis/sequence-search-base-viewer.ts +2 -1
- package/src/analysis/sequence-similarity-viewer.ts +3 -3
- package/src/analysis/sequence-space.ts +2 -1
- package/src/calculations/monomerLevelMols.ts +4 -4
- package/src/package-test.ts +10 -2
- package/src/package.ts +215 -131
- package/src/substructure-search/substructure-search.ts +19 -16
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +113 -57
- package/src/tests/_first-tests.ts +9 -0
- package/src/tests/activity-cliffs-tests.ts +8 -7
- package/src/tests/activity-cliffs-utils.ts +17 -9
- package/src/tests/bio-tests.ts +4 -5
- package/src/tests/checkInputColumn-tests.ts +1 -1
- package/src/tests/converters-test.ts +52 -17
- package/src/tests/detectors-benchmark-tests.ts +3 -2
- package/src/tests/detectors-tests.ts +177 -172
- package/src/tests/detectors-weak-and-likely-tests.ts +129 -0
- package/src/tests/fasta-export-tests.ts +1 -1
- package/src/tests/monomer-libraries-tests.ts +34 -0
- package/src/tests/pepsea-tests.ts +21 -0
- package/src/tests/renderers-test.ts +21 -19
- package/src/tests/sequence-space-test.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +4 -4
- package/src/tests/splitters-test.ts +4 -5
- package/src/tests/substructure-filters-tests.ts +23 -1
- package/src/tests/utils/sequences-generators.ts +1 -1
- package/src/tests/utils.ts +2 -1
- package/src/tests/viewers.ts +16 -0
- package/src/utils/cell-renderer.ts +88 -35
- package/src/utils/constants.ts +7 -6
- package/src/utils/convert.ts +8 -2
- package/src/utils/monomer-lib.ts +174 -0
- package/src/utils/multiple-sequence-alignment.ts +44 -20
- package/src/utils/pepsea.ts +78 -0
- package/src/utils/save-as-fasta.ts +2 -1
- package/src/utils/ui-utils.ts +15 -3
- package/src/viewers/vd-regions-viewer.ts +113 -72
- package/src/viewers/web-logo-viewer.ts +1031 -0
- package/src/widgets/bio-substructure-filter.ts +38 -24
- package/tsconfig.json +71 -72
- package/webpack.config.js +4 -11
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -9039
package/src/package.ts
CHANGED
|
@@ -7,24 +7,14 @@ export const _package = new DG.Package();
|
|
|
7
7
|
|
|
8
8
|
import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/cell-renderer';
|
|
9
9
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
10
|
-
import {runKalign
|
|
11
|
-
import {SequenceAlignment
|
|
12
|
-
import {getEmbeddingColsNames,
|
|
10
|
+
import {runKalign} from './utils/multiple-sequence-alignment';
|
|
11
|
+
import {SequenceAlignment} from './seq_align';
|
|
12
|
+
import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
13
13
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
14
|
import {
|
|
15
|
-
createLinesGrid,
|
|
16
|
-
createPropPanelElement,
|
|
17
|
-
createTooltipElement,
|
|
18
|
-
getChemSimilaritiesMarix,
|
|
19
|
-
getSimilaritiesMarix
|
|
15
|
+
createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
|
|
20
16
|
} from './analysis/sequence-activity-cliffs';
|
|
21
|
-
import {
|
|
22
|
-
createJsonMonomerLibFromSdf,
|
|
23
|
-
encodeMonomers,
|
|
24
|
-
getMolfilesFromSeq
|
|
25
|
-
} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
26
17
|
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
27
|
-
import {getMacroMol} from './utils/atomic-works';
|
|
28
18
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
29
19
|
import {convert} from './utils/convert';
|
|
30
20
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
@@ -36,23 +26,39 @@ import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter'
|
|
|
36
26
|
import * as C from './utils/constants';
|
|
37
27
|
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
38
28
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
39
|
-
import {
|
|
29
|
+
import {substructureSearchDialog} from './substructure-search/substructure-search';
|
|
40
30
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
41
31
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
42
|
-
import {getMonomericMols} from './calculations/monomerLevelMols';
|
|
43
32
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
44
|
-
import {from, Observable, Subject} from 'rxjs';
|
|
45
33
|
import {
|
|
34
|
+
getStats,
|
|
35
|
+
NOTATION,
|
|
36
|
+
splitterAsHelm,
|
|
46
37
|
TAGS as bioTAGS,
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
} from '
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
38
|
+
ALPHABET
|
|
39
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
40
|
+
import {pepseaMethods, runPepsea} from './utils/pepsea';
|
|
41
|
+
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
42
|
+
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
43
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
44
|
+
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
45
|
+
import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
46
|
+
import {LIB_PATH, LIB_STORAGE_NAME, MonomerLibHelper} from './utils/monomer-lib';
|
|
47
|
+
import {getMacromoleculeColumn} from './utils/ui-utils';
|
|
48
|
+
import {IUMAPOptions, ITSNEOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
49
|
+
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
50
|
+
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
51
|
+
|
|
52
|
+
// /** Avoid reassinging {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
53
|
+
// let monomerLib: MonomerLib | null = null;
|
|
54
|
+
|
|
55
|
+
//name: getMonomerLibHelper
|
|
56
|
+
//description:
|
|
57
|
+
//output: object result
|
|
58
|
+
export function getMonomerLibHelper(): IMonomerLibHelper {
|
|
59
|
+
return MonomerLibHelper.instance;
|
|
60
|
+
}
|
|
54
61
|
|
|
55
|
-
let monomerLib: IMonomerLib | null = null;
|
|
56
62
|
export let hydrophobPalette: SeqPaletteCustom | null = null;
|
|
57
63
|
|
|
58
64
|
export class SeqPaletteCustom implements SeqPalette {
|
|
@@ -67,14 +73,19 @@ export class SeqPaletteCustom implements SeqPalette {
|
|
|
67
73
|
}
|
|
68
74
|
}
|
|
69
75
|
|
|
76
|
+
// let loadLibrariesPromise: Promise<void> = Promise.resolve();
|
|
77
|
+
|
|
70
78
|
//tags: init
|
|
71
79
|
export async function initBio() {
|
|
72
|
-
|
|
80
|
+
// loadLibrariesPromise = loadLibrariesPromise.then(() => {
|
|
81
|
+
await MonomerLibHelper.instance.loadLibraries(); // from initBio()
|
|
82
|
+
// });
|
|
83
|
+
// await loadLibrariesPromise;
|
|
84
|
+
const monomerLib = MonomerLibHelper.instance.getBioLib();
|
|
73
85
|
const monomers: string[] = [];
|
|
74
86
|
const logPs: number[] = [];
|
|
75
87
|
const module = await grok.functions.call('Chem:getRdKitModule');
|
|
76
88
|
|
|
77
|
-
|
|
78
89
|
const series = monomerLib!.getMonomerMolsByType('PEPTIDE')!;
|
|
79
90
|
Object.keys(series).forEach((symbol) => {
|
|
80
91
|
monomers.push(symbol);
|
|
@@ -95,29 +106,27 @@ export async function initBio() {
|
|
|
95
106
|
hydrophobPalette = new SeqPaletteCustom(palette);
|
|
96
107
|
}
|
|
97
108
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
else
|
|
114
|
-
monomerLib!.update(await readLibrary(LIB_PATH, value));
|
|
109
|
+
//name: sequenceTooltip
|
|
110
|
+
//tags: tooltip
|
|
111
|
+
//input: column col {semType: Macromolecule}
|
|
112
|
+
//output: widget result
|
|
113
|
+
export async function sequenceTooltip(col: DG.Column): Promise<DG.Widget<any>> {
|
|
114
|
+
const tv = grok.shell.tv;
|
|
115
|
+
const viewer = await tv.dataFrame.plot.fromType('WebLogo', {
|
|
116
|
+
sequenceColumnName: col.name,
|
|
117
|
+
backgroundColor: 0xFFfdffe5,
|
|
118
|
+
fitArea: false,
|
|
119
|
+
positionHeight: 'Entropy',
|
|
120
|
+
fixWidth: true
|
|
121
|
+
});
|
|
122
|
+
viewer.root.style.height = '50px';
|
|
123
|
+
return viewer;
|
|
115
124
|
}
|
|
116
125
|
|
|
117
126
|
//name: getBioLib
|
|
118
127
|
//output: object monomerLib
|
|
119
|
-
export function getBioLib(): IMonomerLib
|
|
120
|
-
return
|
|
128
|
+
export function getBioLib(): IMonomerLib {
|
|
129
|
+
return MonomerLibHelper.instance.getBioLib();
|
|
121
130
|
}
|
|
122
131
|
|
|
123
132
|
//name: manageFiles
|
|
@@ -130,31 +139,38 @@ export async function manageFiles() {
|
|
|
130
139
|
}
|
|
131
140
|
|
|
132
141
|
//name: Manage Libraries
|
|
133
|
-
//tags: panel, widgets
|
|
134
142
|
//input: column seqColumn {semType: Macromolecule}
|
|
143
|
+
//tags: panel, exclude-actions-panel
|
|
135
144
|
//output: widget result
|
|
136
145
|
export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
|
|
137
146
|
//@ts-ignore
|
|
138
147
|
const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
|
|
139
148
|
const divInputs: HTMLDivElement = ui.div();
|
|
140
|
-
const
|
|
149
|
+
const libFileNameList: string[] = (await grok.dapi.files.list(`${LIB_PATH}`, false, ''))
|
|
141
150
|
.map((it) => it.fileName);
|
|
142
|
-
const
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
151
|
+
const librariesUserSettingsSet: Set<string> = new Set<string>(Object.keys(
|
|
152
|
+
await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true)));
|
|
153
|
+
|
|
154
|
+
let userStoragePromise: Promise<void> = Promise.resolve();
|
|
155
|
+
for (const libFileName of libFileNameList) {
|
|
156
|
+
const libInput: DG.InputBase<boolean | null> = ui.boolInput(libFileName, librariesUserSettingsSet.has(libFileName),
|
|
157
|
+
() => {
|
|
158
|
+
userStoragePromise = userStoragePromise.then(async () => {
|
|
159
|
+
if (libInput.value == true) {
|
|
160
|
+
// Save checked library to user settings 'Libraries'
|
|
161
|
+
await grok.dapi.userDataStorage.postValue(LIB_STORAGE_NAME, libFileName, libFileName, true);
|
|
162
|
+
await MonomerLibHelper.instance.loadLibraries(); // from libraryPanel()
|
|
163
|
+
} else {
|
|
164
|
+
// Remove unchecked library from user settings 'Libraries'
|
|
165
|
+
await grok.dapi.userDataStorage.remove(LIB_STORAGE_NAME, libFileName, true);
|
|
166
|
+
await MonomerLibHelper.instance.loadLibraries(true); // from libraryPanel()
|
|
167
|
+
}
|
|
168
|
+
grok.shell.info('Monomer library user settings saved.');
|
|
169
|
+
});
|
|
170
|
+
});
|
|
171
|
+
divInputs.append(libInput.root);
|
|
157
172
|
}
|
|
173
|
+
|
|
158
174
|
return new DG.Widget(ui.splitV([
|
|
159
175
|
divInputs,
|
|
160
176
|
ui.divV([filesButton])
|
|
@@ -197,11 +213,10 @@ export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCe
|
|
|
197
213
|
}
|
|
198
214
|
|
|
199
215
|
|
|
200
|
-
function checkInputColumnUi(
|
|
201
|
-
|
|
202
|
-
): boolean {
|
|
216
|
+
function checkInputColumnUi(col: DG.Column, name: string, allowedNotations: string[] = [],
|
|
217
|
+
allowedAlphabets: string[] = [], notify: boolean = true): boolean {
|
|
203
218
|
const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
|
|
204
|
-
if (!res)
|
|
219
|
+
if (notify && !res)
|
|
205
220
|
grok.shell.warning(msg);
|
|
206
221
|
return res;
|
|
207
222
|
}
|
|
@@ -258,9 +273,10 @@ export function sequenceAlignment(alignType: string, alignTable: string, gap: nu
|
|
|
258
273
|
}
|
|
259
274
|
|
|
260
275
|
//name: WebLogo
|
|
261
|
-
//description: WebLogo
|
|
276
|
+
//description: WebLogo
|
|
262
277
|
//tags: viewer, panel
|
|
263
278
|
//output: viewer result
|
|
279
|
+
//meta.icon: files/icons/weblogo-viewer.svg
|
|
264
280
|
export function webLogoViewer() {
|
|
265
281
|
return new WebLogoViewer();
|
|
266
282
|
}
|
|
@@ -268,27 +284,42 @@ export function webLogoViewer() {
|
|
|
268
284
|
//name: VdRegions
|
|
269
285
|
//description: V-Domain regions viewer
|
|
270
286
|
//tags: viewer, panel
|
|
287
|
+
//meta.icon: files/icons/vdregions-viewer.svg
|
|
271
288
|
//output: viewer result
|
|
272
289
|
export function vdRegionViewer() {
|
|
273
290
|
return new VdRegionsViewer();
|
|
274
291
|
}
|
|
275
292
|
|
|
293
|
+
//name: SeqActivityCliffsEditor
|
|
294
|
+
//tags: editor
|
|
295
|
+
//input: funccall call
|
|
296
|
+
export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
297
|
+
const funcEditor = new ActivityCliffsFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
298
|
+
ui.dialog({title: 'Activity Cliffs'})
|
|
299
|
+
.add(funcEditor.paramsUI)
|
|
300
|
+
.onOK(async () => {
|
|
301
|
+
call.func.prepare(funcEditor.funcParams).call(true);
|
|
302
|
+
})
|
|
303
|
+
.show();
|
|
304
|
+
}
|
|
305
|
+
|
|
276
306
|
//top-menu: Bio | Sequence Activity Cliffs...
|
|
277
307
|
//name: Sequence Activity Cliffs
|
|
278
308
|
//description: detect activity cliffs
|
|
279
309
|
//input: dataframe table [Input data table]
|
|
280
|
-
//input: column
|
|
310
|
+
//input: column molecules {semType: Macromolecule}
|
|
281
311
|
//input: column activities
|
|
282
312
|
//input: double similarity = 80 [Similarity cutoff]
|
|
283
|
-
//input: string methodName { choices:["UMAP", "t-SNE"
|
|
313
|
+
//input: string methodName { choices:["UMAP", "t-SNE"] }
|
|
314
|
+
//input: object options {optional: true}
|
|
315
|
+
//output: viewer result
|
|
316
|
+
//editor: Bio:SeqActivityCliffsEditor
|
|
284
317
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
285
|
-
similarity: number, methodName: string
|
|
318
|
+
similarity: number, methodName: string, options?: IUMAPOptions | ITSNEOptions
|
|
319
|
+
): Promise<DG.Viewer | undefined> {
|
|
286
320
|
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
287
321
|
return;
|
|
288
322
|
const axesNames = getEmbeddingColsNames(df);
|
|
289
|
-
const options = {
|
|
290
|
-
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
291
|
-
};
|
|
292
323
|
const tags = {
|
|
293
324
|
'units': macroMolecule.getTag(DG.TAGS.UNITS),
|
|
294
325
|
'aligned': macroMolecule.getTag(bioTAGS.aligned),
|
|
@@ -308,23 +339,38 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
308
339
|
DG.SEMTYPE.MACROMOLECULE,
|
|
309
340
|
tags,
|
|
310
341
|
sequenceSpaceByFingerprints,
|
|
311
|
-
|
|
342
|
+
getChemSimilaritiesMatrix,
|
|
312
343
|
createTooltipElement,
|
|
313
344
|
createPropPanelElement,
|
|
314
345
|
createLinesGrid,
|
|
315
|
-
|
|
346
|
+
options);
|
|
316
347
|
return sp;
|
|
317
348
|
}
|
|
318
349
|
|
|
350
|
+
//name: SequenceSpaceEditor
|
|
351
|
+
//tags: editor
|
|
352
|
+
//input: funccall call
|
|
353
|
+
export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
354
|
+
const funcEditor = new SequenceSpaceFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
355
|
+
ui.dialog({title: 'Sequence Space'})
|
|
356
|
+
.add(funcEditor.paramsUI)
|
|
357
|
+
.onOK(async () => {
|
|
358
|
+
call.func.prepare(funcEditor.funcParams).call(true);
|
|
359
|
+
})
|
|
360
|
+
.show();
|
|
361
|
+
}
|
|
362
|
+
|
|
319
363
|
//top-menu: Bio | Sequence Space...
|
|
320
364
|
//name: Sequence Space
|
|
321
365
|
//input: dataframe table
|
|
322
|
-
//input: column
|
|
323
|
-
//input: string methodName { choices:["UMAP", "t-SNE"
|
|
366
|
+
//input: column molecules { semType: Macromolecule }
|
|
367
|
+
//input: string methodName { choices:["UMAP", "t-SNE"] }
|
|
324
368
|
//input: string similarityMetric { choices:["Tanimoto", "Asymmetric", "Cosine", "Sokal"] }
|
|
325
369
|
//input: bool plotEmbeddings = true
|
|
370
|
+
//input: object options {optional: true}
|
|
371
|
+
//editor: Bio:SequenceSpaceEditor
|
|
326
372
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
327
|
-
similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
|
|
373
|
+
similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions): Promise<DG.Viewer | undefined> {
|
|
328
374
|
// Delay is required for initial function dialog to close before starting invalidating of molfiles.
|
|
329
375
|
// Otherwise, dialog is freezing
|
|
330
376
|
await delay(10);
|
|
@@ -339,7 +385,8 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
339
385
|
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
340
386
|
methodName: methodName,
|
|
341
387
|
similarityMetric: similarityMetric,
|
|
342
|
-
embedAxesNames: embedColsNames
|
|
388
|
+
embedAxesNames: embedColsNames,
|
|
389
|
+
options: options
|
|
343
390
|
};
|
|
344
391
|
const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
|
|
345
392
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
@@ -401,40 +448,76 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
401
448
|
}
|
|
402
449
|
|
|
403
450
|
//top-menu: Bio | MSA...
|
|
404
|
-
//name: MSA
|
|
405
|
-
//input: dataframe table
|
|
406
|
-
//input: column sequence { semType: Macromolecule, units: ['fasta'], alphabet: ['DNA', 'RNA', 'PT'] }
|
|
407
|
-
//output: column result
|
|
408
|
-
export async function multipleSequenceAlignmentAny(
|
|
409
|
-
table: DG.DataFrame, sequence: DG.Column
|
|
410
|
-
): Promise<DG.Column | null> {
|
|
411
|
-
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
412
|
-
|
|
413
|
-
if (!checkInputColumnUi(sequence, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
414
|
-
return null;
|
|
415
|
-
|
|
416
|
-
const unUsedName = table.columns.getUnusedName(`msa(${sequence.name})`);
|
|
417
|
-
const msaCol = await runKalign(sequence, false, unUsedName);
|
|
418
|
-
table.columns.add(msaCol);
|
|
419
|
-
|
|
420
|
-
// This call is required to enable cell renderer activation
|
|
421
|
-
await grok.data.detectSemanticTypes(table);
|
|
422
|
-
|
|
423
|
-
// const tv: DG.TableView = grok.shell.tv;
|
|
424
|
-
// tv.grid.invalidate();
|
|
425
|
-
return msaCol;
|
|
426
|
-
}
|
|
427
|
-
|
|
428
|
-
//name: Bio | MSA
|
|
451
|
+
//name: MSA...
|
|
429
452
|
//tags: bio, panel
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
453
|
+
export function multipleSequenceAlignmentAny(col: DG.Column<string> | null = null): void {
|
|
454
|
+
const table = col?.dataFrame ?? grok.shell.t;
|
|
455
|
+
const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
456
|
+
if (seqCol == null) {
|
|
457
|
+
grok.shell.warning(`MSAError: dataset doesn't conain any Macromolecule column`);
|
|
458
|
+
return;
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
let performAlignment: () => Promise<DG.Column<string> | null> = async () => null;
|
|
462
|
+
const methodInput = ui.choiceInput('Method', pepseaMethods[0], pepseaMethods);
|
|
463
|
+
methodInput.setTooltip('Alignment method');
|
|
464
|
+
const gapOpenInput = ui.floatInput('Gap open', 1.53);
|
|
465
|
+
gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
|
|
466
|
+
const gapExtendInput = ui.floatInput('Gap extend', 0);
|
|
467
|
+
gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
|
|
468
|
+
const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
|
|
469
|
+
|
|
470
|
+
const colInput = ui.columnInput('Sequence', table, seqCol, () => {
|
|
471
|
+
const potentialCol = colInput.value;
|
|
472
|
+
const unusedName = table.columns.getUnusedName(`msa(${potentialCol.name})`);
|
|
473
|
+
|
|
474
|
+
if (checkInputColumnUi(
|
|
475
|
+
potentialCol, potentialCol.name, [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)) {
|
|
476
|
+
for (const inputRootStyle of inputRootStyles)
|
|
477
|
+
inputRootStyle.display = 'none';
|
|
478
|
+
|
|
479
|
+
performAlignment = () => runKalign(potentialCol, false, unusedName, clustersColInput.value);
|
|
480
|
+
} else if (checkInputColumnUi(potentialCol, potentialCol.name, [NOTATION.HELM], [], false)) {
|
|
481
|
+
for (const inputRootStyle of inputRootStyles)
|
|
482
|
+
inputRootStyle.display = 'initial';
|
|
483
|
+
|
|
484
|
+
performAlignment = () => runPepsea(potentialCol, unusedName, methodInput.value!, gapOpenInput.value!,
|
|
485
|
+
gapExtendInput.value!, clustersColInput.value);
|
|
486
|
+
} else {
|
|
487
|
+
for (const inputRootStyle of inputRootStyles)
|
|
488
|
+
inputRootStyle.display = 'none';
|
|
489
|
+
|
|
490
|
+
performAlignment = async () => null;
|
|
491
|
+
}
|
|
492
|
+
}) as DG.InputBase<DG.Column<string>>;
|
|
493
|
+
colInput.setTooltip('Sequences column to use for alignment');
|
|
494
|
+
colInput.fireChanged();
|
|
495
|
+
|
|
496
|
+
const clustersColInput = ui.columnInput('Clusters', table, null);
|
|
497
|
+
clustersColInput.nullable = true;
|
|
498
|
+
|
|
499
|
+
let msaCol: DG.Column<string> | null = null;
|
|
500
|
+
ui.dialog('MSA')
|
|
501
|
+
.add(colInput)
|
|
502
|
+
.add(clustersColInput)
|
|
503
|
+
.add(methodInput)
|
|
504
|
+
.add(gapOpenInput)
|
|
505
|
+
.add(gapExtendInput)
|
|
506
|
+
.onOK(async () => {
|
|
507
|
+
colInput.fireChanged();
|
|
508
|
+
msaCol = await performAlignment();
|
|
509
|
+
if (msaCol == null)
|
|
510
|
+
return grok.shell.warning('Wrong column format');
|
|
511
|
+
|
|
512
|
+
table.columns.add(msaCol);
|
|
513
|
+
await grok.data.detectSemanticTypes(table);
|
|
514
|
+
})
|
|
515
|
+
.show();
|
|
434
516
|
}
|
|
435
517
|
|
|
436
518
|
//name: Composition Analysis
|
|
437
519
|
//top-menu: Bio | Composition Analysis
|
|
520
|
+
//meta.icon: files/icons/composition-analysis.svg
|
|
438
521
|
//output: viewer result
|
|
439
522
|
export async function compositionAnalysis(): Promise<void> {
|
|
440
523
|
// Higher priority for columns with MSA data to show with WebLogo.
|
|
@@ -491,8 +574,8 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
491
574
|
await handler(col);
|
|
492
575
|
}
|
|
493
576
|
|
|
494
|
-
//top-menu: Bio |
|
|
495
|
-
//name:
|
|
577
|
+
//top-menu: Bio | SDF to JSON lib...
|
|
578
|
+
//name: SDF to JSON Lib
|
|
496
579
|
//input: dataframe table
|
|
497
580
|
export async function sdfToJsonLib(table: DG.DataFrame) {
|
|
498
581
|
const jsonMonomerLibrary = createJsonMonomerLibFromSdf(table);
|
|
@@ -520,11 +603,10 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
520
603
|
return ffh.importFasta();
|
|
521
604
|
}
|
|
522
605
|
|
|
523
|
-
//
|
|
524
|
-
//
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
export function convertPanel(col: DG.Column): void {
|
|
606
|
+
//top-menu: Bio | Convert...
|
|
607
|
+
//name: convertDialog
|
|
608
|
+
export function convertDialog() {
|
|
609
|
+
const col = getMacromoleculeColumn();
|
|
528
610
|
convert(col);
|
|
529
611
|
}
|
|
530
612
|
|
|
@@ -587,17 +669,17 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
587
669
|
return resDf;
|
|
588
670
|
}
|
|
589
671
|
|
|
590
|
-
//
|
|
591
|
-
//
|
|
592
|
-
|
|
593
|
-
|
|
672
|
+
//top-menu: Bio | Split to monomers
|
|
673
|
+
//name: splitToMonomers
|
|
674
|
+
export function splitToMonomers(): void {
|
|
675
|
+
const col = getMacromoleculeColumn();
|
|
594
676
|
const tempDf = splitAlignedSequences(col);
|
|
595
677
|
const originalDf = col.dataFrame;
|
|
596
678
|
for (const tempCol of tempDf.columns) {
|
|
597
679
|
const newCol = originalDf.columns.add(tempCol);
|
|
598
680
|
newCol.semType = C.SEM_TYPES.MONOMER;
|
|
599
681
|
newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
|
|
600
|
-
newCol.setTag(
|
|
682
|
+
newCol.setTag(bioTAGS.alphabet, col.getTag(bioTAGS.alphabet));
|
|
601
683
|
}
|
|
602
684
|
grok.shell.tv.grid.invalidate();
|
|
603
685
|
}
|
|
@@ -610,44 +692,46 @@ export function getHelmMonomers(sequence: DG.Column<string>): string[] {
|
|
|
610
692
|
}
|
|
611
693
|
|
|
612
694
|
|
|
613
|
-
//name:
|
|
695
|
+
//name: Sequence Similarity Search
|
|
614
696
|
//tags: viewer
|
|
697
|
+
//meta.icon: files/icons/sequence-similarity-viewer.svg
|
|
615
698
|
//output: viewer result
|
|
616
699
|
export function similaritySearchViewer(): SequenceSimilarityViewer {
|
|
617
700
|
return new SequenceSimilarityViewer();
|
|
618
701
|
}
|
|
619
702
|
|
|
620
|
-
//top-menu: Bio | Similarity Search
|
|
703
|
+
//top-menu: Bio | Search | Similarity Search
|
|
621
704
|
//name: similaritySearch
|
|
622
705
|
//description: finds the most similar sequence
|
|
623
706
|
//output: viewer result
|
|
624
707
|
export function similaritySearchTopMenu(): void {
|
|
625
708
|
const view = (grok.shell.v as DG.TableView);
|
|
626
|
-
const viewer = view.addViewer('
|
|
709
|
+
const viewer = view.addViewer('Sequence Similarity Search');
|
|
627
710
|
view.dockManager.dock(viewer, 'down');
|
|
628
711
|
}
|
|
629
712
|
|
|
630
|
-
//name:
|
|
713
|
+
//name: Sequence Diversity Search
|
|
631
714
|
//tags: viewer
|
|
715
|
+
//meta.icon: files/icons/sequence-diversity-viewer.svg
|
|
632
716
|
//output: viewer result
|
|
633
717
|
export function diversitySearchViewer(): SequenceDiversityViewer {
|
|
634
718
|
return new SequenceDiversityViewer();
|
|
635
719
|
}
|
|
636
720
|
|
|
637
|
-
//top-menu: Bio | Diversity Search
|
|
721
|
+
//top-menu: Bio | Search | Diversity Search
|
|
638
722
|
//name: diversitySearch
|
|
639
723
|
//description: finds the most diverse molecules
|
|
640
724
|
//output: viewer result
|
|
641
725
|
export function diversitySearchTopMenu() {
|
|
642
726
|
const view = (grok.shell.v as DG.TableView);
|
|
643
|
-
const viewer = view.addViewer('
|
|
727
|
+
const viewer = view.addViewer('Sequence Diversity Search');
|
|
644
728
|
view.dockManager.dock(viewer, 'down');
|
|
645
729
|
}
|
|
646
730
|
|
|
647
|
-
//
|
|
648
|
-
//
|
|
649
|
-
|
|
650
|
-
|
|
731
|
+
//top-menu: Bio | Substructure Search ...
|
|
732
|
+
//name: bioSubstructureSearch
|
|
733
|
+
export function bioSubstructureSearch(): void {
|
|
734
|
+
const col = getMacromoleculeColumn();
|
|
651
735
|
substructureSearchDialog(col);
|
|
652
736
|
}
|
|
653
737
|
|
|
@@ -2,11 +2,10 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import * as C from '../utils/constants';
|
|
6
5
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
7
6
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
8
7
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
9
|
-
import {NOTATION} from '@datagrok-libraries/bio';
|
|
8
|
+
import {TAGS as bioTAGS, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
9
|
|
|
11
10
|
export const MONOMER_MOLS_COL = 'monomeric-mols';
|
|
12
11
|
|
|
@@ -16,14 +15,16 @@ export const enum MONOMERIC_COL_TAGS {
|
|
|
16
15
|
MONOMERS_DICT = 'monomers-dict'
|
|
17
16
|
}
|
|
18
17
|
|
|
18
|
+
const SUBSTR_HELM_COL_NAME = 'substr_helm';
|
|
19
|
+
|
|
19
20
|
/**
|
|
20
21
|
* Searches substructure in each row of Macromolecule column
|
|
21
22
|
*
|
|
22
23
|
* @param {DG.column} col Column with 'Macromolecule' semantic type
|
|
23
24
|
*/
|
|
24
|
-
export function substructureSearchDialog(col: DG.Column): void {
|
|
25
|
+
export function substructureSearchDialog(col: DG.Column<string>): void {
|
|
25
26
|
const units = col.getTag(DG.TAGS.UNITS);
|
|
26
|
-
const separator = col.getTag(
|
|
27
|
+
const separator = col.getTag(bioTAGS.separator);
|
|
27
28
|
// const notations = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];
|
|
28
29
|
|
|
29
30
|
const substructureInput = ui.textInput('Substructure', '');
|
|
@@ -32,14 +33,15 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
32
33
|
updateDivInnerHTML(inputsDiv, grid.root);
|
|
33
34
|
await ui.tools.waitForElementInDom(grid.root);
|
|
34
35
|
setTimeout(() => {
|
|
35
|
-
grid.cell(
|
|
36
|
+
grid.cell(SUBSTR_HELM_COL_NAME, 0).element.children[0].dispatchEvent(
|
|
37
|
+
new KeyboardEvent('keydown', {key: 'Enter'}));
|
|
36
38
|
}, 100);
|
|
37
39
|
});
|
|
38
40
|
|
|
39
41
|
const df = DG.DataFrame.create(1);
|
|
40
|
-
df.columns.addNewString(
|
|
41
|
-
df.col(
|
|
42
|
-
df.col(
|
|
42
|
+
df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((i) => '');
|
|
43
|
+
df.col(SUBSTR_HELM_COL_NAME)!.semType = col.semType;
|
|
44
|
+
df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
43
45
|
const grid = df.plot.grid();
|
|
44
46
|
const separatorInput = ui.textInput('Separator', separator);
|
|
45
47
|
|
|
@@ -51,13 +53,13 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
51
53
|
|
|
52
54
|
updateDivInnerHTML(inputsDiv, inputs);
|
|
53
55
|
|
|
54
|
-
ui.dialog('Substructure
|
|
56
|
+
ui.dialog('Substructure Search')
|
|
55
57
|
.add(ui.divV([
|
|
56
58
|
ui.divText(`Notation: ${units}`),
|
|
57
59
|
inputsDiv
|
|
58
60
|
]))
|
|
59
61
|
.onOK(async () => {
|
|
60
|
-
let substructure = units === NOTATION.HELM ? df.get(
|
|
62
|
+
let substructure = units === NOTATION.HELM ? df.get(SUBSTR_HELM_COL_NAME, 0) : substructureInput.value;
|
|
61
63
|
if (units === NOTATION.SEPARATOR && separatorInput.value !== separator && separatorInput.value !== '')
|
|
62
64
|
substructure = substructure.replaceAll(separatorInput.value, separator);
|
|
63
65
|
const matchesColName = `Matches: ${substructure}`;
|
|
@@ -75,11 +77,11 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
75
77
|
.show();
|
|
76
78
|
}
|
|
77
79
|
|
|
78
|
-
export function linearSubstructureSearch(substructure: string, col: DG.Column
|
|
80
|
+
export function linearSubstructureSearch(substructure: string, col: DG.Column<string>, separator?: string): DG.BitSet {
|
|
79
81
|
const re = separator ? prepareSubstructureRegex(substructure, separator) : substructure;
|
|
80
82
|
const resultArray = DG.BitSet.create(col.length);
|
|
81
83
|
for (let i = 0; i < col.length; i++) {
|
|
82
|
-
const macromolecule = col.get(i)
|
|
84
|
+
const macromolecule: string = col.get(i)!;
|
|
83
85
|
if (macromolecule.match(re) || macromolecule === substructure)
|
|
84
86
|
resultArray.set(i, true, false);
|
|
85
87
|
}
|
|
@@ -91,17 +93,18 @@ function prepareSubstructureRegex(substructure: string, separator: string) {
|
|
|
91
93
|
const startsWithSep = substructure.charAt(0) === separator;
|
|
92
94
|
const endsWithSep = substructure.charAt(substructure.length - 1) === separator;
|
|
93
95
|
const substrWithoutSep = substructure.replace(new RegExp(`^${char}|${char}$`, 'g'), '');
|
|
94
|
-
const re = startsWithSep ?
|
|
96
|
+
const re = startsWithSep ?
|
|
97
|
+
endsWithSep ? `${char}${substrWithoutSep}${char}` :
|
|
95
98
|
`${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$` :
|
|
96
99
|
endsWithSep ? `^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}` :
|
|
97
100
|
`^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$`;
|
|
98
101
|
return re;
|
|
99
102
|
}
|
|
100
103
|
|
|
101
|
-
export async function helmSubstructureSearch(substructure: string, col: DG.Column): Promise<DG.BitSet> {
|
|
104
|
+
export async function helmSubstructureSearch(substructure: string, col: DG.Column<string>): Promise<DG.BitSet> {
|
|
102
105
|
if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
103
106
|
await invalidateMols(col, true);
|
|
104
|
-
const substructureCol = DG.Column.string('helm', 1).init((i) => substructure);
|
|
107
|
+
const substructureCol: DG.Column<string> = DG.Column.string('helm', 1).init((i) => substructure);
|
|
105
108
|
substructureCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
106
109
|
const substructureMolsCol =
|
|
107
110
|
await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
@@ -113,7 +116,7 @@ export async function helmSubstructureSearch(substructure: string, col: DG.Colum
|
|
|
113
116
|
return matchesCol.get(0);
|
|
114
117
|
}
|
|
115
118
|
|
|
116
|
-
export async function invalidateMols(col: DG.Column
|
|
119
|
+
export async function invalidateMols(col: DG.Column<string>, pattern: boolean) {
|
|
117
120
|
const progressBar = DG.TaskBarProgressIndicator.create(`Invalidating molfiles for ${col.name}`);
|
|
118
121
|
await delay(10);
|
|
119
122
|
const monomersDict = new Map();
|
|
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
|
|
7
7
|
import {_testPaletteN, _testPaletteAA} from '@datagrok-libraries/bio/src/tests/palettes-tests';
|
|
8
|
-
import {
|
|
8
|
+
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
9
9
|
|
|
10
10
|
category('Palettes', () => {
|
|
11
11
|
test('testPaletteN', async () => { await _testPaletteN(); });
|