@datagrok/bio 2.4.30 → 2.4.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +6 -8
- package/README.md +22 -7
- package/detectors.js +21 -12
- package/dist/1.js +2 -0
- package/dist/1.js.map +1 -0
- package/dist/18.js +2 -0
- package/dist/18.js.map +1 -0
- package/dist/190.js +2 -0
- package/dist/190.js.map +1 -0
- package/dist/452.js +2 -0
- package/dist/452.js.map +1 -0
- package/dist/729.js +2 -0
- package/dist/729.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/libraries/broken-lib.sdf +136 -0
- package/files/libraries/group1/mock-lib-3.json +74 -0
- package/files/libraries/mock-lib-2.json +48 -0
- package/files/tests/100_3_clustests.csv +100 -0
- package/files/tests/100_3_clustests_empty_vals.csv +100 -0
- package/files/tests/peptides_motif-with-random_10000.csv +9998 -0
- package/package.json +4 -4
- package/scripts/sequence_generator.py +185 -48
- package/src/analysis/sequence-activity-cliffs.ts +9 -11
- package/src/analysis/sequence-diversity-viewer.ts +8 -3
- package/src/analysis/sequence-search-base-viewer.ts +4 -3
- package/src/analysis/sequence-similarity-viewer.ts +13 -7
- package/src/analysis/sequence-space.ts +15 -12
- package/src/analysis/workers/mm-distance-array-service.ts +48 -0
- package/src/analysis/workers/mm-distance-array-worker.ts +29 -0
- package/src/analysis/workers/mm-distance-worker-creator.ts +6 -9
- package/src/apps/web-logo-app.ts +34 -0
- package/src/calculations/monomerLevelMols.ts +10 -12
- package/src/demo/bio01-similarity-diversity.ts +4 -5
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +6 -7
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +8 -8
- package/src/demo/bio03-atomic-level.ts +1 -4
- package/src/demo/bio05-helm-msa-sequence-space.ts +8 -5
- package/src/demo/utils.ts +4 -3
- package/src/package-test.ts +1 -2
- package/src/package.ts +138 -83
- package/src/seq_align.ts +482 -483
- package/src/substructure-search/substructure-search.ts +3 -3
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +12 -35
- package/src/tests/_first-tests.ts +1 -1
- package/src/tests/activity-cliffs-tests.ts +10 -6
- package/src/tests/activity-cliffs-utils.ts +6 -4
- package/src/tests/bio-tests.ts +20 -25
- package/src/tests/checkInputColumn-tests.ts +5 -11
- package/src/tests/converters-test.ts +19 -37
- package/src/tests/detectors-benchmark-tests.ts +35 -37
- package/src/tests/detectors-tests.ts +29 -34
- package/src/tests/detectors-weak-and-likely-tests.ts +11 -21
- package/src/tests/fasta-export-tests.ts +3 -3
- package/src/tests/fasta-handler-test.ts +2 -3
- package/src/tests/lib-tests.ts +2 -4
- package/src/tests/mm-distance-tests.ts +25 -17
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/tests/msa-tests.ts +12 -9
- package/src/tests/pepsea-tests.ts +6 -3
- package/src/tests/renderers-test.ts +13 -11
- package/src/tests/sequence-space-test.ts +10 -7
- package/src/tests/sequence-space-utils.ts +7 -3
- package/src/tests/similarity-diversity-tests.ts +47 -61
- package/src/tests/splitters-test.ts +14 -20
- package/src/tests/to-atomic-level-tests.ts +9 -17
- package/src/tests/units-handler-splitted-tests.ts +106 -0
- package/src/tests/units-handler-tests.ts +22 -26
- package/src/tests/utils/sequences-generators.ts +6 -2
- package/src/tests/utils.ts +10 -4
- package/src/tests/viewers.ts +1 -1
- package/src/utils/atomic-works.ts +49 -57
- package/src/utils/cell-renderer.ts +25 -8
- package/src/utils/check-input-column.ts +19 -4
- package/src/utils/constants.ts +3 -3
- package/src/utils/convert.ts +56 -23
- package/src/utils/monomer-lib.ts +83 -64
- package/src/utils/multiple-sequence-alignment-ui.ts +24 -21
- package/src/utils/multiple-sequence-alignment.ts +2 -2
- package/src/utils/pepsea.ts +17 -7
- package/src/utils/save-as-fasta.ts +11 -4
- package/src/utils/ui-utils.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +21 -22
- package/src/viewers/web-logo-viewer.ts +189 -154
- package/src/widgets/bio-substructure-filter.ts +9 -6
- package/src/widgets/representations.ts +11 -12
- package/tsconfig.json +1 -1
- package/dist/258.js +0 -2
- package/dist/258.js.map +0 -1
- package/dist/562.js +0 -2
- package/dist/562.js.map +0 -1
- package/dist/705.js +0 -2
- package/dist/705.js.map +0 -1
- package/dist/925.js +0 -2
- package/dist/925.js.map +0 -1
- package/src/analysis/workers/mm-distance-worker.ts +0 -16
package/src/package.ts
CHANGED
|
@@ -3,23 +3,17 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
import {
|
|
6
|
-
MacromoleculeDifferenceCellRenderer,
|
|
7
|
-
MacromoleculeSequenceCellRenderer,
|
|
8
|
-
MonomerCellRenderer
|
|
6
|
+
MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer, MonomerCellRenderer,
|
|
9
7
|
} from './utils/cell-renderer';
|
|
10
8
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
9
|
import {SequenceAlignment} from './seq_align';
|
|
12
|
-
import {
|
|
13
|
-
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
10
|
+
import {ISequenceSpaceResult, getEmbeddingColsNames, getSequenceSpace} from './analysis/sequence-space';
|
|
11
|
+
import {ISequenceSpaceParams, getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
12
|
import {
|
|
15
|
-
createLinesGrid,
|
|
16
|
-
createPropPanelElement,
|
|
17
|
-
createTooltipElement,
|
|
18
|
-
getChemSimilaritiesMatrix,
|
|
13
|
+
createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
|
|
19
14
|
} from './analysis/sequence-activity-cliffs';
|
|
20
|
-
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
21
15
|
import {convert} from './utils/convert';
|
|
22
|
-
import {getMacroMolColumnPropertyPanel
|
|
16
|
+
import {getMacroMolColumnPropertyPanel} from './widgets/representations';
|
|
23
17
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
24
18
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
25
19
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
@@ -32,18 +26,17 @@ import {substructureSearchDialog} from './substructure-search/substructure-searc
|
|
|
32
26
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
33
27
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
34
28
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
35
|
-
import {
|
|
29
|
+
import {
|
|
30
|
+
TAGS as bioTAGS, ALPHABET, NOTATION,
|
|
31
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
36
32
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
37
33
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
38
34
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
39
35
|
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
40
36
|
import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
41
|
-
import {
|
|
42
|
-
LIB_PATH, MonomerLibHelper,
|
|
43
|
-
LIB_STORAGE_NAME, LibSettings, getUserLibSettings, setUserLibSetting, getLibFileNameList
|
|
44
|
-
} from './utils/monomer-lib';
|
|
37
|
+
import {MonomerLibHelper, getUserLibSettings, setUserLibSetting, getLibFileNameList} from './utils/monomer-lib';
|
|
45
38
|
import {getMacromoleculeColumn} from './utils/ui-utils';
|
|
46
|
-
import {ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
39
|
+
import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
47
40
|
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
48
41
|
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
49
42
|
import {demoBio01UI} from './demo/bio01-similarity-diversity';
|
|
@@ -53,7 +46,10 @@ import {demoBio03UI} from './demo/bio03-atomic-level';
|
|
|
53
46
|
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
54
47
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
55
48
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
56
|
-
import {
|
|
49
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
50
|
+
import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
51
|
+
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
52
|
+
import {WebLogoApp} from './apps/web-logo-app';
|
|
57
53
|
|
|
58
54
|
export const _package = new DG.Package();
|
|
59
55
|
|
|
@@ -81,20 +77,15 @@ export class SeqPaletteCustom implements SeqPalette {
|
|
|
81
77
|
}
|
|
82
78
|
}
|
|
83
79
|
|
|
84
|
-
// let loadLibrariesPromise: Promise<void> = Promise.resolve();
|
|
85
|
-
|
|
86
80
|
//tags: init
|
|
87
81
|
export async function initBio() {
|
|
88
|
-
|
|
89
|
-
await MonomerLibHelper.instance.loadLibraries(); // from initBio()
|
|
90
|
-
// });
|
|
91
|
-
// await loadLibrariesPromise;
|
|
82
|
+
await MonomerLibHelper.instance.loadLibraries();
|
|
92
83
|
const monomerLib = MonomerLibHelper.instance.getBioLib();
|
|
93
84
|
const monomers: string[] = [];
|
|
94
85
|
const logPs: number[] = [];
|
|
95
86
|
const module = await grok.functions.call('Chem:getRdKitModule');
|
|
96
87
|
|
|
97
|
-
const series = monomerLib!.
|
|
88
|
+
const series = monomerLib!.getMonomerMolsByPolymerType('PEPTIDE')!;
|
|
98
89
|
Object.keys(series).forEach((symbol) => {
|
|
99
90
|
monomers.push(symbol);
|
|
100
91
|
const block = series[symbol].replaceAll('#R', 'O ');
|
|
@@ -125,7 +116,7 @@ export async function sequenceTooltip(col: DG.Column): Promise<DG.Widget<any>> {
|
|
|
125
116
|
backgroundColor: 0xFFfdffe5,
|
|
126
117
|
fitArea: false,
|
|
127
118
|
positionHeight: 'Entropy',
|
|
128
|
-
fixWidth: true
|
|
119
|
+
fixWidth: true,
|
|
129
120
|
});
|
|
130
121
|
viewer.root.style.height = '50px';
|
|
131
122
|
return viewer;
|
|
@@ -150,13 +141,11 @@ export async function manageFiles() {
|
|
|
150
141
|
//input: column seqColumn {semType: Macromolecule}
|
|
151
142
|
//tags: panel, exclude-actions-panel
|
|
152
143
|
//output: widget result
|
|
153
|
-
export async function libraryPanel(
|
|
144
|
+
export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
|
|
154
145
|
//@ts-ignore
|
|
155
146
|
const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
|
|
156
|
-
const
|
|
147
|
+
const inputsForm: HTMLDivElement = ui.inputs([]);
|
|
157
148
|
const libFileNameList: string[] = await getLibFileNameList();
|
|
158
|
-
const librariesUserSettingsSet: Set<string> = new Set<string>(Object.keys(
|
|
159
|
-
await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true)));
|
|
160
149
|
|
|
161
150
|
let userStoragePromise: Promise<void> = Promise.resolve();
|
|
162
151
|
for (const libFileName of libFileNameList) {
|
|
@@ -176,13 +165,9 @@ export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
|
|
|
176
165
|
grok.shell.info('Monomer library user settings saved.');
|
|
177
166
|
});
|
|
178
167
|
});
|
|
179
|
-
|
|
168
|
+
inputsForm.append(libInput.root);
|
|
180
169
|
}
|
|
181
|
-
|
|
182
|
-
return new DG.Widget(ui.splitV([
|
|
183
|
-
divInputs,
|
|
184
|
-
ui.divV([filesButton])
|
|
185
|
-
]));
|
|
170
|
+
return new DG.Widget(ui.divV([inputsForm, ui.div(filesButton)]));
|
|
186
171
|
}
|
|
187
172
|
|
|
188
173
|
//name: fastaSequenceCellRenderer
|
|
@@ -224,7 +209,7 @@ export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCe
|
|
|
224
209
|
//name: sequenceAlignment
|
|
225
210
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
226
211
|
// eslint-disable-next-line max-len
|
|
227
|
-
//input: string alignTable {choices: ['AUTO', 'NUCLEOTIDES', 'BLOSUM45', 'BLOSUM50','BLOSUM62','BLOSUM80','BLOSUM90','PAM30','PAM70','PAM250','SCHNEIDER','TRANS']}
|
|
212
|
+
//input: string alignTable {choices: ['AUTO', 'NUCLEOTIDES', 'BLOSUM45', 'BLOSUM50', 'BLOSUM62','BLOSUM80','BLOSUM90','PAM30','PAM70','PAM250','SCHNEIDER','TRANS']}
|
|
228
213
|
//input: double gap
|
|
229
214
|
//input: string seq1
|
|
230
215
|
//input: string seq2
|
|
@@ -280,7 +265,7 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
|
280
265
|
//output: viewer result
|
|
281
266
|
//editor: Bio:SeqActivityCliffsEditor
|
|
282
267
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
283
|
-
similarity: number, methodName:
|
|
268
|
+
similarity: number, methodName: DimReductionMethods, options?: IUMAPOptions | ITSNEOptions,
|
|
284
269
|
): Promise<DG.Viewer | undefined> {
|
|
285
270
|
if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
|
|
286
271
|
return;
|
|
@@ -292,37 +277,61 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
292
277
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
293
278
|
};
|
|
294
279
|
const nc = new NotationConverter(macroMolecule);
|
|
295
|
-
let columnDistanceMetric =
|
|
280
|
+
let columnDistanceMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
|
|
296
281
|
let seqCol = macroMolecule;
|
|
297
|
-
if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)){
|
|
298
|
-
if (nc.isFasta()){
|
|
282
|
+
if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)) {
|
|
283
|
+
if (nc.isFasta()) {
|
|
299
284
|
columnDistanceMetric = nc.getDistanceFunctionName();
|
|
300
285
|
} else {
|
|
301
286
|
seqCol = nc.convert(NOTATION.FASTA);
|
|
302
|
-
const uh =
|
|
287
|
+
const uh = UnitsHandler.getOrCreate(seqCol);
|
|
303
288
|
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
304
289
|
tags.units = NOTATION.FASTA;
|
|
305
290
|
}
|
|
306
291
|
}
|
|
307
|
-
const
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
292
|
+
const runCliffs = async () => {
|
|
293
|
+
const sp = await getActivityCliffs(
|
|
294
|
+
df,
|
|
295
|
+
seqCol,
|
|
296
|
+
null,
|
|
297
|
+
axesNames,
|
|
298
|
+
'Activity cliffs', //scatterTitle
|
|
299
|
+
activities,
|
|
300
|
+
similarity,
|
|
301
|
+
columnDistanceMetric, //similarityMetric
|
|
302
|
+
methodName,
|
|
303
|
+
DG.SEMTYPE.MACROMOLECULE,
|
|
304
|
+
tags,
|
|
305
|
+
getSequenceSpace,
|
|
306
|
+
getChemSimilaritiesMatrix,
|
|
307
|
+
createTooltipElement,
|
|
308
|
+
createPropPanelElement,
|
|
309
|
+
createLinesGrid,
|
|
310
|
+
options);
|
|
311
|
+
return sp;
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
const allowedRowCount = 20000;
|
|
315
|
+
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5000 : 2000;
|
|
316
|
+
if (df.rowCount > allowedRowCount) {
|
|
317
|
+
grok.shell.warning(`Too many rows, maximum for sequence activity cliffs is ${allowedRowCount}`);
|
|
318
|
+
return;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
if (df.rowCount > fastRowCount) {
|
|
322
|
+
ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
|
|
323
|
+
Do you want to continue?`))
|
|
324
|
+
.onOK(async () => {
|
|
325
|
+
const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
|
|
326
|
+
const res = await runCliffs();
|
|
327
|
+
progressBar.close();
|
|
328
|
+
return res;
|
|
329
|
+
})
|
|
330
|
+
.show();
|
|
331
|
+
} else {
|
|
332
|
+
const res = await runCliffs();
|
|
333
|
+
return res;
|
|
334
|
+
}
|
|
326
335
|
}
|
|
327
336
|
|
|
328
337
|
//name: SequenceSpaceEditor
|
|
@@ -347,8 +356,10 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
|
347
356
|
//input: bool plotEmbeddings = true
|
|
348
357
|
//input: object options {optional: true}
|
|
349
358
|
//editor: Bio:SequenceSpaceEditor
|
|
350
|
-
export async function sequenceSpaceTopMenu(
|
|
351
|
-
|
|
359
|
+
export async function sequenceSpaceTopMenu(
|
|
360
|
+
table: DG.DataFrame, macroMolecule: DG.Column, methodName: DimReductionMethods,
|
|
361
|
+
similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto,
|
|
362
|
+
plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions,
|
|
352
363
|
): Promise<DG.Viewer | undefined> {
|
|
353
364
|
// Delay is required for initial function dialog to close before starting invalidating of molfiles.
|
|
354
365
|
// Otherwise, dialog is freezing
|
|
@@ -360,26 +371,52 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
360
371
|
const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
|
|
361
372
|
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
|
|
362
373
|
|
|
363
|
-
const chemSpaceParams = {
|
|
374
|
+
const chemSpaceParams: ISequenceSpaceParams = {
|
|
364
375
|
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
365
376
|
methodName: methodName,
|
|
366
377
|
similarityMetric: similarityMetric,
|
|
367
378
|
embedAxesNames: embedColsNames,
|
|
368
|
-
options: options
|
|
379
|
+
options: options,
|
|
369
380
|
};
|
|
370
|
-
|
|
371
|
-
const
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
381
|
+
|
|
382
|
+
const allowedRowCount = methodName === DimReductionMethods.UMAP ? 100000 : 15000;
|
|
383
|
+
// number of rows which will be processed relatively fast
|
|
384
|
+
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5000 : 2000;
|
|
385
|
+
if (table.rowCount > allowedRowCount) {
|
|
386
|
+
grok.shell.warning(`Too many rows, maximum for sequence space is ${allowedRowCount}`);
|
|
387
|
+
return;
|
|
376
388
|
}
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
389
|
+
|
|
390
|
+
if (table.rowCount > fastRowCount) {
|
|
391
|
+
ui.dialog().add(ui.divText(`Sequence space analysis might take several minutes.
|
|
392
|
+
Do you want to continue?`))
|
|
393
|
+
.onOK(async () => {
|
|
394
|
+
const progressBar = DG.TaskBarProgressIndicator.create(`Running Sequence space...`);
|
|
395
|
+
const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
|
|
396
|
+
progressBar.close();
|
|
397
|
+
return processResult(sequenceSpaceRes);
|
|
398
|
+
})
|
|
399
|
+
.show();
|
|
400
|
+
} else {
|
|
401
|
+
const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
|
|
402
|
+
return processResult(sequenceSpaceRes);
|
|
381
403
|
}
|
|
382
404
|
|
|
405
|
+
function processResult(sequenceSpaceRes: ISequenceSpaceResult): DG.ScatterPlotViewer | undefined {
|
|
406
|
+
const embeddings = sequenceSpaceRes.coordinates;
|
|
407
|
+
for (const col of embeddings) {
|
|
408
|
+
const listValues = col.toList();
|
|
409
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
410
|
+
table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
|
|
411
|
+
}
|
|
412
|
+
if (plotEmbeddings) {
|
|
413
|
+
return grok.shell
|
|
414
|
+
.tableView(table.name)
|
|
415
|
+
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
|
|
383
420
|
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
384
421
|
if (!encodedCol)
|
|
385
422
|
return;
|
|
@@ -463,7 +500,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
463
500
|
if (col.semType != DG.SEMTYPE.MACROMOLECULE)
|
|
464
501
|
return false;
|
|
465
502
|
|
|
466
|
-
const
|
|
503
|
+
const _colUH = UnitsHandler.getOrCreate(col);
|
|
467
504
|
// TODO: prevent for cyclic, branched or multiple chains in Helm
|
|
468
505
|
return true;
|
|
469
506
|
});
|
|
@@ -482,12 +519,12 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
482
519
|
return;
|
|
483
520
|
} else if (colList.length > 1) {
|
|
484
521
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
485
|
-
const selectedCol = colList.find((c) => { return
|
|
522
|
+
const selectedCol = colList.find((c) => { return UnitsHandler.getOrCreate(c).isMsa(); });
|
|
486
523
|
const colInput: DG.InputBase = ui.choiceInput(
|
|
487
524
|
'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
|
|
488
525
|
ui.dialog({
|
|
489
526
|
title: 'Composition Analysis',
|
|
490
|
-
helpUrl: '/help/domains/bio/macromolecules.md#composition-analysis'
|
|
527
|
+
helpUrl: '/help/domains/bio/macromolecules.md#composition-analysis',
|
|
491
528
|
})
|
|
492
529
|
.add(ui.div([
|
|
493
530
|
colInput,
|
|
@@ -513,7 +550,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
513
550
|
//name: SDF to JSON Library
|
|
514
551
|
//input: dataframe table
|
|
515
552
|
export async function sdfToJsonLib(table: DG.DataFrame) {
|
|
516
|
-
const
|
|
553
|
+
const _jsonMonomerLibrary = createJsonMonomerLibFromSdf(table);
|
|
517
554
|
}
|
|
518
555
|
|
|
519
556
|
// 2023-05-17 Representations does not work at BioIT
|
|
@@ -582,7 +619,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
582
619
|
|
|
583
620
|
res.push({
|
|
584
621
|
file: fileInfo.path, result: 'detected', column: col.name,
|
|
585
|
-
message: `units: ${col.getTag(DG.TAGS.UNITS)}
|
|
622
|
+
message: `units: ${col.getTag(DG.TAGS.UNITS)}`,
|
|
586
623
|
});
|
|
587
624
|
}
|
|
588
625
|
}
|
|
@@ -614,7 +651,8 @@ export function splitToMonomers(): void {
|
|
|
614
651
|
for (const tempCol of tempDf.columns) {
|
|
615
652
|
const newCol = originalDf.columns.add(tempCol);
|
|
616
653
|
newCol.semType = C.SEM_TYPES.MONOMER;
|
|
617
|
-
|
|
654
|
+
// TODO: GROK-
|
|
655
|
+
//newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
|
|
618
656
|
newCol.setTag(bioTAGS.alphabet, col.getTag(bioTAGS.alphabet));
|
|
619
657
|
}
|
|
620
658
|
grok.shell.tv.grid.invalidate();
|
|
@@ -623,7 +661,8 @@ export function splitToMonomers(): void {
|
|
|
623
661
|
//name: Bio: getHelmMonomers
|
|
624
662
|
//input: column sequence {semType: Macromolecule}
|
|
625
663
|
export function getHelmMonomers(sequence: DG.Column<string>): string[] {
|
|
626
|
-
const
|
|
664
|
+
const uh = UnitsHandler.getOrCreate(sequence);
|
|
665
|
+
const stats = uh.stats;
|
|
627
666
|
return Object.keys(stats.freq);
|
|
628
667
|
}
|
|
629
668
|
|
|
@@ -678,7 +717,7 @@ export function saveAsFasta() {
|
|
|
678
717
|
saveAsFastaUI();
|
|
679
718
|
}
|
|
680
719
|
|
|
681
|
-
//name:
|
|
720
|
+
//name: Bio Substructure Filter
|
|
682
721
|
//description: Substructure filter for macromolecules
|
|
683
722
|
//tags: filter
|
|
684
723
|
//output: filter result
|
|
@@ -687,6 +726,22 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
|
687
726
|
return new BioSubstructureFilter();
|
|
688
727
|
}
|
|
689
728
|
|
|
729
|
+
// -- Test apps --
|
|
730
|
+
|
|
731
|
+
//name: webLogoLargeApp
|
|
732
|
+
export async function webLogoLargeApp(): Promise<void> {
|
|
733
|
+
const pi = DG.TaskBarProgressIndicator.create('WebLogo');
|
|
734
|
+
try {
|
|
735
|
+
const app = new WebLogoApp();
|
|
736
|
+
const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_10000.csv');
|
|
737
|
+
await grok.data.detectSemanticTypes(df);
|
|
738
|
+
await app.init(df, 'webLogoLargeApp');
|
|
739
|
+
} finally {
|
|
740
|
+
pi.close();
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
|
|
690
745
|
// -- Demo --
|
|
691
746
|
|
|
692
747
|
// demoBio01
|