@datagrok/bio 2.4.31 → 2.4.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +6 -8
- package/README.md +22 -7
- package/css/msa.css +3 -0
- package/detectors.js +21 -12
- package/dist/1.js +2 -0
- package/dist/1.js.map +1 -0
- package/dist/18.js +2 -0
- package/dist/18.js.map +1 -0
- package/dist/190.js +2 -0
- package/dist/190.js.map +1 -0
- package/dist/452.js +2 -0
- package/dist/452.js.map +1 -0
- package/dist/729.js +2 -0
- package/dist/729.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/libraries/broken-lib.sdf +136 -0
- package/files/libraries/group1/mock-lib-3.json +74 -0
- package/files/libraries/mock-lib-2.json +48 -0
- package/files/tests/100_3_clustests.csv +100 -0
- package/files/tests/100_3_clustests_empty_vals.csv +100 -0
- package/files/tests/peptides_motif-with-random_10000.csv +9998 -0
- package/package.json +4 -4
- package/scripts/sequence_generator.py +164 -48
- package/src/analysis/sequence-activity-cliffs.ts +7 -9
- package/src/analysis/sequence-diversity-viewer.ts +8 -3
- package/src/analysis/sequence-search-base-viewer.ts +4 -3
- package/src/analysis/sequence-similarity-viewer.ts +13 -7
- package/src/analysis/sequence-space.ts +15 -12
- package/src/analysis/workers/mm-distance-array-service.ts +48 -0
- package/src/analysis/workers/mm-distance-array-worker.ts +29 -0
- package/src/analysis/workers/mm-distance-worker-creator.ts +6 -9
- package/src/apps/web-logo-app.ts +34 -0
- package/src/calculations/monomerLevelMols.ts +10 -12
- package/src/demo/bio01-similarity-diversity.ts +4 -5
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +6 -7
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +7 -8
- package/src/demo/bio03-atomic-level.ts +1 -4
- package/src/demo/bio05-helm-msa-sequence-space.ts +6 -4
- package/src/demo/utils.ts +3 -4
- package/src/package-test.ts +1 -2
- package/src/package.ts +135 -82
- package/src/seq_align.ts +482 -483
- package/src/substructure-search/substructure-search.ts +3 -3
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +12 -35
- package/src/tests/_first-tests.ts +1 -1
- package/src/tests/activity-cliffs-tests.ts +10 -7
- package/src/tests/activity-cliffs-utils.ts +6 -5
- package/src/tests/bio-tests.ts +20 -25
- package/src/tests/checkInputColumn-tests.ts +5 -11
- package/src/tests/converters-test.ts +19 -37
- package/src/tests/detectors-benchmark-tests.ts +35 -37
- package/src/tests/detectors-tests.ts +29 -34
- package/src/tests/detectors-weak-and-likely-tests.ts +11 -21
- package/src/tests/fasta-export-tests.ts +3 -3
- package/src/tests/fasta-handler-test.ts +2 -3
- package/src/tests/lib-tests.ts +2 -4
- package/src/tests/mm-distance-tests.ts +25 -17
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/tests/msa-tests.ts +12 -9
- package/src/tests/pepsea-tests.ts +6 -3
- package/src/tests/renderers-test.ts +13 -11
- package/src/tests/sequence-space-test.ts +10 -8
- package/src/tests/sequence-space-utils.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +47 -61
- package/src/tests/splitters-test.ts +14 -20
- package/src/tests/to-atomic-level-tests.ts +9 -17
- package/src/tests/units-handler-splitted-tests.ts +106 -0
- package/src/tests/units-handler-tests.ts +22 -26
- package/src/tests/utils/sequences-generators.ts +6 -2
- package/src/tests/utils.ts +10 -4
- package/src/tests/viewers.ts +1 -1
- package/src/utils/atomic-works.ts +49 -57
- package/src/utils/cell-renderer.ts +25 -8
- package/src/utils/check-input-column.ts +19 -4
- package/src/utils/constants.ts +3 -3
- package/src/utils/convert.ts +56 -23
- package/src/utils/monomer-lib.ts +83 -64
- package/src/utils/multiple-sequence-alignment-ui.ts +35 -21
- package/src/utils/multiple-sequence-alignment.ts +2 -2
- package/src/utils/pepsea.ts +17 -7
- package/src/utils/save-as-fasta.ts +11 -4
- package/src/utils/ui-utils.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +21 -22
- package/src/viewers/web-logo-viewer.ts +189 -154
- package/src/widgets/bio-substructure-filter.ts +9 -6
- package/src/widgets/representations.ts +11 -12
- package/tsconfig.json +1 -1
- package/dist/258.js +0 -2
- package/dist/258.js.map +0 -1
- package/dist/457.js +0 -2
- package/dist/457.js.map +0 -1
- package/dist/562.js +0 -2
- package/dist/562.js.map +0 -1
- package/dist/925.js +0 -2
- package/dist/925.js.map +0 -1
- package/src/analysis/workers/mm-distance-worker.ts +0 -16
package/src/package.ts
CHANGED
|
@@ -3,23 +3,17 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
import {
|
|
6
|
-
MacromoleculeDifferenceCellRenderer,
|
|
7
|
-
MacromoleculeSequenceCellRenderer,
|
|
8
|
-
MonomerCellRenderer
|
|
6
|
+
MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer, MonomerCellRenderer,
|
|
9
7
|
} from './utils/cell-renderer';
|
|
10
8
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
9
|
import {SequenceAlignment} from './seq_align';
|
|
12
|
-
import {
|
|
10
|
+
import {ISequenceSpaceResult, getEmbeddingColsNames, getSequenceSpace} from './analysis/sequence-space';
|
|
13
11
|
import {ISequenceSpaceParams, getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
12
|
import {
|
|
15
|
-
createLinesGrid,
|
|
16
|
-
createPropPanelElement,
|
|
17
|
-
createTooltipElement,
|
|
18
|
-
getChemSimilaritiesMatrix,
|
|
13
|
+
createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
|
|
19
14
|
} from './analysis/sequence-activity-cliffs';
|
|
20
|
-
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
21
15
|
import {convert} from './utils/convert';
|
|
22
|
-
import {getMacroMolColumnPropertyPanel
|
|
16
|
+
import {getMacroMolColumnPropertyPanel} from './widgets/representations';
|
|
23
17
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
24
18
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
25
19
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
@@ -32,16 +26,15 @@ import {substructureSearchDialog} from './substructure-search/substructure-searc
|
|
|
32
26
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
33
27
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
34
28
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
35
|
-
import {
|
|
29
|
+
import {
|
|
30
|
+
TAGS as bioTAGS, ALPHABET, NOTATION,
|
|
31
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
36
32
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
37
33
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
38
34
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
39
35
|
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
40
36
|
import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
41
|
-
import {
|
|
42
|
-
LIB_PATH, MonomerLibHelper,
|
|
43
|
-
LIB_STORAGE_NAME, LibSettings, getUserLibSettings, setUserLibSetting, getLibFileNameList
|
|
44
|
-
} from './utils/monomer-lib';
|
|
37
|
+
import {MonomerLibHelper, getUserLibSettings, setUserLibSetting, getLibFileNameList} from './utils/monomer-lib';
|
|
45
38
|
import {getMacromoleculeColumn} from './utils/ui-utils';
|
|
46
39
|
import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
47
40
|
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
@@ -53,9 +46,10 @@ import {demoBio03UI} from './demo/bio03-atomic-level';
|
|
|
53
46
|
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
54
47
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
55
48
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
56
|
-
import {
|
|
57
|
-
import {
|
|
58
|
-
import {
|
|
49
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
50
|
+
import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
51
|
+
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
52
|
+
import {WebLogoApp} from './apps/web-logo-app';
|
|
59
53
|
|
|
60
54
|
export const _package = new DG.Package();
|
|
61
55
|
|
|
@@ -83,20 +77,15 @@ export class SeqPaletteCustom implements SeqPalette {
|
|
|
83
77
|
}
|
|
84
78
|
}
|
|
85
79
|
|
|
86
|
-
// let loadLibrariesPromise: Promise<void> = Promise.resolve();
|
|
87
|
-
|
|
88
80
|
//tags: init
|
|
89
81
|
export async function initBio() {
|
|
90
|
-
|
|
91
|
-
await MonomerLibHelper.instance.loadLibraries(); // from initBio()
|
|
92
|
-
// });
|
|
93
|
-
// await loadLibrariesPromise;
|
|
82
|
+
await MonomerLibHelper.instance.loadLibraries();
|
|
94
83
|
const monomerLib = MonomerLibHelper.instance.getBioLib();
|
|
95
84
|
const monomers: string[] = [];
|
|
96
85
|
const logPs: number[] = [];
|
|
97
86
|
const module = await grok.functions.call('Chem:getRdKitModule');
|
|
98
87
|
|
|
99
|
-
const series = monomerLib!.
|
|
88
|
+
const series = monomerLib!.getMonomerMolsByPolymerType('PEPTIDE')!;
|
|
100
89
|
Object.keys(series).forEach((symbol) => {
|
|
101
90
|
monomers.push(symbol);
|
|
102
91
|
const block = series[symbol].replaceAll('#R', 'O ');
|
|
@@ -127,7 +116,7 @@ export async function sequenceTooltip(col: DG.Column): Promise<DG.Widget<any>> {
|
|
|
127
116
|
backgroundColor: 0xFFfdffe5,
|
|
128
117
|
fitArea: false,
|
|
129
118
|
positionHeight: 'Entropy',
|
|
130
|
-
fixWidth: true
|
|
119
|
+
fixWidth: true,
|
|
131
120
|
});
|
|
132
121
|
viewer.root.style.height = '50px';
|
|
133
122
|
return viewer;
|
|
@@ -152,13 +141,11 @@ export async function manageFiles() {
|
|
|
152
141
|
//input: column seqColumn {semType: Macromolecule}
|
|
153
142
|
//tags: panel, exclude-actions-panel
|
|
154
143
|
//output: widget result
|
|
155
|
-
export async function libraryPanel(
|
|
144
|
+
export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
|
|
156
145
|
//@ts-ignore
|
|
157
146
|
const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
|
|
158
|
-
const
|
|
147
|
+
const inputsForm: HTMLDivElement = ui.inputs([]);
|
|
159
148
|
const libFileNameList: string[] = await getLibFileNameList();
|
|
160
|
-
const librariesUserSettingsSet: Set<string> = new Set<string>(Object.keys(
|
|
161
|
-
await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true)));
|
|
162
149
|
|
|
163
150
|
let userStoragePromise: Promise<void> = Promise.resolve();
|
|
164
151
|
for (const libFileName of libFileNameList) {
|
|
@@ -178,13 +165,9 @@ export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
|
|
|
178
165
|
grok.shell.info('Monomer library user settings saved.');
|
|
179
166
|
});
|
|
180
167
|
});
|
|
181
|
-
|
|
168
|
+
inputsForm.append(libInput.root);
|
|
182
169
|
}
|
|
183
|
-
|
|
184
|
-
return new DG.Widget(ui.splitV([
|
|
185
|
-
divInputs,
|
|
186
|
-
ui.divV([filesButton])
|
|
187
|
-
]));
|
|
170
|
+
return new DG.Widget(ui.divV([inputsForm, ui.div(filesButton)]));
|
|
188
171
|
}
|
|
189
172
|
|
|
190
173
|
//name: fastaSequenceCellRenderer
|
|
@@ -226,7 +209,7 @@ export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCe
|
|
|
226
209
|
//name: sequenceAlignment
|
|
227
210
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
228
211
|
// eslint-disable-next-line max-len
|
|
229
|
-
//input: string alignTable {choices: ['AUTO', 'NUCLEOTIDES', 'BLOSUM45', 'BLOSUM50','BLOSUM62','BLOSUM80','BLOSUM90','PAM30','PAM70','PAM250','SCHNEIDER','TRANS']}
|
|
212
|
+
//input: string alignTable {choices: ['AUTO', 'NUCLEOTIDES', 'BLOSUM45', 'BLOSUM50', 'BLOSUM62','BLOSUM80','BLOSUM90','PAM30','PAM70','PAM250','SCHNEIDER','TRANS']}
|
|
230
213
|
//input: double gap
|
|
231
214
|
//input: string seq1
|
|
232
215
|
//input: string seq2
|
|
@@ -282,7 +265,7 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
|
282
265
|
//output: viewer result
|
|
283
266
|
//editor: Bio:SeqActivityCliffsEditor
|
|
284
267
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
285
|
-
similarity: number, methodName: DimReductionMethods, options?: IUMAPOptions | ITSNEOptions
|
|
268
|
+
similarity: number, methodName: DimReductionMethods, options?: IUMAPOptions | ITSNEOptions,
|
|
286
269
|
): Promise<DG.Viewer | undefined> {
|
|
287
270
|
if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
|
|
288
271
|
return;
|
|
@@ -294,37 +277,61 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
294
277
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
295
278
|
};
|
|
296
279
|
const nc = new NotationConverter(macroMolecule);
|
|
297
|
-
let columnDistanceMetric:
|
|
280
|
+
let columnDistanceMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
|
|
298
281
|
let seqCol = macroMolecule;
|
|
299
|
-
if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)){
|
|
300
|
-
if (nc.isFasta()){
|
|
282
|
+
if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)) {
|
|
283
|
+
if (nc.isFasta()) {
|
|
301
284
|
columnDistanceMetric = nc.getDistanceFunctionName();
|
|
302
285
|
} else {
|
|
303
286
|
seqCol = nc.convert(NOTATION.FASTA);
|
|
304
|
-
const uh =
|
|
287
|
+
const uh = UnitsHandler.getOrCreate(seqCol);
|
|
305
288
|
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
306
289
|
tags.units = NOTATION.FASTA;
|
|
307
290
|
}
|
|
308
291
|
}
|
|
309
|
-
const
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
292
|
+
const runCliffs = async () => {
|
|
293
|
+
const sp = await getActivityCliffs(
|
|
294
|
+
df,
|
|
295
|
+
seqCol,
|
|
296
|
+
null,
|
|
297
|
+
axesNames,
|
|
298
|
+
'Activity cliffs', //scatterTitle
|
|
299
|
+
activities,
|
|
300
|
+
similarity,
|
|
301
|
+
columnDistanceMetric, //similarityMetric
|
|
302
|
+
methodName,
|
|
303
|
+
DG.SEMTYPE.MACROMOLECULE,
|
|
304
|
+
tags,
|
|
305
|
+
getSequenceSpace,
|
|
306
|
+
getChemSimilaritiesMatrix,
|
|
307
|
+
createTooltipElement,
|
|
308
|
+
createPropPanelElement,
|
|
309
|
+
createLinesGrid,
|
|
310
|
+
options);
|
|
311
|
+
return sp;
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
const allowedRowCount = 20000;
|
|
315
|
+
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5000 : 2000;
|
|
316
|
+
if (df.rowCount > allowedRowCount) {
|
|
317
|
+
grok.shell.warning(`Too many rows, maximum for sequence activity cliffs is ${allowedRowCount}`);
|
|
318
|
+
return;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
if (df.rowCount > fastRowCount) {
|
|
322
|
+
ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
|
|
323
|
+
Do you want to continue?`))
|
|
324
|
+
.onOK(async () => {
|
|
325
|
+
const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
|
|
326
|
+
const res = await runCliffs();
|
|
327
|
+
progressBar.close();
|
|
328
|
+
return res;
|
|
329
|
+
})
|
|
330
|
+
.show();
|
|
331
|
+
} else {
|
|
332
|
+
const res = await runCliffs();
|
|
333
|
+
return res;
|
|
334
|
+
}
|
|
328
335
|
}
|
|
329
336
|
|
|
330
337
|
//name: SequenceSpaceEditor
|
|
@@ -349,8 +356,10 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
|
349
356
|
//input: bool plotEmbeddings = true
|
|
350
357
|
//input: object options {optional: true}
|
|
351
358
|
//editor: Bio:SequenceSpaceEditor
|
|
352
|
-
export async function sequenceSpaceTopMenu(
|
|
353
|
-
|
|
359
|
+
export async function sequenceSpaceTopMenu(
|
|
360
|
+
table: DG.DataFrame, macroMolecule: DG.Column, methodName: DimReductionMethods,
|
|
361
|
+
similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto,
|
|
362
|
+
plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions,
|
|
354
363
|
): Promise<DG.Viewer | undefined> {
|
|
355
364
|
// Delay is required for initial function dialog to close before starting invalidating of molfiles.
|
|
356
365
|
// Otherwise, dialog is freezing
|
|
@@ -367,21 +376,47 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
367
376
|
methodName: methodName,
|
|
368
377
|
similarityMetric: similarityMetric,
|
|
369
378
|
embedAxesNames: embedColsNames,
|
|
370
|
-
options: options
|
|
379
|
+
options: options,
|
|
371
380
|
};
|
|
372
|
-
|
|
373
|
-
const
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
381
|
+
|
|
382
|
+
const allowedRowCount = methodName === DimReductionMethods.UMAP ? 100000 : 15000;
|
|
383
|
+
// number of rows which will be processed relatively fast
|
|
384
|
+
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5000 : 2000;
|
|
385
|
+
if (table.rowCount > allowedRowCount) {
|
|
386
|
+
grok.shell.warning(`Too many rows, maximum for sequence space is ${allowedRowCount}`);
|
|
387
|
+
return;
|
|
378
388
|
}
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
389
|
+
|
|
390
|
+
if (table.rowCount > fastRowCount) {
|
|
391
|
+
ui.dialog().add(ui.divText(`Sequence space analysis might take several minutes.
|
|
392
|
+
Do you want to continue?`))
|
|
393
|
+
.onOK(async () => {
|
|
394
|
+
const progressBar = DG.TaskBarProgressIndicator.create(`Running Sequence space...`);
|
|
395
|
+
const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
|
|
396
|
+
progressBar.close();
|
|
397
|
+
return processResult(sequenceSpaceRes);
|
|
398
|
+
})
|
|
399
|
+
.show();
|
|
400
|
+
} else {
|
|
401
|
+
const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
|
|
402
|
+
return processResult(sequenceSpaceRes);
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
function processResult(sequenceSpaceRes: ISequenceSpaceResult): DG.ScatterPlotViewer | undefined {
|
|
406
|
+
const embeddings = sequenceSpaceRes.coordinates;
|
|
407
|
+
for (const col of embeddings) {
|
|
408
|
+
const listValues = col.toList();
|
|
409
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
410
|
+
table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
|
|
411
|
+
}
|
|
412
|
+
if (plotEmbeddings) {
|
|
413
|
+
return grok.shell
|
|
414
|
+
.tableView(table.name)
|
|
415
|
+
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
416
|
+
}
|
|
383
417
|
}
|
|
384
418
|
|
|
419
|
+
|
|
385
420
|
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
386
421
|
if (!encodedCol)
|
|
387
422
|
return;
|
|
@@ -465,7 +500,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
465
500
|
if (col.semType != DG.SEMTYPE.MACROMOLECULE)
|
|
466
501
|
return false;
|
|
467
502
|
|
|
468
|
-
const
|
|
503
|
+
const _colUH = UnitsHandler.getOrCreate(col);
|
|
469
504
|
// TODO: prevent for cyclic, branched or multiple chains in Helm
|
|
470
505
|
return true;
|
|
471
506
|
});
|
|
@@ -484,12 +519,12 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
484
519
|
return;
|
|
485
520
|
} else if (colList.length > 1) {
|
|
486
521
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
487
|
-
const selectedCol = colList.find((c) => { return
|
|
522
|
+
const selectedCol = colList.find((c) => { return UnitsHandler.getOrCreate(c).isMsa(); });
|
|
488
523
|
const colInput: DG.InputBase = ui.choiceInput(
|
|
489
524
|
'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
|
|
490
525
|
ui.dialog({
|
|
491
526
|
title: 'Composition Analysis',
|
|
492
|
-
helpUrl: '/help/domains/bio/macromolecules.md#composition-analysis'
|
|
527
|
+
helpUrl: '/help/domains/bio/macromolecules.md#composition-analysis',
|
|
493
528
|
})
|
|
494
529
|
.add(ui.div([
|
|
495
530
|
colInput,
|
|
@@ -515,7 +550,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
515
550
|
//name: SDF to JSON Library
|
|
516
551
|
//input: dataframe table
|
|
517
552
|
export async function sdfToJsonLib(table: DG.DataFrame) {
|
|
518
|
-
const
|
|
553
|
+
const _jsonMonomerLibrary = createJsonMonomerLibFromSdf(table);
|
|
519
554
|
}
|
|
520
555
|
|
|
521
556
|
// 2023-05-17 Representations does not work at BioIT
|
|
@@ -584,7 +619,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
584
619
|
|
|
585
620
|
res.push({
|
|
586
621
|
file: fileInfo.path, result: 'detected', column: col.name,
|
|
587
|
-
message: `units: ${col.getTag(DG.TAGS.UNITS)}
|
|
622
|
+
message: `units: ${col.getTag(DG.TAGS.UNITS)}`,
|
|
588
623
|
});
|
|
589
624
|
}
|
|
590
625
|
}
|
|
@@ -616,7 +651,8 @@ export function splitToMonomers(): void {
|
|
|
616
651
|
for (const tempCol of tempDf.columns) {
|
|
617
652
|
const newCol = originalDf.columns.add(tempCol);
|
|
618
653
|
newCol.semType = C.SEM_TYPES.MONOMER;
|
|
619
|
-
|
|
654
|
+
// TODO: GROK-
|
|
655
|
+
//newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
|
|
620
656
|
newCol.setTag(bioTAGS.alphabet, col.getTag(bioTAGS.alphabet));
|
|
621
657
|
}
|
|
622
658
|
grok.shell.tv.grid.invalidate();
|
|
@@ -625,7 +661,8 @@ export function splitToMonomers(): void {
|
|
|
625
661
|
//name: Bio: getHelmMonomers
|
|
626
662
|
//input: column sequence {semType: Macromolecule}
|
|
627
663
|
export function getHelmMonomers(sequence: DG.Column<string>): string[] {
|
|
628
|
-
const
|
|
664
|
+
const uh = UnitsHandler.getOrCreate(sequence);
|
|
665
|
+
const stats = uh.stats;
|
|
629
666
|
return Object.keys(stats.freq);
|
|
630
667
|
}
|
|
631
668
|
|
|
@@ -680,7 +717,7 @@ export function saveAsFasta() {
|
|
|
680
717
|
saveAsFastaUI();
|
|
681
718
|
}
|
|
682
719
|
|
|
683
|
-
//name:
|
|
720
|
+
//name: Bio Substructure Filter
|
|
684
721
|
//description: Substructure filter for macromolecules
|
|
685
722
|
//tags: filter
|
|
686
723
|
//output: filter result
|
|
@@ -689,6 +726,22 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
|
689
726
|
return new BioSubstructureFilter();
|
|
690
727
|
}
|
|
691
728
|
|
|
729
|
+
// -- Test apps --
|
|
730
|
+
|
|
731
|
+
//name: webLogoLargeApp
|
|
732
|
+
export async function webLogoLargeApp(): Promise<void> {
|
|
733
|
+
const pi = DG.TaskBarProgressIndicator.create('WebLogo');
|
|
734
|
+
try {
|
|
735
|
+
const app = new WebLogoApp();
|
|
736
|
+
const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_10000.csv');
|
|
737
|
+
await grok.data.detectSemanticTypes(df);
|
|
738
|
+
await app.init(df, 'webLogoLargeApp');
|
|
739
|
+
} finally {
|
|
740
|
+
pi.close();
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
|
|
692
745
|
// -- Demo --
|
|
693
746
|
|
|
694
747
|
// demoBio01
|