@datagrok/bio 2.4.19 → 2.4.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -8
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/data/sample_FASTA_PT_activity.csv +100 -0
- package/files/tests/to-atomic-level-dna-fasta-input.csv +11 -0
- package/files/tests/to-atomic-level-dna-output.csv +15299 -0
- package/files/tests/to-atomic-level-msa-output.csv +3594 -0
- package/files/tests/to-atomic-level-msa-separator-input.csv +12 -0
- package/files/tests/to-atomic-level-peptides-fasta-input.csv +65 -0
- package/files/tests/to-atomic-level-peptides-output.csv +34901 -0
- package/package.json +3 -3
- package/src/demo/bio01-similarity-diversity.ts +7 -3
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +10 -4
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +7 -8
- package/src/demo/bio05-helm-msa-sequence-space.ts +3 -3
- package/src/demo/utils.ts +0 -12
- package/src/package-test.ts +1 -0
- package/src/package.ts +23 -8
- package/src/tests/converters-test.ts +24 -24
- package/src/tests/to-atomic-level-tests.ts +187 -0
- package/src/utils/multiple-sequence-alignment-ui.ts +18 -5
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.
|
|
8
|
+
"version": "2.4.24",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,10 +14,10 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.30.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.3.23",
|
|
20
|
-
"@datagrok-libraries/tutorials": "^1.3.
|
|
20
|
+
"@datagrok-libraries/tutorials": "^1.3.2",
|
|
21
21
|
"@datagrok-libraries/utils": "^2.1.3",
|
|
22
22
|
"cash-dom": "^8.0.0",
|
|
23
23
|
"css-loader": "^6.7.3",
|
|
@@ -9,7 +9,7 @@ import {handleError} from './utils';
|
|
|
9
9
|
import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
|
|
10
10
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
11
11
|
|
|
12
|
-
const dataFn: string = 'data/
|
|
12
|
+
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
13
13
|
|
|
14
14
|
export async function demoBio01UI() {
|
|
15
15
|
let view: DG.TableView;
|
|
@@ -27,8 +27,12 @@ export async function demoBio01UI() {
|
|
|
27
27
|
df = await _package.files.readCsv(dataFn);
|
|
28
28
|
view = grok.shell.addTableView(df);
|
|
29
29
|
|
|
30
|
-
view.grid.columns.byName('
|
|
31
|
-
view.grid.columns.byName('
|
|
30
|
+
view.grid.columns.byName('cluster')!.visible = false;
|
|
31
|
+
view.grid.columns.byName('sequence_id')!.visible = false;
|
|
32
|
+
view.grid.columns.byName('sequence')!.width = 300;
|
|
33
|
+
view.grid.columns.byName('activity')!.visible = false;
|
|
34
|
+
view.grid.columns.byName('is_cliff')!.visible = false;
|
|
35
|
+
|
|
32
36
|
// TODO: Fix column width
|
|
33
37
|
}, {
|
|
34
38
|
description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
|
|
@@ -11,7 +11,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
|
|
|
11
11
|
import {demoSequenceSpace, handleError} from './utils';
|
|
12
12
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
13
13
|
|
|
14
|
-
const dataFn = 'data/
|
|
14
|
+
const dataFn = 'data/sample_FASTA_PT_activity.csv';
|
|
15
15
|
const seqColName = 'sequence';
|
|
16
16
|
|
|
17
17
|
export async function demoBio01aUI() {
|
|
@@ -21,7 +21,7 @@ export async function demoBio01aUI() {
|
|
|
21
21
|
let df: DG.DataFrame;
|
|
22
22
|
let spViewer: DG.ScatterPlotViewer;
|
|
23
23
|
|
|
24
|
-
const
|
|
24
|
+
const dimRedMethod: string = 'UMAP';
|
|
25
25
|
const idRows: { [id: number]: number } = {};
|
|
26
26
|
const embedCols: { [colName: string]: DG.Column<number> } = {};
|
|
27
27
|
|
|
@@ -38,6 +38,9 @@ export async function demoBio01aUI() {
|
|
|
38
38
|
]);
|
|
39
39
|
view = grok.shell.addTableView(df);
|
|
40
40
|
view.grid.props.rowHeight = 22;
|
|
41
|
+
view.grid.columns.byName('cluster')!.visible = false;
|
|
42
|
+
view.grid.columns.byName('sequence')!.width = 200;
|
|
43
|
+
view.grid.columns.byName('is_cliff')!.visible = false;
|
|
41
44
|
|
|
42
45
|
grok.shell.windows.showContextPanel = false;
|
|
43
46
|
grok.shell.windows.showProperties = false;
|
|
@@ -46,7 +49,7 @@ export async function demoBio01aUI() {
|
|
|
46
49
|
delay: 2000,
|
|
47
50
|
})
|
|
48
51
|
.step('Build sequence space', async () => {
|
|
49
|
-
spViewer = await demoSequenceSpace(view, df, seqColName,
|
|
52
|
+
spViewer = await demoSequenceSpace(view, df, seqColName, dimRedMethod);
|
|
50
53
|
}, {
|
|
51
54
|
description: `Reduce sequence space dimensionality to display on 2D representation.`,
|
|
52
55
|
delay: 2000
|
|
@@ -71,7 +74,10 @@ export async function demoBio01aUI() {
|
|
|
71
74
|
delay: 2000,
|
|
72
75
|
})
|
|
73
76
|
.step('Select a bunch of sequences', async () => {
|
|
74
|
-
|
|
77
|
+
const seqIdCol: DG.Column<string> = df.getCol('sequence_id');
|
|
78
|
+
df.selection.init((rowI: number) => {
|
|
79
|
+
return ['c0_seq120', 'c0_seq105', 'c0_seq121', 'c0_seq93'].includes(seqIdCol.get(rowI)!);
|
|
80
|
+
});
|
|
75
81
|
df.currentRowIdx = 27;
|
|
76
82
|
}, {
|
|
77
83
|
description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
|
|
@@ -13,7 +13,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
|
|
|
13
13
|
import {handleError} from './utils';
|
|
14
14
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
15
15
|
|
|
16
|
-
const dataFn: string = '
|
|
16
|
+
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
17
17
|
|
|
18
18
|
export async function demoBio01bUI() {
|
|
19
19
|
let treeHelper: ITreeHelper;
|
|
@@ -23,7 +23,7 @@ export async function demoBio01bUI() {
|
|
|
23
23
|
let view: DG.TableView;
|
|
24
24
|
let activityCliffsViewer: DG.ScatterPlotViewer;
|
|
25
25
|
|
|
26
|
-
const
|
|
26
|
+
const dimRedMethod: string = 'UMAP';
|
|
27
27
|
const idRows: { [id: number]: number } = {};
|
|
28
28
|
|
|
29
29
|
try {
|
|
@@ -43,10 +43,9 @@ export async function demoBio01bUI() {
|
|
|
43
43
|
|
|
44
44
|
view = grok.shell.addTableView(df);
|
|
45
45
|
view.grid.props.rowHeight = 22;
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
lengthGCol.width = 0;
|
|
46
|
+
view.grid.columns.byName('cluster')!.visible = false;
|
|
47
|
+
view.grid.columns.byName('sequence')!.width = 300;
|
|
48
|
+
view.grid.columns.byName('is_cliff')!.visible = false;
|
|
50
49
|
}, {
|
|
51
50
|
description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
|
|
52
51
|
delay: 2000,
|
|
@@ -54,7 +53,7 @@ export async function demoBio01bUI() {
|
|
|
54
53
|
.step('Find activity cliffs', async () => {
|
|
55
54
|
activityCliffsViewer = (await activityCliffs(
|
|
56
55
|
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
57
|
-
80,
|
|
56
|
+
80, dimRedMethod)) as DG.ScatterPlotViewer;
|
|
58
57
|
view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
|
|
59
58
|
|
|
60
59
|
// Show grid viewer with the cliffs
|
|
@@ -86,7 +85,7 @@ export async function demoBio01bUI() {
|
|
|
86
85
|
//cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
|
|
87
86
|
const cliffsDfGrid: DG.Grid = activityCliffsViewer.dataFrame.temp[acTEMPS.cliffsDfGrid];
|
|
88
87
|
//cliffsDfGrid.dataFrame.selection.init((i) => i == currentCliffIdx);
|
|
89
|
-
cliffsDfGrid.dataFrame.currentRowIdx = 0;
|
|
88
|
+
if (cliffsDfGrid.dataFrame.rowCount > 0) cliffsDfGrid.dataFrame.currentRowIdx = 0;
|
|
90
89
|
//cliffsDfGrid.dataFrame.selection.set(currentCliffIdx, true, true);
|
|
91
90
|
|
|
92
91
|
// /* workaround to select rows of the cliff */
|
|
@@ -22,6 +22,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
22
22
|
|
|
23
23
|
const helmColName: string = 'HELM';
|
|
24
24
|
const msaHelmColName: string = 'msa(HELM)';
|
|
25
|
+
const dimRedMethod: string = 'UMAP';
|
|
25
26
|
|
|
26
27
|
try {
|
|
27
28
|
const demoScript = new DemoScript(
|
|
@@ -37,7 +38,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
37
38
|
description: 'Load dataset with macromolecules of \'Helm\' notation.',
|
|
38
39
|
delay: 2000,
|
|
39
40
|
})
|
|
40
|
-
.step('Align
|
|
41
|
+
.step('Align peptides with non-natural aminoacids with PepSeA', async () => {
|
|
41
42
|
helmCol = df.getCol(helmColName);
|
|
42
43
|
const method: string = pepseaMethods[0];
|
|
43
44
|
const gapOpen: number = 1.53;
|
|
@@ -50,9 +51,8 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
50
51
|
delay: 2000,
|
|
51
52
|
})
|
|
52
53
|
.step('Build sequence space', async () => {
|
|
53
|
-
const method: string = 'UMAP';
|
|
54
54
|
ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
|
|
55
|
-
|
|
55
|
+
dimRedMethod, StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
|
|
56
56
|
view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
57
57
|
}, {
|
|
58
58
|
description: 'Reduce sequence space dimensionality to display on 2D representation.',
|
package/src/demo/utils.ts
CHANGED
|
@@ -52,18 +52,6 @@ export async function demoSequenceSpace(
|
|
|
52
52
|
embedCol.init((rowI) => { return embedColData[rowI]; });
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
-
const rowCount: number = df.rowCount;
|
|
56
|
-
const idCol: DG.Column = df.getCol('id');
|
|
57
|
-
for (let idRowI = 0; idRowI < rowCount; idRowI++) {
|
|
58
|
-
const id = idCol.get(idRowI);
|
|
59
|
-
//idRows[id] = idRowI;
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
for (const embedColName of Object.values(EMBED_COL_NAMES)) {
|
|
63
|
-
const embedCol: DG.Column<number> = df.getCol(embedColName);
|
|
64
|
-
//embedCols[embedColName] = embedCol;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
55
|
const t3: number = Date.now();
|
|
68
56
|
_package.logger.debug('MLB: MlbVrSpaceBrowser.buildView(), postprocess reduceDimensionality ' +
|
|
69
57
|
`ET: ${((t3 - t2) / 1000)} s`);
|
package/src/package-test.ts
CHANGED
|
@@ -22,6 +22,7 @@ import './tests/substructure-filters-tests';
|
|
|
22
22
|
import './tests/pepsea-tests';
|
|
23
23
|
import './tests/viewers';
|
|
24
24
|
import './tests/units-handler-tests';
|
|
25
|
+
import './tests/to-atomic-level-tests';
|
|
25
26
|
import './tests/mm-distance-tests';
|
|
26
27
|
|
|
27
28
|
// Tests hanging github CI
|
package/src/package.ts
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
} from './utils/cell-renderer';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {SequenceAlignment} from './seq_align';
|
|
12
|
-
import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
12
|
+
import {getEmbeddingColsNames, sequenceSpaceByFingerprints, getSequenceSpace} from './analysis/sequence-space';
|
|
13
13
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
14
|
import {
|
|
15
15
|
createLinesGrid,
|
|
@@ -290,19 +290,23 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
290
290
|
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
291
291
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
292
292
|
};
|
|
293
|
+
const uh = new UnitsHandler(macroMolecule);
|
|
294
|
+
let columnDistanceMetric = 'Tanimoto';
|
|
295
|
+
if (uh.isFasta())
|
|
296
|
+
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
293
297
|
const sp = await getActivityCliffs(
|
|
294
298
|
df,
|
|
295
299
|
macroMolecule,
|
|
296
300
|
null,
|
|
297
301
|
axesNames,
|
|
298
|
-
'Activity cliffs',
|
|
302
|
+
'Activity cliffs', //scatterTitle
|
|
299
303
|
activities,
|
|
300
304
|
similarity,
|
|
301
|
-
|
|
305
|
+
columnDistanceMetric, //similarityMetric
|
|
302
306
|
methodName,
|
|
303
307
|
DG.SEMTYPE.MACROMOLECULE,
|
|
304
308
|
tags,
|
|
305
|
-
|
|
309
|
+
getSequenceSpace,
|
|
306
310
|
getChemSimilaritiesMatrix,
|
|
307
311
|
createTooltipElement,
|
|
308
312
|
createPropPanelElement,
|
|
@@ -353,7 +357,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
353
357
|
embedAxesNames: embedColsNames,
|
|
354
358
|
options: options
|
|
355
359
|
};
|
|
356
|
-
const sequenceSpaceRes = await
|
|
360
|
+
const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
|
|
357
361
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
358
362
|
for (const col of embeddings) {
|
|
359
363
|
const listValues = col.toList();
|
|
@@ -407,9 +411,15 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
407
411
|
}
|
|
408
412
|
if (!checkInputColumnUI(macroMolecule, 'To Atomic Level'))
|
|
409
413
|
return;
|
|
410
|
-
const
|
|
411
|
-
const
|
|
412
|
-
|
|
414
|
+
const monomerLib: IMonomerLib = (await getMonomerLibHelper()).getBioLib();
|
|
415
|
+
const atomicLevelRes = await _toAtomicLevel(df, macroMolecule, monomerLib);
|
|
416
|
+
if (atomicLevelRes.col !== null) {
|
|
417
|
+
df.columns.add(atomicLevelRes.col, true);
|
|
418
|
+
await grok.data.detectSemanticTypes(df);
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
if (atomicLevelRes.warnings && atomicLevelRes.warnings.length > 0)
|
|
422
|
+
grok.shell.warning(ui.list(atomicLevelRes.warnings));
|
|
413
423
|
}
|
|
414
424
|
|
|
415
425
|
//top-menu: Bio | Alignment | MSA...
|
|
@@ -673,6 +683,7 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
|
673
683
|
//meta.demoPath: Bioinformatics | Similarity, Diversity
|
|
674
684
|
//description: Sequence similarity tracking and evaluation dataset diversity
|
|
675
685
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
|
|
686
|
+
//meta.isDemoScript: True
|
|
676
687
|
export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
677
688
|
await demoBio01UI();
|
|
678
689
|
}
|
|
@@ -682,6 +693,7 @@ export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
|
682
693
|
//meta.demoPath: Bioinformatics | Sequence Space
|
|
683
694
|
//description: Exploring sequence space of Macromolecules, comparison with hierarchical clustering results
|
|
684
695
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
|
|
696
|
+
//meta.isDemoScript: True
|
|
685
697
|
export async function demoBioSequenceSpace(): Promise<void> {
|
|
686
698
|
await demoBio01aUI();
|
|
687
699
|
}
|
|
@@ -691,6 +703,7 @@ export async function demoBioSequenceSpace(): Promise<void> {
|
|
|
691
703
|
//meta.demoPath: Bioinformatics | Activity Cliffs
|
|
692
704
|
//description: Activity Cliffs analysis on Macromolecules data
|
|
693
705
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs
|
|
706
|
+
//meta.isDemoScript: True
|
|
694
707
|
export async function demoBioActivityCliffs(): Promise<void> {
|
|
695
708
|
await demoBio01bUI();
|
|
696
709
|
}
|
|
@@ -700,6 +713,7 @@ export async function demoBioActivityCliffs(): Promise<void> {
|
|
|
700
713
|
//meta.demoPath: Bioinformatics | Atomic Level
|
|
701
714
|
//description: Atomic level structure of Macromolecules
|
|
702
715
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Atomic%20Level
|
|
716
|
+
//meta.isDemoScript: True
|
|
703
717
|
export async function demoBioAtomicLevel(): Promise<void> {
|
|
704
718
|
await demoBio03UI();
|
|
705
719
|
}
|
|
@@ -709,6 +723,7 @@ export async function demoBioAtomicLevel(): Promise<void> {
|
|
|
709
723
|
//meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
|
|
710
724
|
//description: MSA and composition analysis on Helm data
|
|
711
725
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Helm,%20MSA,%20Sequence%20Space
|
|
726
|
+
//meta.isDemoScript: True
|
|
712
727
|
export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
|
|
713
728
|
await demoBio05UI();
|
|
714
729
|
}
|
|
@@ -53,9 +53,9 @@ Y-N-R-Q-W-Y-V
|
|
|
53
53
|
M-K-P-S-E-Y-V
|
|
54
54
|
`,
|
|
55
55
|
helmPt: `seq
|
|
56
|
-
PEPTIDE1{F.W.P.H.E.Y}
|
|
57
|
-
PEPTIDE1{Y.N.R.Q.W.Y.V}
|
|
58
|
-
PEPTIDE1{M.K.P.S.E.Y.V}
|
|
56
|
+
PEPTIDE1{F.W.P.H.E.Y}$$$$
|
|
57
|
+
PEPTIDE1{Y.N.R.Q.W.Y.V}$$$$
|
|
58
|
+
PEPTIDE1{M.K.P.S.E.Y.V}$$$$
|
|
59
59
|
`,
|
|
60
60
|
fastaDna: `seq
|
|
61
61
|
ACGTC
|
|
@@ -68,9 +68,9 @@ C/A/G/T/G/T
|
|
|
68
68
|
T/T/C/A/A/C
|
|
69
69
|
`,
|
|
70
70
|
helmDna: `seq
|
|
71
|
-
DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}
|
|
72
|
-
DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}
|
|
73
|
-
DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}
|
|
71
|
+
DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$$
|
|
72
|
+
DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$$
|
|
73
|
+
DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$$
|
|
74
74
|
`,
|
|
75
75
|
fastaRna: `seq
|
|
76
76
|
ACGUC
|
|
@@ -83,9 +83,9 @@ C*A*G*U*G*U
|
|
|
83
83
|
U*U*C*A*A*C
|
|
84
84
|
`,
|
|
85
85
|
helmRna: `seq
|
|
86
|
-
RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}
|
|
87
|
-
RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}
|
|
88
|
-
RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}
|
|
86
|
+
RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
|
|
87
|
+
RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
|
|
88
|
+
RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$$
|
|
89
89
|
`,
|
|
90
90
|
fastaGaps: `seq
|
|
91
91
|
FW-PH-EYY
|
|
@@ -98,9 +98,9 @@ F/Y/N/R/Q/W/Y/V/
|
|
|
98
98
|
F/K/P//Q//S/E/Y/V
|
|
99
99
|
`,
|
|
100
100
|
helmGaps: `seq
|
|
101
|
-
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}
|
|
102
|
-
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}
|
|
103
|
-
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}
|
|
101
|
+
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$$
|
|
102
|
+
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$$
|
|
103
|
+
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$$
|
|
104
104
|
`,
|
|
105
105
|
|
|
106
106
|
fastaUn: `seq
|
|
@@ -114,24 +114,24 @@ meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
|
114
114
|
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
115
115
|
`,
|
|
116
116
|
helmUn: `seq
|
|
117
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}
|
|
118
|
-
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}
|
|
119
|
-
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}
|
|
117
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
|
|
118
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
|
|
119
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
|
|
120
120
|
`,
|
|
121
121
|
helmLoneDeoxyribose: `seq
|
|
122
|
-
DNA1{D(A).D(C).D(G).D(T).D(C)}
|
|
123
|
-
DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}
|
|
124
|
-
DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}
|
|
122
|
+
DNA1{D(A).D(C).D(G).D(T).D(C)}$$$$
|
|
123
|
+
DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$$
|
|
124
|
+
DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$$
|
|
125
125
|
`,
|
|
126
126
|
helmLoneRibose: `seq
|
|
127
|
-
RNA1{R(A).R(C).R(G).R(U).R(C)}
|
|
128
|
-
RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}
|
|
129
|
-
RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}
|
|
127
|
+
RNA1{R(A).R(C).R(G).R(U).R(C)}$$$$
|
|
128
|
+
RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$$
|
|
129
|
+
RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$$
|
|
130
130
|
`,
|
|
131
131
|
helmLonePhosphorus: `seq
|
|
132
|
-
RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}
|
|
133
|
-
RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}
|
|
134
|
-
RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}
|
|
132
|
+
RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
|
|
133
|
+
RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
|
|
134
|
+
RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$
|
|
135
135
|
`,
|
|
136
136
|
};
|
|
137
137
|
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {before, after, category, test, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
7
|
+
|
|
8
|
+
import {getMonomerLibHelper, toAtomicLevel} from '../package';
|
|
9
|
+
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
10
|
+
import {IMonomerLib} from '@datagrok-libraries/bio/src/types/index';
|
|
11
|
+
import {IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
12
|
+
import {LIB_STORAGE_NAME} from '../utils/monomer-lib';
|
|
13
|
+
|
|
14
|
+
const appPath = 'System:AppData/Bio';
|
|
15
|
+
const fileSource = new DG.FileSource(appPath);
|
|
16
|
+
|
|
17
|
+
const testNames: { [k: string]: string } = {
|
|
18
|
+
PT: 'peptides fasta',
|
|
19
|
+
DNA: 'dna fasta',
|
|
20
|
+
MSA: 'msa separator',
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
const inputPath: { [k: string]: string } = {
|
|
24
|
+
PT: 'tests/to-atomic-level-peptides-fasta-input.csv',
|
|
25
|
+
DNA: 'tests/to-atomic-level-dna-fasta-input.csv',
|
|
26
|
+
MSA: 'tests/to-atomic-level-msa-separator-input.csv',
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
const outputPath: { [k: string]: string } = {
|
|
30
|
+
PT: 'tests/to-atomic-level-peptides-output.csv',
|
|
31
|
+
DNA: 'tests/to-atomic-level-dna-output.csv',
|
|
32
|
+
MSA: 'tests/to-atomic-level-msa-output.csv',
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
const inputColName = 'sequence';
|
|
36
|
+
const outputColName = 'molfile(sequence)';
|
|
37
|
+
|
|
38
|
+
category('toAtomicLevel', async () => {
|
|
39
|
+
const sourceDf: { [key: string]: DG.DataFrame } = {};
|
|
40
|
+
const targetDf: { [key: string]: DG.DataFrame } = {};
|
|
41
|
+
|
|
42
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
43
|
+
/** Backup actual user's monomer libraries settings */
|
|
44
|
+
let userLibrariesSettings: any = null;
|
|
45
|
+
|
|
46
|
+
before(async () => {
|
|
47
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
48
|
+
userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
|
|
49
|
+
// Clear settings to test default
|
|
50
|
+
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
|
|
51
|
+
await monomerLibHelper.loadLibraries(true);
|
|
52
|
+
|
|
53
|
+
for (const key in testNames) {
|
|
54
|
+
sourceDf[key] = await fileSource.readCsv(inputPath[key]);
|
|
55
|
+
await grok.data.detectSemanticTypes(sourceDf[key]);
|
|
56
|
+
targetDf[key] = await fileSource.readCsv(outputPath[key]);
|
|
57
|
+
}
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
after(async () => {
|
|
61
|
+
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
|
|
62
|
+
await monomerLibHelper.loadLibraries(true);
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
async function getTestResult(source: DG.DataFrame, target: DG.DataFrame): Promise<void> {
|
|
66
|
+
const inputCol = source.getCol(inputColName);
|
|
67
|
+
await toAtomicLevel(source, inputCol);
|
|
68
|
+
const obtainedCol = source.getCol(outputColName);
|
|
69
|
+
const expectedCol = target.getCol(outputColName);
|
|
70
|
+
const obtainedArray = [...obtainedCol.values()];
|
|
71
|
+
const expectedArray = [...expectedCol.values()];
|
|
72
|
+
expectArray(obtainedArray, expectedArray);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
for (const key in testNames) {
|
|
76
|
+
test(`${testNames[key]}`, async () => {
|
|
77
|
+
await getTestResult(sourceDf[key], targetDf[key]);
|
|
78
|
+
}, {skipReason: 'GROK-13100'});
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
enum csvTests {
|
|
82
|
+
fastaDna = 'fastaDna',
|
|
83
|
+
fastaRna = 'fastaRna',
|
|
84
|
+
fastaPt = 'fastaPt',
|
|
85
|
+
|
|
86
|
+
separatorDna = 'separatorDna',
|
|
87
|
+
separatorRna = 'separatorRna',
|
|
88
|
+
separatorPt = 'separatorPt',
|
|
89
|
+
separatorUn = 'separatorUn',
|
|
90
|
+
|
|
91
|
+
helm = 'helm',
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
const csvData: { [key in csvTests]: string } = {
|
|
95
|
+
[csvTests.fastaDna]: `seq
|
|
96
|
+
ACGTC
|
|
97
|
+
CAGTGT
|
|
98
|
+
TTCAAC
|
|
99
|
+
`,
|
|
100
|
+
[csvTests.fastaRna]: `seq
|
|
101
|
+
ACGUC
|
|
102
|
+
CAGUGU
|
|
103
|
+
UUCAAC
|
|
104
|
+
`,
|
|
105
|
+
[csvTests.fastaPt]: `seq
|
|
106
|
+
FWPHEY
|
|
107
|
+
YNRQWYV
|
|
108
|
+
MKPSEYV
|
|
109
|
+
`,
|
|
110
|
+
[csvTests.separatorDna]: `seq
|
|
111
|
+
A/C/G/T/C
|
|
112
|
+
C/A/G/T/G/T
|
|
113
|
+
T/T/C/A/A/C
|
|
114
|
+
`,
|
|
115
|
+
[csvTests.separatorRna]: `seq
|
|
116
|
+
A*C*G*U*C
|
|
117
|
+
C*A*G*U*G*U
|
|
118
|
+
U*U*C*A*A*C
|
|
119
|
+
`,
|
|
120
|
+
[csvTests.separatorPt]: `seq
|
|
121
|
+
F-W-P-H-E-Y
|
|
122
|
+
Y-N-R-Q-W-Y-V
|
|
123
|
+
M-K-P-S-E-Y-V
|
|
124
|
+
`,
|
|
125
|
+
[csvTests.separatorUn]: `seq
|
|
126
|
+
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
127
|
+
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
128
|
+
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
129
|
+
`,
|
|
130
|
+
|
|
131
|
+
[csvTests.helm]: `seq
|
|
132
|
+
PEPTIDE1{meI.D-gGlu.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
|
|
133
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
134
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
135
|
+
`,
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
/** Also detects semantic types
|
|
139
|
+
* @param {string} key
|
|
140
|
+
* @return {Promise<DG.DataFrame>}
|
|
141
|
+
*/
|
|
142
|
+
async function readCsv(key: csvTests): Promise<DG.DataFrame> {
|
|
143
|
+
// Always recreate test data frame from CSV for reproducible detector behavior in tests.
|
|
144
|
+
const csv: string = csvData[key];
|
|
145
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
146
|
+
await grok.data.detectSemanticTypes(df);
|
|
147
|
+
return df;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
test('fastaDna', async () => {
|
|
151
|
+
await _testToAtomicLevel(await readCsv(csvTests.fastaDna), 'seq', monomerLibHelper);
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
test('fastaRna', async () => {
|
|
155
|
+
await _testToAtomicLevel(await readCsv(csvTests.fastaRna), 'seq', monomerLibHelper);
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
test('fastaPt', async () => {
|
|
159
|
+
await _testToAtomicLevel(await readCsv(csvTests.fastaPt), 'seq', monomerLibHelper);
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
test('separatorDna', async () => {
|
|
163
|
+
await _testToAtomicLevel(await readCsv(csvTests.separatorDna), 'seq', monomerLibHelper);
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
test('separatorDna', async () => {
|
|
167
|
+
await _testToAtomicLevel(await readCsv(csvTests.separatorRna), 'seq', monomerLibHelper);
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
test('separatorPt', async () => {
|
|
171
|
+
await _testToAtomicLevel(await readCsv(csvTests.separatorPt), 'seq', monomerLibHelper);
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
test('separatorUn', async () => {
|
|
175
|
+
await _testToAtomicLevel(await readCsv(csvTests.separatorUn), 'seq', monomerLibHelper);
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
test('helm', async () => {
|
|
179
|
+
await _testToAtomicLevel(await readCsv(csvTests.helm), 'seq', monomerLibHelper);
|
|
180
|
+
});
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
async function _testToAtomicLevel(df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper) {
|
|
184
|
+
const seqCol: DG.Column<string> = df.getCol(seqColName);
|
|
185
|
+
const monomerLib: IMonomerLib = monomerLibHelper.getBioLib();
|
|
186
|
+
const resCol = await _toAtomicLevel(df, seqCol, monomerLib);
|
|
187
|
+
}
|
|
@@ -44,8 +44,8 @@ export async function multipleSequenceAlignmentUI(options: multipleSequenceAlgin
|
|
|
44
44
|
let performAlignment: (() => Promise<DG.Column<string>>) | undefined;
|
|
45
45
|
|
|
46
46
|
// TODO: allow only macromolecule columns to be chosen
|
|
47
|
-
const colInput = ui.columnInput('Sequence', table, seqCol, () => {
|
|
48
|
-
performAlignment = onColInputChange(
|
|
47
|
+
const colInput = ui.columnInput('Sequence', table, seqCol, async () => {
|
|
48
|
+
performAlignment = await onColInputChange(
|
|
49
49
|
colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
|
|
50
50
|
}
|
|
51
51
|
) as DG.InputBase<DG.Column<string>>;
|
|
@@ -55,7 +55,7 @@ export async function multipleSequenceAlignmentUI(options: multipleSequenceAlgin
|
|
|
55
55
|
colInput.fireChanged();
|
|
56
56
|
//if column is specified (from tests), run alignment and resolve with the result
|
|
57
57
|
if (options.col) {
|
|
58
|
-
performAlignment = onColInputChange(
|
|
58
|
+
performAlignment = await onColInputChange(
|
|
59
59
|
options.col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
|
|
60
60
|
|
|
61
61
|
await onDialogOk(colInput, table, performAlignment, resolve, reject);
|
|
@@ -105,7 +105,7 @@ async function onDialogOk(
|
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
|
|
108
|
-
function onColInputChange(
|
|
108
|
+
async function onColInputChange(
|
|
109
109
|
col: DG.Column<string>,
|
|
110
110
|
table: DG.DataFrame,
|
|
111
111
|
inputRootStyles: CSSStyleDeclaration[],
|
|
@@ -113,7 +113,7 @@ function onColInputChange(
|
|
|
113
113
|
clustersColInput: DG.InputBase<DG.Column<any> | null>,
|
|
114
114
|
gapOpenInput: DG.InputBase<number | null>,
|
|
115
115
|
gapExtendInput: DG.InputBase<number | null>
|
|
116
|
-
): (() => Promise<DG.Column<string>>) | undefined {
|
|
116
|
+
): Promise<(() => Promise<DG.Column<string>>) | undefined> {
|
|
117
117
|
try {
|
|
118
118
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
119
119
|
return;
|
|
@@ -136,6 +136,19 @@ function onColInputChange(
|
|
|
136
136
|
|
|
137
137
|
return async () => await runPepsea(col, unusedName, methodInput.value!,
|
|
138
138
|
gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
|
|
139
|
+
} else if (checkInputColumnUI(col, col.name, [NOTATION.SEPARATOR], [ALPHABET.UN], false)) {
|
|
140
|
+
//if the column is separator with unknown alphabet, it might be helm. check if it can be converted to helm
|
|
141
|
+
const potentialColNC = new NotationConverter(col);
|
|
142
|
+
if (!await potentialColNC.checkHelmCompatibility())
|
|
143
|
+
return;
|
|
144
|
+
const helmCol = potentialColNC.convert(NOTATION.HELM);
|
|
145
|
+
for (const inputRootStyle of inputRootStyles)
|
|
146
|
+
inputRootStyle.removeProperty('display');
|
|
147
|
+
console.log(helmCol.toList());
|
|
148
|
+
// convert to helm and assign alignment function to PepSea
|
|
149
|
+
|
|
150
|
+
return async () => await runPepsea(helmCol, unusedName, methodInput.value!,
|
|
151
|
+
gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
|
|
139
152
|
} else {
|
|
140
153
|
for (const inputRootStyle of inputRootStyles)
|
|
141
154
|
inputRootStyle.display = 'none';
|