@datagrok/bio 2.4.19 → 2.4.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -8
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/data/sample_FASTA_PT_activity.csv +100 -0
- package/files/tests/to-atomic-level-dna-fasta-input.csv +11 -0
- package/files/tests/to-atomic-level-dna-output.csv +15299 -0
- package/files/tests/to-atomic-level-msa-output.csv +3594 -0
- package/files/tests/to-atomic-level-msa-separator-input.csv +12 -0
- package/files/tests/to-atomic-level-peptides-fasta-input.csv +65 -0
- package/files/tests/to-atomic-level-peptides-output.csv +34901 -0
- package/package.json +3 -3
- package/src/demo/bio01-similarity-diversity.ts +7 -3
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +10 -4
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +7 -8
- package/src/demo/bio05-helm-msa-sequence-space.ts +3 -3
- package/src/demo/utils.ts +0 -12
- package/src/package-test.ts +1 -0
- package/src/package.ts +18 -8
- package/src/tests/converters-test.ts +24 -24
- package/src/tests/to-atomic-level-tests.ts +187 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.19",
|
|
8
|
+
"version": "2.4.23",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,10 +14,10 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.30.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.3.23",
|
|
20
|
-
"@datagrok-libraries/tutorials": "^1.3.
|
|
20
|
+
"@datagrok-libraries/tutorials": "^1.3.2",
|
|
21
21
|
"@datagrok-libraries/utils": "^2.1.3",
|
|
22
22
|
"cash-dom": "^8.0.0",
|
|
23
23
|
"css-loader": "^6.7.3",
|
package/src/demo/bio01-similarity-diversity.ts
CHANGED
|
@@ -9,7 +9,7 @@ import {handleError} from './utils';
|
|
|
9
9
|
import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
|
|
10
10
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
11
11
|
|
|
12
|
-
const dataFn: string = 'data/
|
|
12
|
+
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
13
13
|
|
|
14
14
|
export async function demoBio01UI() {
|
|
15
15
|
let view: DG.TableView;
|
|
@@ -27,8 +27,12 @@ export async function demoBio01UI() {
|
|
|
27
27
|
df = await _package.files.readCsv(dataFn);
|
|
28
28
|
view = grok.shell.addTableView(df);
|
|
29
29
|
|
|
30
|
-
view.grid.columns.byName('
|
|
31
|
-
view.grid.columns.byName('
|
|
30
|
+
view.grid.columns.byName('cluster')!.visible = false;
|
|
31
|
+
view.grid.columns.byName('sequence_id')!.visible = false;
|
|
32
|
+
view.grid.columns.byName('sequence')!.width = 300;
|
|
33
|
+
view.grid.columns.byName('activity')!.visible = false;
|
|
34
|
+
view.grid.columns.byName('is_cliff')!.visible = false;
|
|
35
|
+
|
|
32
36
|
// TODO: Fix column width
|
|
33
37
|
}, {
|
|
34
38
|
description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
|
package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts
CHANGED
|
@@ -11,7 +11,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
|
|
|
11
11
|
import {demoSequenceSpace, handleError} from './utils';
|
|
12
12
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
13
13
|
|
|
14
|
-
const dataFn = 'data/
|
|
14
|
+
const dataFn = 'data/sample_FASTA_PT_activity.csv';
|
|
15
15
|
const seqColName = 'sequence';
|
|
16
16
|
|
|
17
17
|
export async function demoBio01aUI() {
|
|
@@ -21,7 +21,7 @@ export async function demoBio01aUI() {
|
|
|
21
21
|
let df: DG.DataFrame;
|
|
22
22
|
let spViewer: DG.ScatterPlotViewer;
|
|
23
23
|
|
|
24
|
-
const
|
|
24
|
+
const dimRedMethod: string = 'UMAP';
|
|
25
25
|
const idRows: { [id: number]: number } = {};
|
|
26
26
|
const embedCols: { [colName: string]: DG.Column<number> } = {};
|
|
27
27
|
|
|
@@ -38,6 +38,9 @@ export async function demoBio01aUI() {
|
|
|
38
38
|
]);
|
|
39
39
|
view = grok.shell.addTableView(df);
|
|
40
40
|
view.grid.props.rowHeight = 22;
|
|
41
|
+
view.grid.columns.byName('cluster')!.visible = false;
|
|
42
|
+
view.grid.columns.byName('sequence')!.width = 200;
|
|
43
|
+
view.grid.columns.byName('is_cliff')!.visible = false;
|
|
41
44
|
|
|
42
45
|
grok.shell.windows.showContextPanel = false;
|
|
43
46
|
grok.shell.windows.showProperties = false;
|
|
@@ -46,7 +49,7 @@ export async function demoBio01aUI() {
|
|
|
46
49
|
delay: 2000,
|
|
47
50
|
})
|
|
48
51
|
.step('Build sequence space', async () => {
|
|
49
|
-
spViewer = await demoSequenceSpace(view, df, seqColName,
|
|
52
|
+
spViewer = await demoSequenceSpace(view, df, seqColName, dimRedMethod);
|
|
50
53
|
}, {
|
|
51
54
|
description: `Reduce sequence space dimensionality to display on 2D representation.`,
|
|
52
55
|
delay: 2000
|
|
@@ -71,7 +74,10 @@ export async function demoBio01aUI() {
|
|
|
71
74
|
delay: 2000,
|
|
72
75
|
})
|
|
73
76
|
.step('Select a bunch of sequences', async () => {
|
|
74
|
-
|
|
77
|
+
const seqIdCol: DG.Column<string> = df.getCol('sequence_id');
|
|
78
|
+
df.selection.init((rowI: number) => {
|
|
79
|
+
return ['c0_seq120', 'c0_seq105', 'c0_seq121', 'c0_seq93'].includes(seqIdCol.get(rowI)!);
|
|
80
|
+
});
|
|
75
81
|
df.currentRowIdx = 27;
|
|
76
82
|
}, {
|
|
77
83
|
description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
|
package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts
CHANGED
|
@@ -13,7 +13,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
|
|
|
13
13
|
import {handleError} from './utils';
|
|
14
14
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
15
15
|
|
|
16
|
-
const dataFn: string = '
|
|
16
|
+
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
17
17
|
|
|
18
18
|
export async function demoBio01bUI() {
|
|
19
19
|
let treeHelper: ITreeHelper;
|
|
@@ -23,7 +23,7 @@ export async function demoBio01bUI() {
|
|
|
23
23
|
let view: DG.TableView;
|
|
24
24
|
let activityCliffsViewer: DG.ScatterPlotViewer;
|
|
25
25
|
|
|
26
|
-
const
|
|
26
|
+
const dimRedMethod: string = 'UMAP';
|
|
27
27
|
const idRows: { [id: number]: number } = {};
|
|
28
28
|
|
|
29
29
|
try {
|
|
@@ -43,10 +43,9 @@ export async function demoBio01bUI() {
|
|
|
43
43
|
|
|
44
44
|
view = grok.shell.addTableView(df);
|
|
45
45
|
view.grid.props.rowHeight = 22;
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
lengthGCol.width = 0;
|
|
46
|
+
view.grid.columns.byName('cluster')!.visible = false;
|
|
47
|
+
view.grid.columns.byName('sequence')!.width = 300;
|
|
48
|
+
view.grid.columns.byName('is_cliff')!.visible = false;
|
|
50
49
|
}, {
|
|
51
50
|
description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
|
|
52
51
|
delay: 2000,
|
|
@@ -54,7 +53,7 @@ export async function demoBio01bUI() {
|
|
|
54
53
|
.step('Find activity cliffs', async () => {
|
|
55
54
|
activityCliffsViewer = (await activityCliffs(
|
|
56
55
|
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
57
|
-
80,
|
|
56
|
+
80, dimRedMethod)) as DG.ScatterPlotViewer;
|
|
58
57
|
view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
|
|
59
58
|
|
|
60
59
|
// Show grid viewer with the cliffs
|
|
@@ -86,7 +85,7 @@ export async function demoBio01bUI() {
|
|
|
86
85
|
//cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
|
|
87
86
|
const cliffsDfGrid: DG.Grid = activityCliffsViewer.dataFrame.temp[acTEMPS.cliffsDfGrid];
|
|
88
87
|
//cliffsDfGrid.dataFrame.selection.init((i) => i == currentCliffIdx);
|
|
89
|
-
cliffsDfGrid.dataFrame.currentRowIdx = 0;
|
|
88
|
+
if (cliffsDfGrid.dataFrame.rowCount > 0) cliffsDfGrid.dataFrame.currentRowIdx = 0;
|
|
90
89
|
//cliffsDfGrid.dataFrame.selection.set(currentCliffIdx, true, true);
|
|
91
90
|
|
|
92
91
|
// /* workaround to select rows of the cliff */
|
package/src/demo/bio05-helm-msa-sequence-space.ts
CHANGED
|
@@ -22,6 +22,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
22
22
|
|
|
23
23
|
const helmColName: string = 'HELM';
|
|
24
24
|
const msaHelmColName: string = 'msa(HELM)';
|
|
25
|
+
const dimRedMethod: string = 'UMAP';
|
|
25
26
|
|
|
26
27
|
try {
|
|
27
28
|
const demoScript = new DemoScript(
|
|
@@ -37,7 +38,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
37
38
|
description: 'Load dataset with macromolecules of \'Helm\' notation.',
|
|
38
39
|
delay: 2000,
|
|
39
40
|
})
|
|
40
|
-
.step('Align
|
|
41
|
+
.step('Align peptides with non-natural aminoacids with PepSeA', async () => {
|
|
41
42
|
helmCol = df.getCol(helmColName);
|
|
42
43
|
const method: string = pepseaMethods[0];
|
|
43
44
|
const gapOpen: number = 1.53;
|
|
@@ -50,9 +51,8 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
50
51
|
delay: 2000,
|
|
51
52
|
})
|
|
52
53
|
.step('Build sequence space', async () => {
|
|
53
|
-
const method: string = 'UMAP';
|
|
54
54
|
ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
|
|
55
|
-
|
|
55
|
+
dimRedMethod, StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
|
|
56
56
|
view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
57
57
|
}, {
|
|
58
58
|
description: 'Reduce sequence space dimensionality to display on 2D representation.',
|
package/src/demo/utils.ts
CHANGED
|
@@ -52,18 +52,6 @@ export async function demoSequenceSpace(
|
|
|
52
52
|
embedCol.init((rowI) => { return embedColData[rowI]; });
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
-
const rowCount: number = df.rowCount;
|
|
56
|
-
const idCol: DG.Column = df.getCol('id');
|
|
57
|
-
for (let idRowI = 0; idRowI < rowCount; idRowI++) {
|
|
58
|
-
const id = idCol.get(idRowI);
|
|
59
|
-
//idRows[id] = idRowI;
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
for (const embedColName of Object.values(EMBED_COL_NAMES)) {
|
|
63
|
-
const embedCol: DG.Column<number> = df.getCol(embedColName);
|
|
64
|
-
//embedCols[embedColName] = embedCol;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
55
|
const t3: number = Date.now();
|
|
68
56
|
_package.logger.debug('MLB: MlbVrSpaceBrowser.buildView(), postprocess reduceDimensionality ' +
|
|
69
57
|
`ET: ${((t3 - t2) / 1000)} s`);
|
package/src/package-test.ts
CHANGED
|
@@ -22,6 +22,7 @@ import './tests/substructure-filters-tests';
|
|
|
22
22
|
import './tests/pepsea-tests';
|
|
23
23
|
import './tests/viewers';
|
|
24
24
|
import './tests/units-handler-tests';
|
|
25
|
+
import './tests/to-atomic-level-tests';
|
|
25
26
|
import './tests/mm-distance-tests';
|
|
26
27
|
|
|
27
28
|
// Tests hanging github CI
|
package/src/package.ts
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
} from './utils/cell-renderer';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {SequenceAlignment} from './seq_align';
|
|
12
|
-
import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
12
|
+
import {getEmbeddingColsNames, sequenceSpaceByFingerprints, getSequenceSpace} from './analysis/sequence-space';
|
|
13
13
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
14
|
import {
|
|
15
15
|
createLinesGrid,
|
|
@@ -290,19 +290,23 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
290
290
|
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
291
291
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
292
292
|
};
|
|
293
|
+
const uh = new UnitsHandler(macroMolecule);
|
|
294
|
+
let columnDistanceMetric = 'Tanimoto';
|
|
295
|
+
if (uh.isFasta())
|
|
296
|
+
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
293
297
|
const sp = await getActivityCliffs(
|
|
294
298
|
df,
|
|
295
299
|
macroMolecule,
|
|
296
300
|
null,
|
|
297
301
|
axesNames,
|
|
298
|
-
'Activity cliffs',
|
|
302
|
+
'Activity cliffs', //scatterTitle
|
|
299
303
|
activities,
|
|
300
304
|
similarity,
|
|
301
|
-
|
|
305
|
+
columnDistanceMetric, //similarityMetric
|
|
302
306
|
methodName,
|
|
303
307
|
DG.SEMTYPE.MACROMOLECULE,
|
|
304
308
|
tags,
|
|
305
|
-
|
|
309
|
+
getSequenceSpace,
|
|
306
310
|
getChemSimilaritiesMatrix,
|
|
307
311
|
createTooltipElement,
|
|
308
312
|
createPropPanelElement,
|
|
@@ -353,7 +357,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
353
357
|
embedAxesNames: embedColsNames,
|
|
354
358
|
options: options
|
|
355
359
|
};
|
|
356
|
-
const sequenceSpaceRes = await
|
|
360
|
+
const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
|
|
357
361
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
358
362
|
for (const col of embeddings) {
|
|
359
363
|
const listValues = col.toList();
|
|
@@ -407,9 +411,15 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
407
411
|
}
|
|
408
412
|
if (!checkInputColumnUI(macroMolecule, 'To Atomic Level'))
|
|
409
413
|
return;
|
|
410
|
-
const
|
|
411
|
-
const
|
|
412
|
-
|
|
414
|
+
const monomerLib: IMonomerLib = (await getMonomerLibHelper()).getBioLib();
|
|
415
|
+
const atomicLevelRes = await _toAtomicLevel(df, macroMolecule, monomerLib);
|
|
416
|
+
if (atomicLevelRes.col !== null) {
|
|
417
|
+
df.columns.add(atomicLevelRes.col, true);
|
|
418
|
+
await grok.data.detectSemanticTypes(df);
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
if (atomicLevelRes.warnings && atomicLevelRes.warnings.length > 0)
|
|
422
|
+
grok.shell.warning(ui.list(atomicLevelRes.warnings));
|
|
413
423
|
}
|
|
414
424
|
|
|
415
425
|
//top-menu: Bio | Alignment | MSA...
|
package/src/tests/converters-test.ts
CHANGED
|
@@ -53,9 +53,9 @@ Y-N-R-Q-W-Y-V
|
|
|
53
53
|
M-K-P-S-E-Y-V
|
|
54
54
|
`,
|
|
55
55
|
helmPt: `seq
|
|
56
|
-
PEPTIDE1{F.W.P.H.E.Y}
|
|
57
|
-
PEPTIDE1{Y.N.R.Q.W.Y.V}
|
|
58
|
-
PEPTIDE1{M.K.P.S.E.Y.V}
|
|
56
|
+
PEPTIDE1{F.W.P.H.E.Y}$$$$
|
|
57
|
+
PEPTIDE1{Y.N.R.Q.W.Y.V}$$$$
|
|
58
|
+
PEPTIDE1{M.K.P.S.E.Y.V}$$$$
|
|
59
59
|
`,
|
|
60
60
|
fastaDna: `seq
|
|
61
61
|
ACGTC
|
|
@@ -68,9 +68,9 @@ C/A/G/T/G/T
|
|
|
68
68
|
T/T/C/A/A/C
|
|
69
69
|
`,
|
|
70
70
|
helmDna: `seq
|
|
71
|
-
DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}
|
|
72
|
-
DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}
|
|
73
|
-
DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}
|
|
71
|
+
DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$$
|
|
72
|
+
DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$$
|
|
73
|
+
DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$$
|
|
74
74
|
`,
|
|
75
75
|
fastaRna: `seq
|
|
76
76
|
ACGUC
|
|
@@ -83,9 +83,9 @@ C*A*G*U*G*U
|
|
|
83
83
|
U*U*C*A*A*C
|
|
84
84
|
`,
|
|
85
85
|
helmRna: `seq
|
|
86
|
-
RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}
|
|
87
|
-
RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}
|
|
88
|
-
RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}
|
|
86
|
+
RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
|
|
87
|
+
RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
|
|
88
|
+
RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$$
|
|
89
89
|
`,
|
|
90
90
|
fastaGaps: `seq
|
|
91
91
|
FW-PH-EYY
|
|
@@ -98,9 +98,9 @@ F/Y/N/R/Q/W/Y/V/
|
|
|
98
98
|
F/K/P//Q//S/E/Y/V
|
|
99
99
|
`,
|
|
100
100
|
helmGaps: `seq
|
|
101
|
-
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}
|
|
102
|
-
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}
|
|
103
|
-
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}
|
|
101
|
+
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$$
|
|
102
|
+
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$$
|
|
103
|
+
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$$
|
|
104
104
|
`,
|
|
105
105
|
|
|
106
106
|
fastaUn: `seq
|
|
@@ -114,24 +114,24 @@ meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
|
114
114
|
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
115
115
|
`,
|
|
116
116
|
helmUn: `seq
|
|
117
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}
|
|
118
|
-
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}
|
|
119
|
-
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}
|
|
117
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
|
|
118
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
|
|
119
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
|
|
120
120
|
`,
|
|
121
121
|
helmLoneDeoxyribose: `seq
|
|
122
|
-
DNA1{D(A).D(C).D(G).D(T).D(C)}
|
|
123
|
-
DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}
|
|
124
|
-
DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}
|
|
122
|
+
DNA1{D(A).D(C).D(G).D(T).D(C)}$$$$
|
|
123
|
+
DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$$
|
|
124
|
+
DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$$
|
|
125
125
|
`,
|
|
126
126
|
helmLoneRibose: `seq
|
|
127
|
-
RNA1{R(A).R(C).R(G).R(U).R(C)}
|
|
128
|
-
RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}
|
|
129
|
-
RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}
|
|
127
|
+
RNA1{R(A).R(C).R(G).R(U).R(C)}$$$$
|
|
128
|
+
RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$$
|
|
129
|
+
RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$$
|
|
130
130
|
`,
|
|
131
131
|
helmLonePhosphorus: `seq
|
|
132
|
-
RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}
|
|
133
|
-
RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}
|
|
134
|
-
RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}
|
|
132
|
+
RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
|
|
133
|
+
RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
|
|
134
|
+
RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$
|
|
135
135
|
`,
|
|
136
136
|
};
|
|
137
137
|
|
package/src/tests/to-atomic-level-tests.ts
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {before, after, category, test, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
7
|
+
|
|
8
|
+
import {getMonomerLibHelper, toAtomicLevel} from '../package';
|
|
9
|
+
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
10
|
+
import {IMonomerLib} from '@datagrok-libraries/bio/src/types/index';
|
|
11
|
+
import {IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
12
|
+
import {LIB_STORAGE_NAME} from '../utils/monomer-lib';
|
|
13
|
+
|
|
14
|
+
const appPath = 'System:AppData/Bio';
|
|
15
|
+
const fileSource = new DG.FileSource(appPath);
|
|
16
|
+
|
|
17
|
+
const testNames: { [k: string]: string } = {
|
|
18
|
+
PT: 'peptides fasta',
|
|
19
|
+
DNA: 'dna fasta',
|
|
20
|
+
MSA: 'msa separator',
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
const inputPath: { [k: string]: string } = {
|
|
24
|
+
PT: 'tests/to-atomic-level-peptides-fasta-input.csv',
|
|
25
|
+
DNA: 'tests/to-atomic-level-dna-fasta-input.csv',
|
|
26
|
+
MSA: 'tests/to-atomic-level-msa-separator-input.csv',
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
const outputPath: { [k: string]: string } = {
|
|
30
|
+
PT: 'tests/to-atomic-level-peptides-output.csv',
|
|
31
|
+
DNA: 'tests/to-atomic-level-dna-output.csv',
|
|
32
|
+
MSA: 'tests/to-atomic-level-msa-output.csv',
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
const inputColName = 'sequence';
|
|
36
|
+
const outputColName = 'molfile(sequence)';
|
|
37
|
+
|
|
38
|
+
category('toAtomicLevel', async () => {
|
|
39
|
+
const sourceDf: { [key: string]: DG.DataFrame } = {};
|
|
40
|
+
const targetDf: { [key: string]: DG.DataFrame } = {};
|
|
41
|
+
|
|
42
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
43
|
+
/** Backup actual user's monomer libraries settings */
|
|
44
|
+
let userLibrariesSettings: any = null;
|
|
45
|
+
|
|
46
|
+
before(async () => {
|
|
47
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
48
|
+
userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
|
|
49
|
+
// Clear settings to test default
|
|
50
|
+
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
|
|
51
|
+
await monomerLibHelper.loadLibraries(true);
|
|
52
|
+
|
|
53
|
+
for (const key in testNames) {
|
|
54
|
+
sourceDf[key] = await fileSource.readCsv(inputPath[key]);
|
|
55
|
+
await grok.data.detectSemanticTypes(sourceDf[key]);
|
|
56
|
+
targetDf[key] = await fileSource.readCsv(outputPath[key]);
|
|
57
|
+
}
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
after(async () => {
|
|
61
|
+
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
|
|
62
|
+
await monomerLibHelper.loadLibraries(true);
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
async function getTestResult(source: DG.DataFrame, target: DG.DataFrame): Promise<void> {
|
|
66
|
+
const inputCol = source.getCol(inputColName);
|
|
67
|
+
await toAtomicLevel(source, inputCol);
|
|
68
|
+
const obtainedCol = source.getCol(outputColName);
|
|
69
|
+
const expectedCol = target.getCol(outputColName);
|
|
70
|
+
const obtainedArray = [...obtainedCol.values()];
|
|
71
|
+
const expectedArray = [...expectedCol.values()];
|
|
72
|
+
expectArray(obtainedArray, expectedArray);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
for (const key in testNames) {
|
|
76
|
+
test(`${testNames[key]}`, async () => {
|
|
77
|
+
await getTestResult(sourceDf[key], targetDf[key]);
|
|
78
|
+
}, {skipReason: 'GROK-13100'});
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
enum csvTests {
|
|
82
|
+
fastaDna = 'fastaDna',
|
|
83
|
+
fastaRna = 'fastaRna',
|
|
84
|
+
fastaPt = 'fastaPt',
|
|
85
|
+
|
|
86
|
+
separatorDna = 'separatorDna',
|
|
87
|
+
separatorRna = 'separatorRna',
|
|
88
|
+
separatorPt = 'separatorPt',
|
|
89
|
+
separatorUn = 'separatorUn',
|
|
90
|
+
|
|
91
|
+
helm = 'helm',
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
const csvData: { [key in csvTests]: string } = {
|
|
95
|
+
[csvTests.fastaDna]: `seq
|
|
96
|
+
ACGTC
|
|
97
|
+
CAGTGT
|
|
98
|
+
TTCAAC
|
|
99
|
+
`,
|
|
100
|
+
[csvTests.fastaRna]: `seq
|
|
101
|
+
ACGUC
|
|
102
|
+
CAGUGU
|
|
103
|
+
UUCAAC
|
|
104
|
+
`,
|
|
105
|
+
[csvTests.fastaPt]: `seq
|
|
106
|
+
FWPHEY
|
|
107
|
+
YNRQWYV
|
|
108
|
+
MKPSEYV
|
|
109
|
+
`,
|
|
110
|
+
[csvTests.separatorDna]: `seq
|
|
111
|
+
A/C/G/T/C
|
|
112
|
+
C/A/G/T/G/T
|
|
113
|
+
T/T/C/A/A/C
|
|
114
|
+
`,
|
|
115
|
+
[csvTests.separatorRna]: `seq
|
|
116
|
+
A*C*G*U*C
|
|
117
|
+
C*A*G*U*G*U
|
|
118
|
+
U*U*C*A*A*C
|
|
119
|
+
`,
|
|
120
|
+
[csvTests.separatorPt]: `seq
|
|
121
|
+
F-W-P-H-E-Y
|
|
122
|
+
Y-N-R-Q-W-Y-V
|
|
123
|
+
M-K-P-S-E-Y-V
|
|
124
|
+
`,
|
|
125
|
+
[csvTests.separatorUn]: `seq
|
|
126
|
+
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
127
|
+
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
128
|
+
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
129
|
+
`,
|
|
130
|
+
|
|
131
|
+
[csvTests.helm]: `seq
|
|
132
|
+
PEPTIDE1{meI.D-gGlu.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
|
|
133
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
134
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
135
|
+
`,
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
/** Also detects semantic types
|
|
139
|
+
* @param {string} key
|
|
140
|
+
* @return {Promise<DG.DataFrame>}
|
|
141
|
+
*/
|
|
142
|
+
async function readCsv(key: csvTests): Promise<DG.DataFrame> {
|
|
143
|
+
// Always recreate test data frame from CSV for reproducible detector behavior in tests.
|
|
144
|
+
const csv: string = csvData[key];
|
|
145
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
146
|
+
await grok.data.detectSemanticTypes(df);
|
|
147
|
+
return df;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
test('fastaDna', async () => {
|
|
151
|
+
await _testToAtomicLevel(await readCsv(csvTests.fastaDna), 'seq', monomerLibHelper);
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
test('fastaRna', async () => {
|
|
155
|
+
await _testToAtomicLevel(await readCsv(csvTests.fastaRna), 'seq', monomerLibHelper);
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
test('fastaPt', async () => {
|
|
159
|
+
await _testToAtomicLevel(await readCsv(csvTests.fastaPt), 'seq', monomerLibHelper);
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
test('separatorDna', async () => {
|
|
163
|
+
await _testToAtomicLevel(await readCsv(csvTests.separatorDna), 'seq', monomerLibHelper);
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
test('separatorDna', async () => {
|
|
167
|
+
await _testToAtomicLevel(await readCsv(csvTests.separatorRna), 'seq', monomerLibHelper);
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
test('separatorPt', async () => {
|
|
171
|
+
await _testToAtomicLevel(await readCsv(csvTests.separatorPt), 'seq', monomerLibHelper);
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
test('separatorUn', async () => {
|
|
175
|
+
await _testToAtomicLevel(await readCsv(csvTests.separatorUn), 'seq', monomerLibHelper);
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
test('helm', async () => {
|
|
179
|
+
await _testToAtomicLevel(await readCsv(csvTests.helm), 'seq', monomerLibHelper);
|
|
180
|
+
});
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
async function _testToAtomicLevel(df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper) {
|
|
184
|
+
const seqCol: DG.Column<string> = df.getCol(seqColName);
|
|
185
|
+
const monomerLib: IMonomerLib = monomerLibHelper.getBioLib();
|
|
186
|
+
const resCol = await _toAtomicLevel(df, seqCol, monomerLib);
|
|
187
|
+
}
|