@datagrok/bio 2.11.8 → 2.11.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/README.md +10 -10
- package/dist/361.js +1 -1
- package/dist/361.js.map +1 -1
- package/dist/381.js +1 -1
- package/dist/381.js.map +1 -1
- package/dist/44.js +2 -0
- package/dist/44.js.map +1 -0
- package/dist/770.js +1 -1
- package/dist/770.js.map +1 -1
- package/dist/79.js +1 -1
- package/dist/79.js.map +1 -1
- package/dist/868.js +1 -1
- package/dist/868.js.map +1 -1
- package/dist/931.js +3 -0
- package/dist/931.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/tests/{to-atomic-level-dna-output.csv → to-atomic-level-dna-fasta-output.csv} +6616 -6626
- package/files/tests/{to-atomic-level-msa-output.csv → to-atomic-level-msa-separator-output.csv} +1429 -1440
- package/files/tests/{to-atomic-level-peptides-output.csv → to-atomic-level-peptides-fasta-output.csv} +13341 -13405
- package/files/tests/to-atomic-level-pt-fasta-2.mol +45 -0
- package/package.json +5 -5
- package/src/package.ts +6 -3
- package/src/substructure-search/substructure-search.ts +10 -7
- package/src/tests/activity-cliffs-tests.ts +8 -5
- package/src/tests/monomer-libraries-tests.ts +21 -5
- package/src/tests/renderers-test.ts +0 -62
- package/src/tests/scoring.ts +29 -8
- package/src/tests/substructure-filters-tests.ts +29 -24
- package/src/tests/to-atomic-level-tests.ts +77 -45
- package/src/tests/utils.ts +9 -2
- package/src/utils/monomer-lib.ts +15 -43
- package/src/widgets/bio-substructure-filter.ts +3 -0
- package/dist/172.js +0 -2
- package/dist/172.js.map +0 -1
- package/dist/196.js +0 -3
- package/dist/196.js.map +0 -1
- /package/dist/{196.js.LICENSE.txt → 931.js.LICENSE.txt} +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
|
|
2
|
+
Datagrok macromolecule handler
|
|
3
|
+
|
|
4
|
+
0 0 0 0 0 0 999 V3000
|
|
5
|
+
M V30 BEGIN CTAB
|
|
6
|
+
M V30 COUNTS 17 16 0 0 0
|
|
7
|
+
M V30 BEGIN ATOM
|
|
8
|
+
M V30 1 C 0.7144 -1.2375 0.000000 0
|
|
9
|
+
M V30 2 C 0.7144 -0.4125 0.000000 0
|
|
10
|
+
M V30 3 N 0 0 0.000000 0
|
|
11
|
+
M V30 4 C 1.4289 0 0.000000 0
|
|
12
|
+
M V30 5 O 2.1434 -0.4126 0.000000 0
|
|
13
|
+
M V30 6 N 4.2867 4.9501 0.000000 0
|
|
14
|
+
M V30 7 C 3.5722 4.5376 0.000000 0
|
|
15
|
+
M V30 8 N 2.8578 4.95 0.000000 0
|
|
16
|
+
M V30 9 N 3.5723 3.7126 0.000000 0
|
|
17
|
+
M V30 10 C 2.8578 3.3001 0.000000 0
|
|
18
|
+
M V30 11 C 2.8578 2.475 0.000000 0
|
|
19
|
+
M V30 12 C 2.1433 2.0626 0.000000 0
|
|
20
|
+
M V30 13 C 2.1433 1.2375 0.000000 0
|
|
21
|
+
M V30 14 N 1.4289 0.825 0.000000 0
|
|
22
|
+
M V30 15 C 2.8578 0.8251 0.000000 0
|
|
23
|
+
M V30 16 O 3.5723 1.2376 0.000000 0
|
|
24
|
+
M V30 17 O 2.8578 0 0.000000 0
|
|
25
|
+
M V30 END ATOM
|
|
26
|
+
M V30 BEGIN BOND
|
|
27
|
+
M V30 1 1 2 1 CFG=3
|
|
28
|
+
M V30 2 1 2 3
|
|
29
|
+
M V30 3 1 2 4
|
|
30
|
+
M V30 4 2 4 5
|
|
31
|
+
M V30 5 1 4 14
|
|
32
|
+
M V30 6 1 6 7
|
|
33
|
+
M V30 7 2 7 8 CFG=3
|
|
34
|
+
M V30 8 1 7 9
|
|
35
|
+
M V30 9 1 9 10
|
|
36
|
+
M V30 10 1 10 11
|
|
37
|
+
M V30 11 1 11 12
|
|
38
|
+
M V30 12 1 13 12 CFG=1
|
|
39
|
+
M V30 13 1 13 14
|
|
40
|
+
M V30 14 1 13 15
|
|
41
|
+
M V30 15 2 15 16
|
|
42
|
+
M V30 16 1 15 17
|
|
43
|
+
M V30 END BOND
|
|
44
|
+
M V30 END CTAB
|
|
45
|
+
M END
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.11.8",
|
|
8
|
+
"version": "2.11.11",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.39.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.39.6",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
39
|
"@datagrok-libraries/ml": "^6.3.55",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
@@ -64,9 +64,9 @@
|
|
|
64
64
|
"webpack": "^5.76.3",
|
|
65
65
|
"webpack-bundle-analyzer": "latest",
|
|
66
66
|
"webpack-cli": "^4.9.1",
|
|
67
|
-
"@datagrok/chem": "1.
|
|
68
|
-
"@datagrok/helm": "2.1.
|
|
69
|
-
"@datagrok/dendrogram": "^1.2.
|
|
67
|
+
"@datagrok/chem": "^1.8.4",
|
|
68
|
+
"@datagrok/helm": "^2.1.21",
|
|
69
|
+
"@datagrok/dendrogram": "^1.2.21"
|
|
70
70
|
},
|
|
71
71
|
"scripts": {
|
|
72
72
|
"link-api": "npm link datagrok-api",
|
package/src/package.ts
CHANGED
|
@@ -409,7 +409,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
409
409
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
410
410
|
};
|
|
411
411
|
let cliffsEncodeFunction: (seqCol: DG.Column, similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics) => any =
|
|
412
|
-
|
|
412
|
+
getEncodedSeqSpaceCol;
|
|
413
413
|
const ncUH = UnitsHandler.getOrCreate(macroMolecule);
|
|
414
414
|
const columnDistanceMetric: MmDistanceFunctionsNames | BitArrayMetrics = similarityMetric;
|
|
415
415
|
const seqCol = macroMolecule;
|
|
@@ -671,8 +671,11 @@ export async function sequenceSpaceTopMenu(
|
|
|
671
671
|
//input: bool nonlinear=false { description: Slower mode for cycling/branching HELM structures }
|
|
672
672
|
export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column, nonlinear: boolean): Promise<void> {
|
|
673
673
|
const pi = DG.TaskBarProgressIndicator.create('Converting to atomic level ...');
|
|
674
|
-
|
|
675
|
-
|
|
674
|
+
try {
|
|
675
|
+
await sequenceToMolfile(df, macroMolecule, nonlinear);
|
|
676
|
+
} finally {
|
|
677
|
+
pi.close();
|
|
678
|
+
}
|
|
676
679
|
}
|
|
677
680
|
|
|
678
681
|
//top-menu: Bio | Analyze | MSA...
|
|
@@ -148,11 +148,14 @@ export async function helmSubstructureSearch(substructure: string, col: DG.Colum
|
|
|
148
148
|
|
|
149
149
|
export async function invalidateMols(col: DG.Column<string>, pattern: boolean) {
|
|
150
150
|
const progressBar = DG.TaskBarProgressIndicator.create(`Invalidating molfiles for ${col.name}`);
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
151
|
+
try {
|
|
152
|
+
await delay(10);
|
|
153
|
+
const monomersDict = new Map();
|
|
154
|
+
const monomericMolsCol = await getMonomericMols(col, pattern, monomersDict);
|
|
155
|
+
col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS] = monomericMolsCol;
|
|
156
|
+
col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT] = monomersDict;
|
|
157
|
+
col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION] = col.version;
|
|
158
|
+
} finally {
|
|
159
|
+
progressBar.close();
|
|
160
|
+
}
|
|
158
161
|
}
|
|
@@ -10,25 +10,28 @@ import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensional
|
|
|
10
10
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
11
11
|
import {BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
12
12
|
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
getUserLibSettings, LibSettings, setUserLibSettings, setUserLibSettingsForTests
|
|
15
|
+
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
14
16
|
|
|
15
17
|
import {_package} from '../package-test';
|
|
16
18
|
|
|
19
|
+
|
|
17
20
|
category('activityCliffs', async () => {
|
|
18
21
|
let viewList: DG.ViewBase[] = [];
|
|
19
22
|
let dfList: DG.DataFrame[] = [];
|
|
20
23
|
|
|
21
24
|
let monomerLibHelper: IMonomerLibHelper;
|
|
22
25
|
/** Backup actual user's monomer libraries settings */
|
|
23
|
-
let
|
|
26
|
+
let userLibSettings: LibSettings;
|
|
24
27
|
|
|
25
28
|
|
|
26
29
|
before(async () => {
|
|
27
30
|
monomerLibHelper = await getMonomerLibHelper();
|
|
28
|
-
|
|
31
|
+
userLibSettings = await getUserLibSettings();
|
|
29
32
|
|
|
30
33
|
// Test 'helm' requires default monomer library loaded
|
|
31
|
-
await
|
|
34
|
+
await setUserLibSettingsForTests();
|
|
32
35
|
await monomerLibHelper.loadLibraries(true); // load default libraries
|
|
33
36
|
|
|
34
37
|
viewList = [];
|
|
@@ -40,7 +43,7 @@ category('activityCliffs', async () => {
|
|
|
40
43
|
// for (const view of viewList) view.close();
|
|
41
44
|
|
|
42
45
|
// UserDataStorage.put() replaces existing data
|
|
43
|
-
await
|
|
46
|
+
await setUserLibSettings(userLibSettings);
|
|
44
47
|
await monomerLibHelper.loadLibraries(true); // load user settings libraries
|
|
45
48
|
});
|
|
46
49
|
|
|
@@ -5,7 +5,10 @@ import * as ui from 'datagrok-api/ui';
|
|
|
5
5
|
import {test, after, before, category, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
|
|
7
7
|
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
8
|
-
import {
|
|
8
|
+
import {
|
|
9
|
+
getUserLibSettings, LibSettings, setUserLibSettings, setUserLibSettingsForTests
|
|
10
|
+
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
11
|
+
import {getLibFileNameList} from '../utils/monomer-lib';
|
|
9
12
|
|
|
10
13
|
|
|
11
14
|
category('monomerLibraries', () => {
|
|
@@ -15,16 +18,16 @@ category('monomerLibraries', () => {
|
|
|
15
18
|
|
|
16
19
|
before(async () => {
|
|
17
20
|
monomerLibHelper = await getMonomerLibHelper();
|
|
18
|
-
userLibrariesSettings =
|
|
21
|
+
userLibrariesSettings = getUserLibSettings();
|
|
19
22
|
});
|
|
20
23
|
|
|
21
24
|
after(async () => {
|
|
22
|
-
await
|
|
25
|
+
await setUserLibSettings(userLibrariesSettings);
|
|
23
26
|
});
|
|
24
27
|
|
|
25
28
|
test('default', async () => {
|
|
26
29
|
// Clear settings to test default
|
|
27
|
-
await
|
|
30
|
+
await setUserLibSettings({exclude: [], explicit: []});
|
|
28
31
|
await monomerLibHelper.loadLibraries(true); // test defaultLib
|
|
29
32
|
|
|
30
33
|
// Currently default monomer lib set is of all files at LIB_PATH (at least HELMCoreLibrary.json)
|
|
@@ -32,12 +35,25 @@ category('monomerLibraries', () => {
|
|
|
32
35
|
expect(currentMonomerLib.getPolymerTypes().length > 0, true);
|
|
33
36
|
});
|
|
34
37
|
|
|
38
|
+
test('forTests', async () => {
|
|
39
|
+
await setUserLibSettingsForTests();
|
|
40
|
+
await monomerLibHelper.loadLibraries(true); // test defaultLib
|
|
41
|
+
|
|
42
|
+
// Currently default monomer lib set is of all files at LIB_PATH (at least HELMCoreLibrary.json)
|
|
43
|
+
const currentMonomerLib = monomerLibHelper.getBioLib();
|
|
44
|
+
// HELMCoreLibrary.json checks
|
|
45
|
+
expect(currentMonomerLib.getPolymerTypes().length, 2);
|
|
46
|
+
expect(currentMonomerLib.getMonomerSymbolsByType('PEPTIDE').length, 322);
|
|
47
|
+
expect(currentMonomerLib.getMonomerSymbolsByType('RNA').length, 383);
|
|
48
|
+
});
|
|
49
|
+
|
|
35
50
|
test('empty', async () => {
|
|
36
51
|
// exclude all monomer libraries for empty set
|
|
37
52
|
const libSettings = await getUserLibSettings();
|
|
38
53
|
const libFnList = await getLibFileNameList();
|
|
39
54
|
libSettings.exclude = libFnList;
|
|
40
|
-
|
|
55
|
+
libSettings.explicit = [];
|
|
56
|
+
await setUserLibSettings(libSettings);
|
|
41
57
|
|
|
42
58
|
await monomerLibHelper.loadLibraries(true);
|
|
43
59
|
const currentMonomerLib = monomerLibHelper.getBioLib();
|
|
@@ -49,14 +49,6 @@ category('renderers', () => {
|
|
|
49
49
|
await _selectRendererBySemType();
|
|
50
50
|
});
|
|
51
51
|
|
|
52
|
-
test('setRendererManuallyBeforeAddColumn', async () => {
|
|
53
|
-
await _setRendererManuallyBeforeAddColumn();
|
|
54
|
-
}, {skipReason: 'GROK-11212'});
|
|
55
|
-
|
|
56
|
-
test('setRendererManuallyAfterAddColumn', async () => {
|
|
57
|
-
await _setRendererManuallyAfterAddColumn();
|
|
58
|
-
}, {skipReason: 'GROK-11212'});
|
|
59
|
-
|
|
60
52
|
async function _rendererMacromoleculeFasta() {
|
|
61
53
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
|
|
62
54
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
@@ -207,58 +199,4 @@ category('renderers', () => {
|
|
|
207
199
|
`view renderer has set to '${renderer}' instead of correct 'MacromoleculeDifference'.`);
|
|
208
200
|
}
|
|
209
201
|
}
|
|
210
|
-
|
|
211
|
-
/** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
|
|
212
|
-
* https://reddata.atlassian.net/browse/GROK-11212 */
|
|
213
|
-
async function _setRendererManuallyBeforeAddColumn() {
|
|
214
|
-
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
215
|
-
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
216
|
-
seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
217
|
-
seqDiffCol.setTag(bioTAGS.separator, '/');
|
|
218
|
-
seqDiffCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
219
|
-
seqDiffCol.setTag(bioTAGS.alphabet, 'UN');
|
|
220
|
-
seqDiffCol.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
221
|
-
seqDiffCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
222
|
-
const tgtCellRenderer = 'MacromoleculeDifference';
|
|
223
|
-
seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
|
|
224
|
-
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
225
|
-
await grok.data.detectSemanticTypes(df);
|
|
226
|
-
const tv = grok.shell.addTableView(df);
|
|
227
|
-
await awaitCheck(() => document.querySelector('canvas') !== null, 'cannot load table', 3000);
|
|
228
|
-
|
|
229
|
-
const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
230
|
-
if (resCellRenderer !== tgtCellRenderer) { // this is value of MacromoleculeDifferenceCR.cellType
|
|
231
|
-
throw new Error(`Tag 'cell.renderer' has been manually set to '${tgtCellRenderer}' for column ` +
|
|
232
|
-
`but after df was added as table, tag 'cell.renderer' has reset to '${resCellRenderer}' ` +
|
|
233
|
-
`instead of manual '${tgtCellRenderer}'.`);
|
|
234
|
-
}
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
/** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
|
|
238
|
-
* https://reddata.atlassian.net/browse/GROK-11212 */
|
|
239
|
-
async function _setRendererManuallyAfterAddColumn() {
|
|
240
|
-
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
241
|
-
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
242
|
-
seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
243
|
-
seqDiffCol.setTag(bioTAGS.separator, '/');
|
|
244
|
-
seqDiffCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
245
|
-
seqDiffCol.setTag(bioTAGS.alphabet, 'UN');
|
|
246
|
-
seqDiffCol.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
247
|
-
seqDiffCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
248
|
-
const tgtCellRenderer = 'MacromoleculeDifference';
|
|
249
|
-
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
250
|
-
await grok.data.detectSemanticTypes(df);
|
|
251
|
-
const tv = grok.shell.addTableView(df);
|
|
252
|
-
await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
|
|
253
|
-
|
|
254
|
-
seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
|
|
255
|
-
await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
|
|
256
|
-
|
|
257
|
-
const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
258
|
-
if (resCellRenderer !== tgtCellRenderer) { // this is value of MacromoleculeDifferenceCR.cellType
|
|
259
|
-
throw new Error(`Tag 'cell.renderer' has been manually set to '${tgtCellRenderer}' for column ` +
|
|
260
|
-
`but after df was added as table, tag 'cell.renderer' has reset to '${resCellRenderer}' ` +
|
|
261
|
-
`instead of manual '${tgtCellRenderer}'.`);
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
202
|
});
|
package/src/tests/scoring.ts
CHANGED
|
@@ -1,27 +1,48 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
1
3
|
import * as DG from 'datagrok-api/dg';
|
|
2
4
|
|
|
3
|
-
import {category, test, expectFloat, before} from '@datagrok-libraries/utils/src/test';
|
|
5
|
+
import {category, test, expectFloat, before, after} from '@datagrok-libraries/utils/src/test';
|
|
4
6
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
8
|
+
|
|
5
9
|
import {sequenceIdentityScoring, sequenceSimilarityScoring} from '../package';
|
|
6
|
-
import {
|
|
10
|
+
import {
|
|
11
|
+
LibSettings, getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests
|
|
12
|
+
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
7
13
|
|
|
8
14
|
category('Scoring', () => {
|
|
9
15
|
const sequence = 'sequence';
|
|
10
16
|
const expectedSimilarity = 'expected_similarity';
|
|
11
17
|
const expectedIdentity = 'expected_identity';
|
|
18
|
+
/* eslint-disable max-len */
|
|
12
19
|
const table = DG.DataFrame.fromCsv(`${sequence},${expectedSimilarity},${expectedIdentity}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
20
|
+
PEPTIDE1{Aca.Orn.gGlu.Pqa.D-His_1Bn.dH.hHis.4Abz.D-Tic.D-Dap.Y.Iva.meS.F.P.F.D-1Nal}$$$$,1.0,1.0
|
|
21
|
+
PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.68,0.53
|
|
22
|
+
PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.34,0.0`
|
|
23
|
+
);
|
|
24
|
+
/* eslint-enable max-len */
|
|
17
25
|
const seqCol: DG.Column<string> = table.getCol(sequence);
|
|
18
26
|
seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
19
27
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
20
28
|
const reference = seqCol.get(0)!;
|
|
21
29
|
|
|
30
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
31
|
+
/** Backup actual user's monomer libraries settings */
|
|
32
|
+
let userLibSettings: LibSettings;
|
|
33
|
+
|
|
22
34
|
before(async () => {
|
|
23
|
-
|
|
24
|
-
await
|
|
35
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
36
|
+
userLibSettings = await getUserLibSettings();
|
|
37
|
+
|
|
38
|
+
await setUserLibSettingsForTests();
|
|
39
|
+
await monomerLibHelper.loadLibraries(true); // load default libraries
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
after(async () => {
|
|
43
|
+
// UserDataStorage.put() replaces existing data
|
|
44
|
+
await setUserLibSettings(userLibSettings);
|
|
45
|
+
await monomerLibHelper.loadLibraries(true); // load user settings libraries
|
|
25
46
|
});
|
|
26
47
|
|
|
27
48
|
test('Identity', async () => {
|
|
@@ -1,30 +1,35 @@
|
|
|
1
|
-
import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
|
|
2
1
|
import * as DG from 'datagrok-api/dg';
|
|
3
2
|
import * as grok from 'datagrok-api/grok';
|
|
4
3
|
|
|
5
|
-
import {
|
|
6
|
-
import {BioSubstructureFilter, HelmFilter, SeparatorFilter} from '../widgets/bio-substructure-filter';
|
|
4
|
+
import {after, before, category, test, expect, delay, testEvent} from '@datagrok-libraries/utils/src/test';
|
|
7
5
|
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
8
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
LibSettings, getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests
|
|
8
|
+
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
9
|
+
|
|
10
|
+
import {awaitGrid, readDataframe} from './utils';
|
|
11
|
+
import {BioSubstructureFilter, HelmFilter, SeparatorFilter} from '../widgets/bio-substructure-filter';
|
|
12
|
+
|
|
13
|
+
import {_package} from '../package-test';
|
|
9
14
|
|
|
10
15
|
|
|
11
16
|
category('substructureFilters', async () => {
|
|
12
17
|
let monomerLibHelper: IMonomerLibHelper;
|
|
13
18
|
/** Backup actual user's monomer libraries settings */
|
|
14
|
-
let
|
|
19
|
+
let userLibSettings: LibSettings;
|
|
15
20
|
|
|
16
21
|
before(async () => {
|
|
17
22
|
monomerLibHelper = await getMonomerLibHelper();
|
|
18
|
-
|
|
23
|
+
userLibSettings = await getUserLibSettings();
|
|
19
24
|
|
|
20
25
|
// Test 'helm' requires default monomer library loaded
|
|
21
|
-
await
|
|
26
|
+
await setUserLibSettingsForTests();
|
|
22
27
|
await monomerLibHelper.loadLibraries(true); // load default libraries
|
|
23
28
|
});
|
|
24
29
|
|
|
25
30
|
after(async () => {
|
|
26
31
|
// UserDataStorage.put() replaces existing data
|
|
27
|
-
await
|
|
32
|
+
await setUserLibSettings(userLibSettings);
|
|
28
33
|
await monomerLibHelper.loadLibraries(true); // load user settings libraries
|
|
29
34
|
});
|
|
30
35
|
|
|
@@ -66,30 +71,30 @@ category('substructureFilters', async () => {
|
|
|
66
71
|
const helmTableView = grok.shell.addTableView(helm);
|
|
67
72
|
const filter = new BioSubstructureFilter();
|
|
68
73
|
await grok.data.detectSemanticTypes(helm);
|
|
74
|
+
|
|
75
|
+
_package.logger.debug('Bio/substructureFilters/helm, filter attaching.');
|
|
69
76
|
filter.attach(helm);
|
|
77
|
+
_package.logger.debug('Bio/substructureFilters/helm, filter attached.');
|
|
70
78
|
|
|
71
|
-
|
|
72
|
-
helm.onFilterChanged.subscribe(async (_: any) => {
|
|
73
|
-
try {
|
|
74
|
-
resolve(true);
|
|
75
|
-
} catch (error) {
|
|
76
|
-
reject(error);
|
|
77
|
-
}
|
|
78
|
-
});
|
|
79
|
-
});
|
|
79
|
+
_package.logger.debug('Bio/substructureFilters/helm, filter 1 changing.');
|
|
80
80
|
(filter.bioFilter! as HelmFilter).helmSubstructure = 'PEPTIDE1{C}$$$$V2.0';
|
|
81
|
-
filter.bioFilter!.onChanged.next();
|
|
82
|
-
await helmFilterChanged;
|
|
83
81
|
|
|
84
|
-
|
|
82
|
+
_package.logger.debug('Bio/substructureFilters/helm, filter 1 change awaiting.');
|
|
83
|
+
await testEvent(helm.onRowsFiltered, () => {},
|
|
84
|
+
() => { filter.bioFilter!.onChanged.next(); }, 20000);
|
|
85
|
+
_package.logger.debug('Bio/substructureFilters/helm, filter 1 changed.');
|
|
85
86
|
expect(filter.dataFrame!.filter.trueCount, 2);
|
|
86
87
|
expect(filter.dataFrame!.filter.get(0), true);
|
|
87
88
|
expect(filter.dataFrame!.filter.get(3), true);
|
|
89
|
+
|
|
90
|
+
_package.logger.debug('Bio/substructureFilters/helm, filter 2 changing.');
|
|
88
91
|
(filter.bioFilter! as HelmFilter).helmSubstructure = 'PEPTIDE1{A.C}$$$$V2.0';
|
|
89
|
-
|
|
90
|
-
await
|
|
92
|
+
_package.logger.debug('Bio/substructureFilters/helm, filter 2 change awaiting.');
|
|
93
|
+
await testEvent(helm.onRowsFiltered, () => {},
|
|
94
|
+
() => { filter.bioFilter!.onChanged.next(); }, 20000);
|
|
95
|
+
await awaitGrid(helmTableView.grid);
|
|
96
|
+
_package.logger.debug('Bio/substructureFilters/helm, filter 2 changed.');
|
|
91
97
|
expect(filter.dataFrame!.filter.trueCount, 1);
|
|
92
98
|
expect(filter.dataFrame!.filter.get(3), true);
|
|
93
|
-
|
|
94
|
-
}, {skipReason: 'GROK-12779'});
|
|
99
|
+
}, {timeout: 30000});
|
|
95
100
|
});
|
|
@@ -3,13 +3,20 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
|
|
6
|
-
import
|
|
6
|
+
import wu from 'wu';
|
|
7
7
|
|
|
8
|
-
import {
|
|
9
|
-
import {toAtomicLevel} from '../package';
|
|
8
|
+
import {before, after, category, test, expectArray, expect} from '@datagrok-libraries/utils/src/test';
|
|
10
9
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
11
10
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types/index';
|
|
12
|
-
import {
|
|
11
|
+
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
13
|
+
import {
|
|
14
|
+
getUserLibSettings, LibSettings, setUserLibSettings, setUserLibSettingsForTests
|
|
15
|
+
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
16
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
17
|
+
|
|
18
|
+
import {toAtomicLevel} from '../package';
|
|
19
|
+
import {_package} from '../package-test';
|
|
13
20
|
|
|
14
21
|
const appPath = 'System:AppData/Bio';
|
|
15
22
|
const fileSource = new DG.FileSource(appPath);
|
|
@@ -27,9 +34,9 @@ const inputPath: { [k: string]: string } = {
|
|
|
27
34
|
};
|
|
28
35
|
|
|
29
36
|
const outputPath: { [k: string]: string } = {
|
|
30
|
-
PT: 'tests/to-atomic-level-peptides-output.csv',
|
|
31
|
-
DNA: 'tests/to-atomic-level-dna-output.csv',
|
|
32
|
-
MSA: 'tests/to-atomic-level-msa-output.csv',
|
|
37
|
+
PT: 'tests/to-atomic-level-peptides-fasta-output.csv',
|
|
38
|
+
DNA: 'tests/to-atomic-level-dna-fasta-output.csv',
|
|
39
|
+
MSA: 'tests/to-atomic-level-msa-separator-output.csv',
|
|
33
40
|
};
|
|
34
41
|
|
|
35
42
|
const inputColName = 'sequence';
|
|
@@ -41,24 +48,24 @@ category('toAtomicLevel', async () => {
|
|
|
41
48
|
|
|
42
49
|
let monomerLibHelper: IMonomerLibHelper;
|
|
43
50
|
/** Backup actual user's monomer libraries settings */
|
|
44
|
-
let
|
|
51
|
+
let userLibSettings: LibSettings;
|
|
45
52
|
|
|
46
53
|
before(async () => {
|
|
47
54
|
monomerLibHelper = await getMonomerLibHelper();
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
await
|
|
51
|
-
monomerLibHelper.loadLibraries(true);
|
|
55
|
+
userLibSettings = await getUserLibSettings();
|
|
56
|
+
// Clear settings to test default
|
|
57
|
+
await setUserLibSettingsForTests();
|
|
58
|
+
await monomerLibHelper.loadLibraries(true);
|
|
52
59
|
|
|
53
60
|
for (const key in testNames) {
|
|
54
|
-
sourceDf[key] = await fileSource.
|
|
61
|
+
sourceDf[key] = DG.DataFrame.fromCsv((await fileSource.readAsText(inputPath[key])).replace(/\n$/, ''));
|
|
55
62
|
await grok.data.detectSemanticTypes(sourceDf[key]);
|
|
56
|
-
targetDf[key] = await fileSource.
|
|
63
|
+
targetDf[key] = DG.DataFrame.fromCsv((await fileSource.readAsText(outputPath[key])).replace(/\n$/, ''));
|
|
57
64
|
}
|
|
58
65
|
});
|
|
59
66
|
|
|
60
67
|
after(async () => {
|
|
61
|
-
await
|
|
68
|
+
await setUserLibSettings(userLibSettings);
|
|
62
69
|
await monomerLibHelper.loadLibraries(true);
|
|
63
70
|
});
|
|
64
71
|
|
|
@@ -67,15 +74,15 @@ category('toAtomicLevel', async () => {
|
|
|
67
74
|
await toAtomicLevel(source, inputCol, false);
|
|
68
75
|
const obtainedCol = source.getCol(outputColName);
|
|
69
76
|
const expectedCol = target.getCol(outputColName);
|
|
70
|
-
const obtainedArray =
|
|
71
|
-
const expectedArray =
|
|
77
|
+
const obtainedArray: string[] = wu(obtainedCol.values()).map((mol) => polishMolfile(mol)).toArray();
|
|
78
|
+
const expectedArray: string[] = wu(expectedCol.values()).map((mol) => polishMolfile(mol)).toArray();
|
|
72
79
|
expectArray(obtainedArray, expectedArray);
|
|
73
80
|
}
|
|
74
81
|
|
|
75
82
|
for (const key in testNames) {
|
|
76
83
|
test(`${testNames[key]}`, async () => {
|
|
77
84
|
await getTestResult(sourceDf[key], targetDf[key]);
|
|
78
|
-
}
|
|
85
|
+
});
|
|
79
86
|
}
|
|
80
87
|
|
|
81
88
|
enum csvTests {
|
|
@@ -93,38 +100,38 @@ category('toAtomicLevel', async () => {
|
|
|
93
100
|
|
|
94
101
|
const csvData: { [key in csvTests]: string } = {
|
|
95
102
|
[csvTests.fastaDna]: `seq
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
103
|
+
ACGTCACGTC
|
|
104
|
+
CAGTGTCAGTGT
|
|
105
|
+
TTCAACTTCAAC`,
|
|
99
106
|
[csvTests.fastaRna]: `seq
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
107
|
+
ACGUCACGUC
|
|
108
|
+
CAGUGUCAGUGU
|
|
109
|
+
UUCAACUUCAAC`,
|
|
103
110
|
[csvTests.fastaPt]: `seq
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
111
|
+
FWPHEYFWPHEY
|
|
112
|
+
YNRQWYVYNRQWYV
|
|
113
|
+
MKPSEYVMKPSEYV`,
|
|
107
114
|
[csvTests.separatorDna]: `seq
|
|
108
|
-
A/C/G/T/C
|
|
109
|
-
C/A/G/T/G/T
|
|
110
|
-
T/T/C/A/A/C`,
|
|
115
|
+
A/C/G/T/C/A/C/G/T/C
|
|
116
|
+
C/A/G/T/G/T/C/A/G/T/G/T
|
|
117
|
+
T/T/C/A/A/C/T/T/C/A/A/C`,
|
|
111
118
|
[csvTests.separatorRna]: `seq
|
|
112
|
-
A*C*G*U*C
|
|
113
|
-
C*A*G*U*G*U
|
|
114
|
-
U*U*C*A*A*C`,
|
|
119
|
+
A*C*G*U*C*A*C*G*U*C
|
|
120
|
+
C*A*G*U*G*U*C*A*G*U*G*U
|
|
121
|
+
U*U*C*A*A*C*U*U*C*A*A*C`,
|
|
115
122
|
[csvTests.separatorPt]: `seq
|
|
116
|
-
F-W-P-H-E-Y
|
|
117
|
-
Y-N-R-Q-W-Y-V
|
|
118
|
-
M-K-P-S-E-Y-V`,
|
|
123
|
+
F-W-P-H-E-Y-F-W-P-H-E-Y
|
|
124
|
+
Y-N-R-Q-W-Y-V-Y-N-R-Q-W-Y-V
|
|
125
|
+
M-K-P-S-E-Y-V-M-K-P-S-E-Y-V`,
|
|
119
126
|
[csvTests.separatorUn]: `seq
|
|
120
|
-
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
121
|
-
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
122
|
-
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2`,
|
|
127
|
+
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D-meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
128
|
+
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
129
|
+
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2`,
|
|
123
130
|
|
|
124
131
|
[csvTests.helm]: `seq
|
|
125
|
-
PEPTIDE1{meI.D-gGlu.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
|
|
126
|
-
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
127
|
-
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$`,
|
|
132
|
+
PEPTIDE1{meI.D-gGlu.Aca.N.T.dE.Thr_PO3H2.Aca.D.Thr_PO3H2.Aca.D}$$$
|
|
133
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
134
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr_PO3H2}$$$`,
|
|
128
135
|
};
|
|
129
136
|
|
|
130
137
|
/** Also detects semantic types
|
|
@@ -134,7 +141,7 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$`,
|
|
|
134
141
|
async function readCsv(key: csvTests): Promise<DG.DataFrame> {
|
|
135
142
|
// Always recreate test data frame from CSV for reproducible detector behavior in tests.
|
|
136
143
|
const csv: string = csvData[key];
|
|
137
|
-
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
144
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv.replace(/\n$/, ''));
|
|
138
145
|
await grok.data.detectSemanticTypes(df);
|
|
139
146
|
return df;
|
|
140
147
|
}
|
|
@@ -170,10 +177,35 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$`,
|
|
|
170
177
|
test('helm', async () => {
|
|
171
178
|
await _testToAtomicLevel(await readCsv(csvTests.helm), 'seq', monomerLibHelper);
|
|
172
179
|
});
|
|
180
|
+
|
|
181
|
+
test('ptFasta2', async () => {
|
|
182
|
+
const srcCsv: string = `seq\nAR`;
|
|
183
|
+
const tgtMol: string = await _package.files.readAsText('tests/to-atomic-level-pt-fasta-2.mol');
|
|
184
|
+
|
|
185
|
+
const srcDf = DG.DataFrame.fromCsv(srcCsv);
|
|
186
|
+
const seqCol = srcDf.getCol('seq');
|
|
187
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
188
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
189
|
+
seqCol.setTag(bioTAGS.alphabet, ALPHABET.PT);
|
|
190
|
+
const uh = UnitsHandler.getOrCreate(seqCol);
|
|
191
|
+
const resCol = (await _testToAtomicLevel(srcDf, 'seq', monomerLibHelper))!;
|
|
192
|
+
expect(polishMolfile(resCol.get(0)), polishMolfile(tgtMol));
|
|
193
|
+
});
|
|
173
194
|
});
|
|
174
195
|
|
|
175
|
-
async function _testToAtomicLevel(
|
|
196
|
+
async function _testToAtomicLevel(
|
|
197
|
+
df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper
|
|
198
|
+
): Promise<DG.Column | null> {
|
|
176
199
|
const seqCol: DG.Column<string> = df.getCol(seqColName);
|
|
177
200
|
const monomerLib: IMonomerLib = monomerLibHelper.getBioLib();
|
|
178
|
-
const
|
|
201
|
+
const res = await _toAtomicLevel(df, seqCol, monomerLib);
|
|
202
|
+
if (res.warnings.length > 0)
|
|
203
|
+
_package.logger.warning(`_toAtomicLevel() warnings ${res.warnings.join('\n')}`);
|
|
204
|
+
return res.col;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
function polishMolfile(mol: string): string {
|
|
208
|
+
return mol.replaceAll('\r\n', '\n')
|
|
209
|
+
.replace(/\n$/, '')
|
|
210
|
+
.split('\n').map((l) => l.trimEnd()).join('\n');
|
|
179
211
|
}
|