@datagrok/bio 2.0.26 → 2.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +56796 -697
- package/dist/package.js +56673 -658
- package/files/tests/filter_FASTA.csv +14 -0
- package/files/tests/filter_HELM.csv +5 -0
- package/files/tests/filter_MSA.csv +9 -0
- package/package.json +4 -3
- package/scripts/admet-run.py +27 -0
- package/src/analysis/sequence-similarity-viewer.ts +1 -2
- package/src/package-test.ts +1 -0
- package/src/package.ts +0 -1
- package/src/substructure-search/substructure-search.ts +4 -5
- package/src/tests/checkInputColumn-tests.ts +2 -4
- package/src/tests/detectors-test.ts +19 -21
- package/src/tests/renderers-test.ts +5 -7
- package/src/tests/substructure-filter-tests.ts +61 -0
- package/src/tests/test-sequnces-generators.ts +7 -8
- package/src/utils/cell-renderer.ts +1 -2
- package/src/widgets/bio-substructure-filter.ts +2 -2
- package/test-Bio-49ff04f38f57-8ab13d10.html +385 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
fasta
|
|
2
|
+
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
3
|
+
MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
|
|
4
|
+
MMELVLKTIIGPIVVGVVLRIVDKWLNKDK
|
|
5
|
+
MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN
|
|
6
|
+
MKSTKEEIQTIKTLLKDSRTAKYHKRLQIVL
|
|
7
|
+
MHAILRYFIRRLFYHIFYKIYSLISKKHQSLPSDVRQF
|
|
8
|
+
MSNFHNEHVMQFYRNNLKTKGVFGRQ
|
|
9
|
+
MPNSEPASLLELFNSIATQGELVRSLKAGNASK
|
|
10
|
+
IRVVGRYLIEVWKAAGMDMDKVLFLWSSDEI
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
MSA,Activity
|
|
2
|
+
meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me,5.307510973968128
|
|
3
|
+
meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Phe_ab-dehydro/N/D-Orn/D-aThr//Phe_4Me,5.723876853431544
|
|
4
|
+
Lys_Boc/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Thr_PO3H2/N/D-Orn/D-aThr//Phe_4Me,5.185811246022437
|
|
5
|
+
meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Thr_PO3H2/N/D-Orn/D-aThr//Phe_4Me,6.223502390804369
|
|
6
|
+
meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/Chg/N/D-Orn/D-aThr//Phe_4Me,3.8459123763832412
|
|
7
|
+
meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_Bn/dV/E/N/dV///Phe_4Me,3.27920438824657
|
|
8
|
+
meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca/D-Tyr_Et/Aze/dV/E/N/dV///Phe_4Me,2.105852152992568
|
|
9
|
+
meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca/D-Tyr_Et/meQ/dV/E/N/dV///Phe_4Me,1.803695001649272
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.26",
|
|
8
|
+
"version": "2.0.28",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.7.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.2.0",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.11.1",
|
|
@@ -47,7 +47,8 @@
|
|
|
47
47
|
"webpack-cli": "^4.6.0"
|
|
48
48
|
},
|
|
49
49
|
"grokDependencies": {
|
|
50
|
-
"@datagrok/chem": "1.3.16"
|
|
50
|
+
"@datagrok/chem": "1.3.16",
|
|
51
|
+
"@datagrok/helm": "latest"
|
|
51
52
|
},
|
|
52
53
|
"scripts": {
|
|
53
54
|
"link-api": "npm link datagrok-api",
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#!/usr/bin/env python2
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on Mon Feb 19 17:30:46 2018
|
|
5
|
+
|
|
6
|
+
@author: cbdd
|
|
7
|
+
"""
|
|
8
|
+
from sklearn.externals import joblib
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
###################################### Load model ##########
|
|
14
|
+
# current_path = os.path.split(os.path.realpath(__file__))[0]
|
|
15
|
+
cf = joblib.load('CYP3A4-substrate.pkl')
|
|
16
|
+
|
|
17
|
+
###################################### Load descriptors ##########
|
|
18
|
+
fingerprint_content = pd.read_csv('des.csv').ix[:, 1:]
|
|
19
|
+
des_list = np.array(fingerprint_content)
|
|
20
|
+
|
|
21
|
+
###################################### Prediction ##########
|
|
22
|
+
y_predict_label = cf.predict(des_list)
|
|
23
|
+
y_predict_proba = cf.predict_proba(des_list)
|
|
24
|
+
print('#' * 10 + 'Results labels' + '#' * 10)
|
|
25
|
+
print(y_predict_label)
|
|
26
|
+
print('#' * 10 + 'Results probabilities' + '#' * 10)
|
|
27
|
+
print(y_predict_proba)
|
|
@@ -8,7 +8,6 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
|
8
8
|
import * as C from '../utils/constants';
|
|
9
9
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
10
10
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
11
|
-
import {TableView} from 'datagrok-api/dg';
|
|
12
11
|
import {Subject} from 'rxjs';
|
|
13
12
|
|
|
14
13
|
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
@@ -70,7 +69,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
70
69
|
const targetMolRow = this.idxs?.getRawData().findIndex((it) => it == this.targetMoleculeIdx);
|
|
71
70
|
const targetScoreCell = grid.cell('score', targetMolRow!);
|
|
72
71
|
targetScoreCell.cell.value = null;
|
|
73
|
-
(grok.shell.v as TableView).grid.root.addEventListener('click', (event: MouseEvent) => {
|
|
72
|
+
(grok.shell.v as DG.TableView).grid.root.addEventListener('click', (event: MouseEvent) => {
|
|
74
73
|
this.gridSelect = false;
|
|
75
74
|
});
|
|
76
75
|
updateDivInnerHTML(this.root, grid.root);
|
package/src/package-test.ts
CHANGED
|
@@ -16,6 +16,7 @@ import './tests/bio-tests';
|
|
|
16
16
|
import './tests/WebLogo-positions-test';
|
|
17
17
|
import './tests/checkInputColumn-tests';
|
|
18
18
|
import './tests/similarity-diversity-tests';
|
|
19
|
+
import './tests/substructure-filter-tests';
|
|
19
20
|
|
|
20
21
|
export const _package = new DG.Package();
|
|
21
22
|
export {tests};
|
package/src/package.ts
CHANGED
|
@@ -19,7 +19,6 @@ import {getMacroMol} from './utils/atomic-works';
|
|
|
19
19
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
20
20
|
import {convert} from './utils/convert';
|
|
21
21
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
22
|
-
import {UnitsHandler, ALIGNMENT} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
23
22
|
import {TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
24
23
|
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule'
|
|
25
24
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/utils/to-atomic-level';
|
|
@@ -5,12 +5,11 @@ import * as bio from '@datagrok-libraries/bio';
|
|
|
5
5
|
|
|
6
6
|
import * as C from '../utils/constants';
|
|
7
7
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
|
-
import {BitSet} from 'datagrok-api/dg';
|
|
9
8
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
10
9
|
|
|
11
10
|
export const MONOMER_MOLS_COL = 'monomeric-mols';
|
|
12
11
|
|
|
13
|
-
const enum MONOMERIC_COL_TAGS{
|
|
12
|
+
const enum MONOMERIC_COL_TAGS {
|
|
14
13
|
MONOMERIC_MOLS = 'monomeric-mols',
|
|
15
14
|
LAST_INVALIDATED_VERSION = 'last-invalidated-version',
|
|
16
15
|
MONOMERS_DICT = 'monomers-dict'
|
|
@@ -64,7 +63,7 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
64
63
|
const colExists = col.dataFrame.columns.names()
|
|
65
64
|
.filter((it) => it.toLocaleLowerCase() === matchesColName.toLocaleLowerCase()).length > 0;
|
|
66
65
|
if (!colExists) {
|
|
67
|
-
let matches: BitSet;
|
|
66
|
+
let matches: DG.BitSet;
|
|
68
67
|
if (units === bio.NOTATION.HELM)
|
|
69
68
|
matches = await helmSubstructureSearch(substructure, col);
|
|
70
69
|
else
|
|
@@ -92,13 +91,13 @@ function prepareSubstructureRegex(substructure: string, separator: string) {
|
|
|
92
91
|
const endsWithSep = substructure.charAt(substructure.length - 1) === separator;
|
|
93
92
|
const substrWithoutSep = substructure.replace(new RegExp(`^${char}|${char}$`, 'g'), '');
|
|
94
93
|
const re = startsWithSep ? endsWithSep ? `${char}${substrWithoutSep}${char}` :
|
|
95
|
-
|
|
94
|
+
`${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$` :
|
|
96
95
|
endsWithSep ? `^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}` :
|
|
97
96
|
`^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$`;
|
|
98
97
|
return re;
|
|
99
98
|
}
|
|
100
99
|
|
|
101
|
-
export async function helmSubstructureSearch(substructure: string, col: DG.Column): Promise<BitSet> {
|
|
100
|
+
export async function helmSubstructureSearch(substructure: string, col: DG.Column): Promise<DG.BitSet> {
|
|
102
101
|
if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
103
102
|
await invalidateHelmMols(col);
|
|
104
103
|
const substructureCol = DG.Column.string('helm', 1).init((i) => substructure);
|
|
@@ -7,8 +7,6 @@ import * as bio from '@datagrok-libraries/bio';
|
|
|
7
7
|
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
8
8
|
|
|
9
9
|
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
10
|
-
import {UNITS} from 'datagrok-api/dg';
|
|
11
|
-
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
10
|
|
|
13
11
|
category('checkInputColumn', () => {
|
|
14
12
|
const csv = `seq
|
|
@@ -41,7 +39,7 @@ seq4`;
|
|
|
41
39
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
42
40
|
const col: DG.Column = df.getCol('seq');
|
|
43
41
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
44
|
-
col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
42
|
+
col.setTag(DG.TAGS.UNITS, bio.NOTATION.HELM);
|
|
45
43
|
col.setTag(bio.TAGS.alphabetSize, '11');
|
|
46
44
|
col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
|
|
47
45
|
|
|
@@ -56,7 +54,7 @@ seq4`;
|
|
|
56
54
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
57
55
|
const col: DG.Column = df.getCol('seq');
|
|
58
56
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
59
|
-
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
57
|
+
col.setTag(DG.TAGS.UNITS, bio.NOTATION.FASTA);
|
|
60
58
|
col.setTag(bio.TAGS.alphabet, 'UN');
|
|
61
59
|
col.setTag(bio.TAGS.alphabetSize, '11');
|
|
62
60
|
col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
|
|
@@ -6,8 +6,6 @@ import * as bio from '@datagrok-libraries/bio';
|
|
|
6
6
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
|
|
8
8
|
import {importFasta} from '../package';
|
|
9
|
-
import {UnitsHandler, ALIGNMENT} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
|
-
import {NOTATION, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
9
|
|
|
12
10
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
13
11
|
|
|
@@ -201,49 +199,49 @@ MWRSWY-CKHP
|
|
|
201
199
|
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
202
200
|
|
|
203
201
|
test('Dna1', async () => {
|
|
204
|
-
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false);
|
|
202
|
+
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.DNA, 4, false);
|
|
205
203
|
});
|
|
206
204
|
test('Rna1', async () => {
|
|
207
|
-
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false);
|
|
205
|
+
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.RNA, 4, false);
|
|
208
206
|
});
|
|
209
207
|
test('AA1', async () => {
|
|
210
|
-
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
208
|
+
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
|
|
211
209
|
});
|
|
212
210
|
test('MsaDna1', async () => {
|
|
213
|
-
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
|
|
211
|
+
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.DNA, 4, false);
|
|
214
212
|
});
|
|
215
213
|
|
|
216
214
|
test('MsaAA1', async () => {
|
|
217
|
-
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', NOTATION.FASTA,
|
|
218
|
-
ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
|
|
215
|
+
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', bio.NOTATION.FASTA,
|
|
216
|
+
bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.PT, 20, false);
|
|
219
217
|
});
|
|
220
218
|
|
|
221
219
|
test('SepDna', async () => {
|
|
222
|
-
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false, '*');
|
|
220
|
+
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.DNA, 4, false, '*');
|
|
223
221
|
});
|
|
224
222
|
test('SepRna', async () => {
|
|
225
|
-
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false, '*');
|
|
223
|
+
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.RNA, 4, false, '*');
|
|
226
224
|
});
|
|
227
225
|
test('SepPt', async () => {
|
|
228
226
|
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq',
|
|
229
|
-
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.PT, 20, false, '-');
|
|
227
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false, '-');
|
|
230
228
|
});
|
|
231
229
|
test('SepUn1', async () => {
|
|
232
230
|
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq',
|
|
233
|
-
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 8, true, '-');
|
|
231
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.UN, 8, true, '-');
|
|
234
232
|
});
|
|
235
233
|
test('SepUn2', async () => {
|
|
236
234
|
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq',
|
|
237
|
-
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 9, true, '/');
|
|
235
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.UN, 9, true, '/');
|
|
238
236
|
});
|
|
239
237
|
|
|
240
238
|
test('SepMsaN1', async () => {
|
|
241
239
|
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq',
|
|
242
|
-
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false, '-');
|
|
240
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.DNA, 4, false, '-');
|
|
243
241
|
});
|
|
244
242
|
|
|
245
243
|
test('SamplesFastaCsvPt', async () => {
|
|
246
|
-
await _testPos(readSamples(Samples.fastaCsv), 'sequence', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
244
|
+
await _testPos(readSamples(Samples.fastaCsv), 'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
|
|
247
245
|
});
|
|
248
246
|
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
249
247
|
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
@@ -257,7 +255,7 @@ MWRSWY-CKHP
|
|
|
257
255
|
|
|
258
256
|
test('SamplesFastaFastaPt', async () => {
|
|
259
257
|
await _testPos(readSamples(Samples.fastaFasta, readFileFasta),
|
|
260
|
-
'sequence', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
258
|
+
'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
|
|
261
259
|
});
|
|
262
260
|
|
|
263
261
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
@@ -276,7 +274,7 @@ MWRSWY-CKHP
|
|
|
276
274
|
|
|
277
275
|
test('samplesMsaComplexUn', async () => {
|
|
278
276
|
await _testPos(readSamples(Samples.msaComplex), 'MSA',
|
|
279
|
-
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/');
|
|
277
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.UN, 161, true, '/');
|
|
280
278
|
});
|
|
281
279
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
282
280
|
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
@@ -291,7 +289,7 @@ MWRSWY-CKHP
|
|
|
291
289
|
});
|
|
292
290
|
|
|
293
291
|
test('samplesHelmCsvHELM', async () => {
|
|
294
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM', NOTATION.HELM, null, null, 160, true, null);
|
|
292
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM', bio.NOTATION.HELM, null, null, 160, true, null);
|
|
295
293
|
});
|
|
296
294
|
|
|
297
295
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -307,7 +305,7 @@ MWRSWY-CKHP
|
|
|
307
305
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
308
306
|
});
|
|
309
307
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
310
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', NOTATION.HELM, null, null, 9, true, null);
|
|
308
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', bio.NOTATION.HELM, null, null, 9, true, null);
|
|
311
309
|
});
|
|
312
310
|
test('samplesTestHelmNegativeValid', async () => {
|
|
313
311
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -339,7 +337,7 @@ MWRSWY-CKHP
|
|
|
339
337
|
});
|
|
340
338
|
|
|
341
339
|
test('samplesFastaPtPosSequence', async () => {
|
|
342
|
-
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
340
|
+
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
|
|
343
341
|
});
|
|
344
342
|
|
|
345
343
|
test('samplesTestCerealNegativeCerealName', async () => {
|
|
@@ -414,7 +412,7 @@ export async function _testPos(
|
|
|
414
412
|
if (separator)
|
|
415
413
|
expect(col.getTag(bio.TAGS.separator), separator);
|
|
416
414
|
|
|
417
|
-
const uh = new UnitsHandler(col);
|
|
415
|
+
const uh = new bio.UnitsHandler(col);
|
|
418
416
|
expect(uh.getAlphabetSize(), alphabetSize);
|
|
419
417
|
expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
420
418
|
if (!uh.isHelm()) {
|
|
@@ -6,8 +6,6 @@ import {after, before, category, delay, expect, test} from '@datagrok-libraries/
|
|
|
6
6
|
|
|
7
7
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
8
8
|
import {convertDo} from '../utils/convert';
|
|
9
|
-
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
|
-
import {UnitsHandler, ALIGNMENT} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
11
9
|
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
12
10
|
import {generateLongSequence, generateManySequences, performanceTest} from './test-sequnces-generators';
|
|
13
11
|
|
|
@@ -105,7 +103,7 @@ category('renderers', () => {
|
|
|
105
103
|
async function _rendererMacromoleculeDifference() {
|
|
106
104
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
107
105
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
108
|
-
seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
|
|
106
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = bio.NOTATION.SEPARATOR;
|
|
109
107
|
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
110
108
|
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
111
109
|
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
@@ -141,7 +139,7 @@ category('renderers', () => {
|
|
|
141
139
|
`cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
|
|
142
140
|
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
143
141
|
expect(srcSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
|
|
144
|
-
expect(srcSeqCol.getTag(bio.TAGS.aligned), ALIGNMENT.SEQ);
|
|
142
|
+
expect(srcSeqCol.getTag(bio.TAGS.aligned), bio.ALIGNMENT.SEQ);
|
|
145
143
|
expect(srcSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
|
|
146
144
|
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
147
145
|
|
|
@@ -150,7 +148,7 @@ category('renderers', () => {
|
|
|
150
148
|
|
|
151
149
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
152
150
|
expect(msaSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
|
|
153
|
-
expect(msaSeqCol.getTag(bio.TAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
151
|
+
expect(msaSeqCol.getTag(bio.TAGS.aligned), bio.ALIGNMENT.SEQ_MSA);
|
|
154
152
|
expect(msaSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
|
|
155
153
|
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
156
154
|
|
|
@@ -192,7 +190,7 @@ category('renderers', () => {
|
|
|
192
190
|
/**/
|
|
193
191
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
194
192
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
195
|
-
seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
|
|
193
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = bio.NOTATION.SEPARATOR;
|
|
196
194
|
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
197
195
|
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
198
196
|
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
@@ -213,7 +211,7 @@ category('renderers', () => {
|
|
|
213
211
|
async function _setRendererManually() {
|
|
214
212
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
215
213
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
216
|
-
seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
|
|
214
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = bio.NOTATION.SEPARATOR;
|
|
217
215
|
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
218
216
|
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE;
|
|
219
217
|
const tgtCellRenderer = 'MacromoleculeDifference';
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import {readDataframe} from './utils';
|
|
5
|
+
import {BioSubstructureFilter, HelmFilter, SeparatorFilter} from '../widgets/bio-substructure-filter';
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
category('substructureFilters', async () => {
|
|
9
|
+
test('fasta', async () => {
|
|
10
|
+
const fasta = await readDataframe('tests/filter_FASTA.csv');
|
|
11
|
+
const filter = new BioSubstructureFilter();
|
|
12
|
+
await grok.data.detectSemanticTypes(fasta);
|
|
13
|
+
filter.attach(fasta);
|
|
14
|
+
filter.bioFilter!.substructure = 'MD';
|
|
15
|
+
await delay(100);
|
|
16
|
+
expect(filter.dataFrame!.filter.trueCount, 3);
|
|
17
|
+
expect(filter.dataFrame!.filter.get(0), true);
|
|
18
|
+
expect(filter.dataFrame!.filter.get(3), true);
|
|
19
|
+
expect(filter.dataFrame!.filter.get(8), true);
|
|
20
|
+
expect(filter.dataFrame!.filter.get(1), false);
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
test('separator', async () => {
|
|
24
|
+
const msa = await readDataframe('tests/filter_MSA.csv');
|
|
25
|
+
const filter = new BioSubstructureFilter();
|
|
26
|
+
await grok.data.detectSemanticTypes(msa);
|
|
27
|
+
filter.attach(msa);
|
|
28
|
+
filter.bioFilter!.substructure = 'meI';
|
|
29
|
+
await delay(100);
|
|
30
|
+
expect(filter.dataFrame!.filter.trueCount, 7);
|
|
31
|
+
expect(filter.dataFrame!.filter.get(2), false);
|
|
32
|
+
filter.bioFilter!.substructure = '/meI';
|
|
33
|
+
await delay(100);
|
|
34
|
+
expect(filter.dataFrame!.filter.trueCount, 0);
|
|
35
|
+
filter.bioFilter!.substructure = 'meI-hHis';
|
|
36
|
+
(filter.bioFilter! as SeparatorFilter).separatorInput.value = '-';
|
|
37
|
+
await delay(100);
|
|
38
|
+
expect(filter.dataFrame!.filter.trueCount, 7);
|
|
39
|
+
expect(filter.dataFrame!.filter.get(2), false);
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
test('helm', async () => {
|
|
43
|
+
const helm = await readDataframe('tests/filter_HELM.csv');
|
|
44
|
+
const helmTableView = grok.shell.addTableView(helm);
|
|
45
|
+
const filter = new BioSubstructureFilter();
|
|
46
|
+
await grok.data.detectSemanticTypes(helm);
|
|
47
|
+
filter.attach(helm);
|
|
48
|
+
(filter.bioFilter! as HelmFilter).helmSubstructure = 'PEPTIDE1{C}$$$$V2.0';
|
|
49
|
+
filter.bioFilter!.onChanged.next();
|
|
50
|
+
await delay(1000);
|
|
51
|
+
expect(filter.dataFrame!.filter.trueCount, 2);
|
|
52
|
+
expect(filter.dataFrame!.filter.get(0), true);
|
|
53
|
+
expect(filter.dataFrame!.filter.get(3), true);
|
|
54
|
+
(filter.bioFilter! as HelmFilter).helmSubstructure = 'PEPTIDE1{A.C}$$$$V2.0';
|
|
55
|
+
filter.bioFilter!.onChanged.next();
|
|
56
|
+
await delay(100);
|
|
57
|
+
expect(filter.dataFrame!.filter.trueCount, 1);
|
|
58
|
+
expect(filter.dataFrame!.filter.get(3), true);
|
|
59
|
+
helmTableView.close();
|
|
60
|
+
});
|
|
61
|
+
});
|
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
1
|
import * as grok from 'datagrok-api/grok';
|
|
3
|
-
import
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
4
5
|
|
|
5
|
-
import {ALIGNMENT, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
6
|
-
import {NOTATION, ALPHABET, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
6
|
|
|
8
7
|
export function generateManySequences(): DG.Column[] {
|
|
9
8
|
let columns: DG.Column[] = [];
|
|
@@ -22,10 +21,10 @@ export function generateLongSequence(): DG.Column[] {
|
|
|
22
21
|
|
|
23
22
|
export function setTagsMacromolecule(col: DG.Column) {
|
|
24
23
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
25
|
-
col.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
26
|
-
col.setTag(TAGS.aligned, ALIGNMENT.SEQ_MSA);
|
|
27
|
-
col.setTag(TAGS.alphabet, ALPHABET.UN);
|
|
28
|
-
col.setTag(TAGS.separator, '/');
|
|
24
|
+
col.setTag(DG.TAGS.UNITS, bio.NOTATION.SEPARATOR);
|
|
25
|
+
col.setTag(bio.TAGS.aligned, bio.ALIGNMENT.SEQ_MSA);
|
|
26
|
+
col.setTag(bio.TAGS.alphabet, bio.ALPHABET.UN);
|
|
27
|
+
col.setTag(bio.TAGS.separator, '/');
|
|
29
28
|
return col;
|
|
30
29
|
}
|
|
31
30
|
|
|
@@ -3,7 +3,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
|
|
5
5
|
import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
|
|
6
|
-
import {ALIGNMENT} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
7
6
|
import * as bio from '@datagrok-libraries/bio';
|
|
8
7
|
import * as C from './constants';
|
|
9
8
|
|
|
@@ -47,7 +46,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
47
46
|
}
|
|
48
47
|
|
|
49
48
|
onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
50
|
-
if (gridCell.cell.column.getTag(bio.TAGS.aligned) !== ALIGNMENT.SEQ_MSA)
|
|
49
|
+
if (gridCell.cell.column.getTag(bio.TAGS.aligned) !== bio.ALIGNMENT.SEQ_MSA)
|
|
51
50
|
return;
|
|
52
51
|
|
|
53
52
|
const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
|
|
@@ -158,7 +158,7 @@ class FastaFilter extends BioFilterBase {
|
|
|
158
158
|
}
|
|
159
159
|
}
|
|
160
160
|
|
|
161
|
-
class SeparatorFilter extends FastaFilter {
|
|
161
|
+
export class SeparatorFilter extends FastaFilter {
|
|
162
162
|
separatorInput: DG.InputBase<string> = ui.stringInput('', '', () => {
|
|
163
163
|
this.onChanged.next();
|
|
164
164
|
}, {placeholder: 'Separator'});
|
|
@@ -192,7 +192,7 @@ class SeparatorFilter extends FastaFilter {
|
|
|
192
192
|
}
|
|
193
193
|
}
|
|
194
194
|
|
|
195
|
-
class HelmFilter extends BioFilterBase {
|
|
195
|
+
export class HelmFilter extends BioFilterBase {
|
|
196
196
|
helmEditor: any;
|
|
197
197
|
_filterPanel = ui.div('', {style: {cursor: 'pointer'}});
|
|
198
198
|
helmSubstructure = '';
|