@datagrok/bio 2.11.3 → 2.11.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/detectors.js +99 -48
- package/dist/196.js +1 -1
- package/dist/196.js.map +1 -1
- package/dist/361.js +1 -1
- package/dist/361.js.map +1 -1
- package/dist/381.js +1 -1
- package/dist/381.js.map +1 -1
- package/dist/770.js +1 -1
- package/dist/770.js.map +1 -1
- package/dist/79.js.map +1 -1
- package/dist/868.js +1 -1
- package/dist/868.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +3 -3
- package/src/analysis/sequence-space.ts +34 -12
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +2 -1
- package/src/package.ts +51 -29
- package/src/tests/activity-cliffs-tests.ts +5 -3
- package/src/tests/activity-cliffs-utils.ts +5 -2
- package/src/tests/converters-test.ts +72 -72
- package/src/tests/detectors-benchmark-tests.ts +2 -2
- package/src/tests/detectors-tests.ts +36 -36
- package/src/tests/detectors-weak-and-likely-tests.ts +24 -24
- package/src/tests/mm-distance-tests.ts +10 -9
- package/src/tests/units-handler-splitted-tests.ts +33 -36
- package/src/tests/units-handler-tests.ts +9 -9
- package/src/utils/detect-macromolecule-probe.ts +44 -0
- package/src/utils/monomer-lib.ts +4 -9
|
@@ -28,35 +28,35 @@ category('detectors:weak-and-likely', () => {
|
|
|
28
28
|
|
|
29
29
|
const csvData: { [name: string]: string } = {
|
|
30
30
|
[csvTests.fastaDnaWeak1]: `id,colName
|
|
31
|
-
1,
|
|
32
|
-
2,
|
|
33
|
-
3,
|
|
34
|
-
4,
|
|
31
|
+
1,TTTTTTTTTT
|
|
32
|
+
2,TTTTTTTTTT
|
|
33
|
+
3,TTTTTTTTTT
|
|
34
|
+
4,TTTTTTTTTT`,
|
|
35
35
|
[csvTests.fastaDnaWeak1LikelyName]: `id,seq
|
|
36
|
-
1,
|
|
37
|
-
2,
|
|
38
|
-
3,
|
|
39
|
-
4,
|
|
36
|
+
1,TTTTTTT
|
|
37
|
+
2,TTTTTTT
|
|
38
|
+
3,TTTTTTT
|
|
39
|
+
4,TTTTTTT`,
|
|
40
40
|
[csvTests.fastaRnaWeak1]: `id,colName
|
|
41
|
-
1,
|
|
42
|
-
2,
|
|
43
|
-
3,
|
|
44
|
-
4,
|
|
41
|
+
1,UUUUUUUUUU
|
|
42
|
+
2,UUUUUUUUUU
|
|
43
|
+
3,UUUUUUUUUU
|
|
44
|
+
4,UUUUUUUUUU`,
|
|
45
45
|
[csvTests.fastaRnaWeak1LikelyName]: `id,seq
|
|
46
|
-
1,
|
|
47
|
-
2,
|
|
48
|
-
3,
|
|
49
|
-
4,
|
|
46
|
+
1,UUUUUUU
|
|
47
|
+
2,UUUUUUU
|
|
48
|
+
3,UUUUUUU
|
|
49
|
+
4,UUUUUUU`,
|
|
50
50
|
[csvTests.fastaPtWeak1]: `id,colName
|
|
51
|
-
1,
|
|
52
|
-
2,
|
|
53
|
-
3,
|
|
54
|
-
4,
|
|
51
|
+
1,SLSLSPGKSLSLSPGK
|
|
52
|
+
2,SLSLSPGKSLSLSPGK
|
|
53
|
+
3,SLSLSPGKSLSLSPGK
|
|
54
|
+
4,SLSLSPGKSLSLSPGK`,
|
|
55
55
|
[csvTests.fastaPtWeak1LikelyName]: `id,seq
|
|
56
|
-
1,
|
|
57
|
-
2,
|
|
58
|
-
3,
|
|
59
|
-
4,
|
|
56
|
+
1,SLSLSPGKSLSLSPGK
|
|
57
|
+
2,SLSLSPGKSLSLSPGK
|
|
58
|
+
3,SLSLSPGKSLSLSPGK
|
|
59
|
+
4,SLSLSPGKSLSLSPGK`,
|
|
60
60
|
[csvTests.fastaUn1]: `id,colName
|
|
61
61
|
1,word
|
|
62
62
|
2,other
|
|
@@ -27,19 +27,20 @@ category('Distance', async () => {
|
|
|
27
27
|
const prot6 = 'FWRRRRY';
|
|
28
28
|
|
|
29
29
|
const protTable = `seq
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
30
|
+
FWRWYVKHPFWRWYVKHP
|
|
31
|
+
YNRWYVKHPYNRWYVKHP
|
|
32
|
+
MWRSWYCKHPMWRSWYCKHP`;
|
|
33
33
|
|
|
34
34
|
const DNATable = `seq
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
35
|
+
ATAACGATAACG
|
|
36
|
+
ATCGAATCGA
|
|
37
|
+
ATCGAATCGA`;
|
|
38
38
|
|
|
39
39
|
const MSATable = `seq
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
40
|
+
ATAACATAAC
|
|
41
|
+
ATCGAATCGA
|
|
42
|
+
ATCGAATCGA`;
|
|
43
|
+
|
|
43
44
|
test('protein-distance-function', async () => {
|
|
44
45
|
const uh = await _initMacromoleculeColumn(protTable);
|
|
45
46
|
const distFunc = uh.getDistanceFunctionName();
|
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
2
|
import * as DG from 'datagrok-api/dg';
|
|
4
3
|
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
import {category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
8
5
|
import {GapSymbols, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
6
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
7
|
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
@@ -17,31 +14,31 @@ category('UnitsHandler', () => {
|
|
|
17
14
|
fasta: {
|
|
18
15
|
src: {
|
|
19
16
|
csv: `seq
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
17
|
+
ACGTCACGTC
|
|
18
|
+
CAGTGTCAGTGT
|
|
19
|
+
TTCAACTTCAAC`
|
|
23
20
|
},
|
|
24
21
|
tgt: {
|
|
25
22
|
splitted: [
|
|
26
|
-
'
|
|
27
|
-
'
|
|
28
|
-
'
|
|
23
|
+
'ACGTCACGTC',
|
|
24
|
+
'CAGTGTCAGTGT',
|
|
25
|
+
'TTCAACTTCAAC',
|
|
29
26
|
]
|
|
30
27
|
}
|
|
31
28
|
},
|
|
32
29
|
fastaMsa: {
|
|
33
30
|
src: {
|
|
34
31
|
csv: `seq
|
|
35
|
-
AC-GT-CT
|
|
36
|
-
CAC-T-GT
|
|
37
|
-
|
|
32
|
+
AC-GT-CTAC-GT-CT
|
|
33
|
+
CAC-T-GTCAC-T-GT
|
|
34
|
+
ACCGTACTACCGTACT`,
|
|
38
35
|
},
|
|
39
36
|
tgt: {
|
|
40
37
|
splitted: [
|
|
41
38
|
//@formatter:off
|
|
42
|
-
'AC-GT-CT',
|
|
43
|
-
'CAC-T-GT',
|
|
44
|
-
'
|
|
39
|
+
'AC-GT-CTAC-GT-CT',
|
|
40
|
+
'CAC-T-GTCAC-T-GT',
|
|
41
|
+
'ACCGTACTACCGTACT',
|
|
45
42
|
//@formatter:on
|
|
46
43
|
]
|
|
47
44
|
}
|
|
@@ -49,15 +46,15 @@ ACCGTACT`,
|
|
|
49
46
|
separator: {
|
|
50
47
|
src: {
|
|
51
48
|
csv: `seq
|
|
52
|
-
abc-dfgg-abc1-cfr3-rty-wert
|
|
53
|
-
rut12-her2-rty-wert-abc-abc1-dfgg
|
|
54
|
-
rut12-rty-her2-abc-cfr3-wert-rut12`,
|
|
49
|
+
abc-dfgg-abc1-cfr3-rty-wert-abc-dfgg-abc1-cfr3-rty-wert
|
|
50
|
+
rut12-her2-rty-wert-abc-abc1-dfgg-rut12-her2-rty-wert-abc
|
|
51
|
+
rut12-rty-her2-abc-cfr3-wert-rut12-rut12-rty-her2-abc-cfr3`,
|
|
55
52
|
},
|
|
56
53
|
tgt: {
|
|
57
54
|
splitted: [
|
|
58
|
-
['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
|
|
59
|
-
['rut12', 'her2', 'rty', 'wert', 'abc', 'abc1', 'dfgg'],
|
|
60
|
-
['rut12', 'rty', 'her2', 'abc', 'cfr3', 'wert', 'rut12']
|
|
55
|
+
['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert', 'abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
|
|
56
|
+
['rut12', 'her2', 'rty', 'wert', 'abc', 'abc1', 'dfgg', 'rut12', 'her2', 'rty', 'wert', 'abc'],
|
|
57
|
+
['rut12', 'rty', 'her2', 'abc', 'cfr3', 'wert', 'rut12', 'rut12', 'rty', 'her2', 'abc', 'cfr3']
|
|
61
58
|
]
|
|
62
59
|
}
|
|
63
60
|
},
|
|
@@ -65,32 +62,32 @@ rut12-rty-her2-abc-cfr3-wert-rut12`,
|
|
|
65
62
|
separatorMsa: {
|
|
66
63
|
src: {
|
|
67
64
|
csv: `seq
|
|
68
|
-
abc-dfgg-abc1-cfr3-rty-wert
|
|
69
|
-
rut12-her2-rty--abc1-dfgg
|
|
70
|
-
rut12-rty-her2---wert`
|
|
65
|
+
abc-dfgg-abc1-cfr3-rty-wert-abc-dfgg-abc1-cfr3-rty-wert
|
|
66
|
+
rut12-her2-rty--abc1-dfgg-rut12-her2-rty--abc1-dfgg
|
|
67
|
+
rut12-rty-her2---wert-rut12-rty-her2---wert`
|
|
71
68
|
},
|
|
72
69
|
tgt: {
|
|
73
70
|
splitted: [
|
|
74
|
-
['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
|
|
75
|
-
['rut12', 'her2', 'rty', sG, 'abc1', 'dfgg'],
|
|
76
|
-
['rut12', 'rty', 'her2', sG, sG, 'wert'],
|
|
71
|
+
['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert', 'abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
|
|
72
|
+
['rut12', 'her2', 'rty', sG, 'abc1', 'dfgg', 'rut12', 'her2', 'rty', sG, 'abc1', 'dfgg'],
|
|
73
|
+
['rut12', 'rty', 'her2', sG, sG, 'wert', 'rut12', 'rty', 'her2', sG, sG, 'wert'],
|
|
77
74
|
]
|
|
78
75
|
}
|
|
79
76
|
},
|
|
80
77
|
helm: {
|
|
81
78
|
src: {
|
|
82
79
|
csv: `seq
|
|
83
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et}$$$$
|
|
84
|
-
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
|
|
85
|
-
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
|
|
86
|
-
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2}$$$$`
|
|
80
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Thr_PO3H2.Aca.D-Tyr_Et}$$$$
|
|
81
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.dK.Thr_PO3H2.Aca}$$$$
|
|
82
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.dK.Thr_PO3H2.Aca}$$$$
|
|
83
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.T.dK.Thr_PO3H2}$$$$`
|
|
87
84
|
},
|
|
88
85
|
tgt: {
|
|
89
86
|
splitted: [
|
|
90
|
-
['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et'],
|
|
91
|
-
['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
|
|
92
|
-
['Lys_Boc', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
|
|
93
|
-
['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2'],
|
|
87
|
+
['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et'],
|
|
88
|
+
['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca', 'dK', 'Thr_PO3H2', 'Aca'],
|
|
89
|
+
['Lys_Boc', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca', 'dK', 'Thr_PO3H2', 'Aca'],
|
|
90
|
+
['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'T', 'dK', 'Thr_PO3H2'],
|
|
94
91
|
]
|
|
95
92
|
}
|
|
96
93
|
}
|
|
@@ -7,19 +7,19 @@ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
|
7
7
|
import {ALPHABET, NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
|
|
9
9
|
const seqDna = `seq
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
ACGTCACGTC
|
|
11
|
+
CAGTGTCAGTGT
|
|
12
|
+
TTCAACTTCAAC`;
|
|
13
13
|
|
|
14
14
|
const seqDnaMsa = `seq
|
|
15
|
-
AC-GT-CT
|
|
16
|
-
CAC-T-GT
|
|
17
|
-
|
|
15
|
+
AC-GT-CTAC-GT-CT
|
|
16
|
+
CAC-T-GTCAC-T-GT
|
|
17
|
+
ACCGTACTACCGTACT`;
|
|
18
18
|
|
|
19
19
|
const seqUn = `seq
|
|
20
|
-
abc-dfgg-abc1-cfr3-rty-wert
|
|
21
|
-
rut12-her2-rty-wert-abc-abc1-dfgg
|
|
22
|
-
rut12-rty-her2-abc-cfr3-wert-rut12`;
|
|
20
|
+
abc-dfgg-abc1-cfr3-rty-wert-abc-dfgg-abc1-cfr3-rty-wert
|
|
21
|
+
rut12-her2-rty-wert-abc-abc1-dfgg-rut12-her2-rty-wert-abc-abc1-dfgg
|
|
22
|
+
rut12-rty-her2-abc-cfr3-wert-rut12-rut12-rty-her2-abc-cfr3-wert-rut12`;
|
|
23
23
|
|
|
24
24
|
const seqHelm = `seq
|
|
25
25
|
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import {_package} from '../package';
|
|
7
|
+
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
|
+
|
|
9
|
+
type IDetectorReport = { categoriesSample: any[], rejectReason: string };
|
|
10
|
+
|
|
11
|
+
type IDetectorDebugStore = { last: IDetectorReport };
|
|
12
|
+
|
|
13
|
+
export async function detectMacromoleculeProbeDo(
|
|
14
|
+
csv: string, colName: string | undefined, probeCount: number
|
|
15
|
+
): Promise<void> {
|
|
16
|
+
const pi = DG.TaskBarProgressIndicator.create(`detectMacromolecule probe ...`);
|
|
17
|
+
try {
|
|
18
|
+
let progressLast = 0;
|
|
19
|
+
const store: IDetectorDebugStore = await grok.functions.call('Bio:detectMacromoleculeEnableStore');
|
|
20
|
+
let failCount: number = 0;
|
|
21
|
+
for (let i = 0; i < probeCount; ++i) {
|
|
22
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
23
|
+
const seqCol = colName ? df.getCol(colName) : df.columns.byIndex(0);
|
|
24
|
+
|
|
25
|
+
const detectRes: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
26
|
+
if (detectRes !== DG.SEMTYPE.MACROMOLECULE) {
|
|
27
|
+
++failCount;
|
|
28
|
+
console.warn(`Reject reason: ${store.last.rejectReason}`);
|
|
29
|
+
}
|
|
30
|
+
const progress = i / probeCount;
|
|
31
|
+
if ((progress - progressLast) >= 0.1) {
|
|
32
|
+
progressLast = progress;
|
|
33
|
+
pi.update(100 * progress, `detectMacromolecule probe ${failCount}/${i}/${probeCount} ...`);
|
|
34
|
+
await delay(0);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
if (failCount > 0)
|
|
38
|
+
grok.shell.warning(`detectMacromolecule failed ${failCount} / ${probeCount}`);
|
|
39
|
+
else
|
|
40
|
+
grok.shell.info(`detectMacromolecule success ${probeCount}`);
|
|
41
|
+
} finally {
|
|
42
|
+
pi.close();
|
|
43
|
+
}
|
|
44
|
+
}
|
package/src/utils/monomer-lib.ts
CHANGED
|
@@ -8,9 +8,7 @@ import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/index';
|
|
|
8
8
|
import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler';
|
|
9
9
|
import {
|
|
10
10
|
createJsonMonomerLibFromSdf,
|
|
11
|
-
getJsonMonomerLibForEnumerator,
|
|
12
11
|
IMonomerLibHelper,
|
|
13
|
-
isValidEnumeratorLib,
|
|
14
12
|
} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
15
13
|
import {
|
|
16
14
|
HELM_REQUIRED_FIELDS as REQ, HELM_OPTIONAL_FIELDS as OPT, HELM_POLYMER_TYPE
|
|
@@ -18,12 +16,8 @@ import {
|
|
|
18
16
|
|
|
19
17
|
import {_package} from '../package';
|
|
20
18
|
|
|
21
|
-
|
|
22
|
-
REQ.SYMBOL, REQ.NAME, REQ.MOLFILE, REQ.AUTHOR, REQ.ID,
|
|
23
|
-
REQ.RGROUPS, REQ.SMILES, REQ.POLYMER_TYPE, REQ.MONOMER_TYPE, REQ.CREATE_DATE,
|
|
24
|
-
] as const;
|
|
19
|
+
import {PolyToolMonomerLibHandler} from '@datagrok-libraries/bio/src/utils/poly-tool/monomer-lib-handler';
|
|
25
20
|
|
|
26
|
-
const _HELM_OPTIONAL_FIELDS_ARRAY = [OPT.NATURAL_ANALOG, OPT.META] as const;
|
|
27
21
|
// -- Monomer libraries --
|
|
28
22
|
export const LIB_STORAGE_NAME = 'Libraries';
|
|
29
23
|
export const LIB_PATH = 'System:AppData/Bio/libraries/';
|
|
@@ -291,8 +285,9 @@ export class MonomerLibHelper implements IMonomerLibHelper {
|
|
|
291
285
|
}
|
|
292
286
|
const df = await fileSource.readCsv(fileName);
|
|
293
287
|
const json = toJson(df);
|
|
294
|
-
|
|
295
|
-
|
|
288
|
+
const polyToolMonomerLib = new PolyToolMonomerLibHandler(json);
|
|
289
|
+
if (polyToolMonomerLib.isValid())
|
|
290
|
+
rawLibData = polyToolMonomerLib.getJsonMonomerLib();
|
|
296
291
|
else
|
|
297
292
|
throw new Error('Invalid format of CSV monomer lib');
|
|
298
293
|
} else {
|