@datagrok/bio 2.1.12 → 2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -12
- package/css/helm.css +10 -0
- package/detectors.js +83 -59
- package/dist/package-test.js +2 -13168
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -10560
- package/dist/package.js.map +1 -0
- package/dockerfiles/Dockerfile +86 -0
- package/files/icons/composition-analysis.svg +17 -0
- package/files/icons/sequence-diversity-viewer.svg +4 -0
- package/files/icons/sequence-similarity-viewer.svg +4 -0
- package/files/icons/vdregions-viewer.svg +22 -0
- package/files/icons/weblogo-viewer.svg +7 -0
- package/files/tests/testUrl.csv +11 -0
- package/files/tests/toAtomicLevelTest.csv +4 -0
- package/package.json +24 -25
- package/src/analysis/sequence-activity-cliffs.ts +11 -9
- package/src/analysis/sequence-search-base-viewer.ts +2 -1
- package/src/analysis/sequence-similarity-viewer.ts +3 -3
- package/src/analysis/sequence-space.ts +2 -1
- package/src/calculations/monomerLevelMols.ts +4 -4
- package/src/package-test.ts +9 -2
- package/src/package.ts +215 -131
- package/src/substructure-search/substructure-search.ts +19 -16
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +113 -57
- package/src/tests/_first-tests.ts +9 -0
- package/src/tests/activity-cliffs-tests.ts +8 -7
- package/src/tests/activity-cliffs-utils.ts +17 -9
- package/src/tests/bio-tests.ts +4 -5
- package/src/tests/checkInputColumn-tests.ts +1 -1
- package/src/tests/converters-test.ts +52 -17
- package/src/tests/detectors-benchmark-tests.ts +3 -2
- package/src/tests/detectors-tests.ts +177 -172
- package/src/tests/fasta-export-tests.ts +1 -1
- package/src/tests/monomer-libraries-tests.ts +34 -0
- package/src/tests/pepsea-tests.ts +21 -0
- package/src/tests/renderers-test.ts +21 -19
- package/src/tests/sequence-space-test.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +4 -4
- package/src/tests/splitters-test.ts +4 -5
- package/src/tests/substructure-filters-tests.ts +23 -1
- package/src/tests/utils/sequences-generators.ts +1 -1
- package/src/tests/utils.ts +2 -1
- package/src/tests/viewers.ts +16 -0
- package/src/utils/cell-renderer.ts +88 -35
- package/src/utils/constants.ts +7 -6
- package/src/utils/convert.ts +8 -2
- package/src/utils/monomer-lib.ts +174 -0
- package/src/utils/multiple-sequence-alignment.ts +44 -20
- package/src/utils/pepsea.ts +78 -0
- package/src/utils/save-as-fasta.ts +2 -1
- package/src/utils/ui-utils.ts +15 -3
- package/src/viewers/vd-regions-viewer.ts +113 -72
- package/src/viewers/web-logo-viewer.ts +1031 -0
- package/src/widgets/bio-substructure-filter.ts +38 -24
- package/tsconfig.json +71 -72
- package/webpack.config.js +4 -11
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -9039
|
@@ -5,7 +5,8 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
|
|
7
7
|
import {importFasta} from '../package';
|
|
8
|
-
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS
|
|
8
|
+
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
10
|
|
|
10
11
|
/*
|
|
11
12
|
// snippet to list df columns of semType='Macromolecule' (false positive)
|
|
@@ -21,103 +22,115 @@ for (let i = 0; i < df.columns.length; i++) {
|
|
|
21
22
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
22
23
|
|
|
23
24
|
category('detectors', () => {
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
const enum csvTests {
|
|
26
|
+
negEmpty = 'negEmpty',
|
|
27
|
+
neg1 = 'neg1',
|
|
28
|
+
neg2 = 'neg2',
|
|
29
|
+
neg3 = 'neg3',
|
|
30
|
+
negSmiles = 'negSmiles',
|
|
31
|
+
fastaDna1 = 'csvFastaDna1',
|
|
32
|
+
fastaRna1 = 'fastaRna1',
|
|
33
|
+
fastaPt1 = 'fastaPt1',
|
|
34
|
+
fastaUn = 'fastaUn',
|
|
35
|
+
sepDna = 'sepDna',
|
|
36
|
+
sepRna = 'sepRna',
|
|
37
|
+
sepPt = 'sepPt',
|
|
38
|
+
sepUn1 = 'sepUn1',
|
|
39
|
+
sepUn2 = 'sepUn2',
|
|
40
|
+
sepMsaDna1 = 'sepMsaDna1',
|
|
41
|
+
fastaMsaDna1 = 'fastaMsaDna1',
|
|
42
|
+
fastaMsaPt1 = 'fastaMsaPt1',
|
|
43
|
+
}
|
|
28
44
|
|
|
29
|
-
const
|
|
45
|
+
const csvData = new class {
|
|
46
|
+
[csvTests.negEmpty]: string = `id,col1
|
|
30
47
|
1,
|
|
31
48
|
2,
|
|
32
49
|
3,
|
|
33
50
|
4,
|
|
34
51
|
5,`;
|
|
35
|
-
|
|
36
|
-
|
|
52
|
+
[csvTests.neg1]: string = `col1
|
|
53
|
+
1
|
|
54
|
+
2
|
|
55
|
+
3`;
|
|
56
|
+
[csvTests.neg2]: string = `col1
|
|
37
57
|
4
|
|
38
58
|
5
|
|
39
59
|
6
|
|
40
60
|
7`;
|
|
41
|
-
|
|
42
|
-
const csvDf3: string = `col1
|
|
61
|
+
[csvTests.neg3]: string = `col1
|
|
43
62
|
8
|
|
44
63
|
9
|
|
45
64
|
10
|
|
46
65
|
11
|
|
47
66
|
12`;
|
|
48
|
-
|
|
49
|
-
const csvDfSmiles: string = `col1
|
|
67
|
+
[csvTests.negSmiles]: string = `col1
|
|
50
68
|
CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
51
69
|
C1CCCCC1
|
|
52
70
|
CCCCCC
|
|
53
71
|
`;
|
|
54
|
-
|
|
55
|
-
const csvDfDna1: string = `seq
|
|
72
|
+
[csvTests.fastaDna1]: string = `seq
|
|
56
73
|
ACGTC
|
|
57
74
|
CAGTGT
|
|
58
75
|
TTCAAC
|
|
59
76
|
`;
|
|
60
|
-
|
|
61
|
-
const csvDfRna1: string = `seq
|
|
77
|
+
[csvTests.fastaRna1]: string = `seq
|
|
62
78
|
ACGUC
|
|
63
79
|
CAGUGU
|
|
64
80
|
UUCAAC
|
|
65
81
|
`;
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
const csvDfPt1: string = `seq
|
|
82
|
+
/** Pure amino acids sequence */
|
|
83
|
+
[csvTests.fastaPt1]: string = `seq
|
|
69
84
|
FWPHEY
|
|
70
85
|
YNRQWYV
|
|
71
86
|
MKPSEYV
|
|
72
87
|
`;
|
|
73
|
-
|
|
74
|
-
|
|
88
|
+
[csvTests.fastaUn]: string = `seq
|
|
89
|
+
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
90
|
+
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
91
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
92
|
+
`;
|
|
93
|
+
[csvTests.sepDna]: string = `seq
|
|
75
94
|
A*C*G*T*C
|
|
76
95
|
C*A*G*T*G*T
|
|
77
96
|
T*T*C*A*A*C
|
|
78
97
|
`;
|
|
79
|
-
|
|
80
|
-
const csvDfSepRna: string = `seq
|
|
98
|
+
[csvTests.sepRna]: string = `seq
|
|
81
99
|
A*C*G*U*C
|
|
82
100
|
C*A*G*U*G*U
|
|
83
101
|
U*U*C*A*A*C
|
|
84
102
|
`;
|
|
85
|
-
|
|
86
|
-
const csvDfSepPt: string = `seq
|
|
103
|
+
[csvTests.sepPt]: string = `seq
|
|
87
104
|
F-W-P-H-E-Y
|
|
88
105
|
Y-N-R-Q-W-Y-V
|
|
89
106
|
M-K-P-S-E-Y-V
|
|
90
107
|
`;
|
|
91
|
-
|
|
92
|
-
const csvDfSepUn1: string = `seq
|
|
108
|
+
[csvTests.sepUn1]: string = `seq
|
|
93
109
|
abc-dfgg-abc1-cfr3-rty-wert
|
|
94
110
|
rut12-her2-rty-wert-abc-abc1-dfgg
|
|
95
111
|
rut12-rty-her2-abc-cfr3-wert-rut12
|
|
96
112
|
`;
|
|
97
|
-
|
|
98
|
-
const csvDfSepUn2: string = `seq
|
|
113
|
+
[csvTests.sepUn2]: string = `seq
|
|
99
114
|
abc/dfgg/abc1/cfr3/rty/wert
|
|
100
115
|
rut12/her2/rty/wert//abc/abc1/dfgg
|
|
101
116
|
rut12/rty/her2/abc/cfr3//wert/rut12
|
|
102
117
|
`;
|
|
103
|
-
|
|
104
|
-
const csvDfSepMsaDna1: string = `seq
|
|
118
|
+
[csvTests.sepMsaDna1]: string = `seq
|
|
105
119
|
A-C--G-T--C-T
|
|
106
120
|
C-A-C--T--G-T
|
|
107
121
|
A-C-C-G-T-A-C-T
|
|
108
122
|
`;
|
|
109
|
-
|
|
110
|
-
const csvDfMsaDna1: string = `seq
|
|
123
|
+
[csvTests.fastaMsaDna1]: string = `seq
|
|
111
124
|
AC-GT-CT
|
|
112
125
|
CAC-T-GT
|
|
113
126
|
ACCGTACT
|
|
114
127
|
`;
|
|
115
|
-
|
|
116
|
-
const csvDfMsaPt1: string = `seq
|
|
128
|
+
[csvTests.fastaMsaPt1]: string = `seq
|
|
117
129
|
FWR-WYV-KHP
|
|
118
130
|
YNR-WYV-KHP
|
|
119
131
|
MWRSWY-CKHP
|
|
120
132
|
`;
|
|
133
|
+
}();
|
|
121
134
|
|
|
122
135
|
const enum Samples {
|
|
123
136
|
peptidesComplex = 'peptidesComplex',
|
|
@@ -135,11 +148,12 @@ MWRSWY-CKHP
|
|
|
135
148
|
testSmilesShort = 'testSmilesShort',
|
|
136
149
|
testCerealCsv = 'testCerealCsv',
|
|
137
150
|
testActivityCliffsCsv = 'testActivityCliffsCsv',
|
|
138
|
-
testSpgi100 = 'testSpgi100',
|
|
139
151
|
testUnichemSources = 'testUnichemSources',
|
|
140
152
|
testDmvOffices = 'testDmvOffices',
|
|
141
153
|
testAlertCollection = 'testAlertCollection',
|
|
142
154
|
testSpgi = 'testSpgi',
|
|
155
|
+
testSpgi100 = 'testSpgi100',
|
|
156
|
+
testUrl = 'testUrl',
|
|
143
157
|
}
|
|
144
158
|
|
|
145
159
|
const samples: { [key: string]: string } = {
|
|
@@ -158,11 +172,12 @@ MWRSWY-CKHP
|
|
|
158
172
|
[Samples.testSmilesShort]: 'System:AppData/Bio/tests/testSmilesShort.csv',
|
|
159
173
|
[Samples.testActivityCliffsCsv]: 'System:AppData/Bio/tests/testActivityCliffs.csv', // smiles
|
|
160
174
|
[Samples.testCerealCsv]: 'System:AppData/Bio/tests/testCereal.csv',
|
|
161
|
-
[Samples.testSpgi100]: 'System:AppData/Bio/tests/testSpgi100.csv',
|
|
162
175
|
[Samples.testUnichemSources]: 'System:AppData/Bio/tests/testUnichemSources.csv',
|
|
163
176
|
[Samples.testDmvOffices]: 'System:AppData/Bio/tests/testDmvOffices.csv',
|
|
164
177
|
[Samples.testAlertCollection]: 'System:AppData/Bio/tests/testAlertCollection.csv',
|
|
178
|
+
[Samples.testSpgi100]: 'System:AppData/Bio/tests/testSpgi100.csv',
|
|
165
179
|
[Samples.testSpgi]: 'System:AppData/Bio/tests/SPGI-derived.csv',
|
|
180
|
+
[Samples.testUrl]: 'System:AppData/Bio/tests/testUrl.csv',
|
|
166
181
|
};
|
|
167
182
|
|
|
168
183
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -192,225 +207,177 @@ MWRSWY-CKHP
|
|
|
192
207
|
return df;
|
|
193
208
|
}
|
|
194
209
|
|
|
195
|
-
const
|
|
196
|
-
const readCsv: (key: string, csv: string) => DfReaderFunc = (key: string, csv: string) => {
|
|
210
|
+
const readCsv: (key: csvTests) => DfReaderFunc = (key: keyof typeof csvData) => {
|
|
197
211
|
return async () => {
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
})();
|
|
204
|
-
}
|
|
205
|
-
return _csvDfs[key];
|
|
212
|
+
// Always recreate test data frame from CSV for reproducible detector behavior in tests.
|
|
213
|
+
const csv: string = csvData[key];
|
|
214
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
215
|
+
await grok.data.detectSemanticTypes(df);
|
|
216
|
+
return df;
|
|
206
217
|
};
|
|
207
218
|
};
|
|
208
219
|
|
|
209
220
|
|
|
210
|
-
test('NegativeEmpty', async () => { await _testNeg(readCsv(
|
|
211
|
-
test('Negative1', async () => { await _testNeg(readCsv(
|
|
212
|
-
test('Negative2', async () => { await _testNeg(readCsv(
|
|
213
|
-
test('Negative3', async () => { await _testNeg(readCsv(
|
|
214
|
-
test('NegativeSmiles', async () => { await _testNeg(readCsv(
|
|
221
|
+
test('NegativeEmpty', async () => { await _testNeg(readCsv(csvTests.negEmpty), 'col1'); });
|
|
222
|
+
test('Negative1', async () => { await _testNeg(readCsv(csvTests.neg1), 'col1'); });
|
|
223
|
+
test('Negative2', async () => { await _testNeg(readCsv(csvTests.neg2), 'col1'); });
|
|
224
|
+
test('Negative3', async () => { await _testNeg(readCsv(csvTests.neg3), 'col1'); });
|
|
225
|
+
test('NegativeSmiles', async () => { await _testNeg(readCsv(csvTests.negSmiles), 'col1'); });
|
|
215
226
|
|
|
216
|
-
test('
|
|
217
|
-
await _testPos(readCsv(
|
|
227
|
+
test('FastaDna1', async () => {
|
|
228
|
+
await _testPos(readCsv(csvTests.fastaDna1), 'seq',
|
|
218
229
|
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false);
|
|
219
230
|
});
|
|
220
|
-
test('
|
|
221
|
-
await _testPos(readCsv(
|
|
231
|
+
test('FastaRna1', async () => {
|
|
232
|
+
await _testPos(readCsv(csvTests.fastaRna1), 'seq',
|
|
222
233
|
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false);
|
|
223
234
|
});
|
|
224
|
-
test('
|
|
225
|
-
await _testPos(readCsv(
|
|
235
|
+
test('FastaPt1', async () => {
|
|
236
|
+
await _testPos(readCsv(csvTests.fastaPt1), 'seq',
|
|
226
237
|
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
227
238
|
});
|
|
228
|
-
test('
|
|
229
|
-
await _testPos(readCsv(
|
|
239
|
+
test('FastaUn', async () => {
|
|
240
|
+
await _testPos(readCsv(csvTests.fastaUn), 'seq',
|
|
241
|
+
NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 12, true);
|
|
242
|
+
});
|
|
243
|
+
test('FastaMsaDna1', async () => {
|
|
244
|
+
await _testPos(readCsv(csvTests.fastaMsaDna1), 'seq',
|
|
230
245
|
NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
|
|
231
246
|
});
|
|
232
247
|
|
|
233
|
-
test('
|
|
234
|
-
await _testPos(readCsv(
|
|
235
|
-
ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
|
|
248
|
+
test('FastaMsaPt1', async () => {
|
|
249
|
+
await _testPos(readCsv(csvTests.fastaMsaPt1), 'seq',
|
|
250
|
+
NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
|
|
236
251
|
});
|
|
237
252
|
|
|
238
253
|
test('SepDna', async () => {
|
|
239
|
-
await _testPos(readCsv(
|
|
254
|
+
await _testPos(readCsv(csvTests.sepDna), 'seq',
|
|
240
255
|
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false, '*');
|
|
241
256
|
});
|
|
242
257
|
test('SepRna', async () => {
|
|
243
|
-
await _testPos(readCsv(
|
|
258
|
+
await _testPos(readCsv(csvTests.sepRna), 'seq',
|
|
244
259
|
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false, '*');
|
|
245
260
|
});
|
|
246
261
|
test('SepPt', async () => {
|
|
247
|
-
await _testPos(readCsv(
|
|
262
|
+
await _testPos(readCsv(csvTests.sepPt), 'seq',
|
|
248
263
|
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.PT, 20, false, '-');
|
|
249
264
|
});
|
|
250
265
|
test('SepUn1', async () => {
|
|
251
|
-
await _testPos(readCsv(
|
|
266
|
+
await _testPos(readCsv(csvTests.sepUn1), 'seq',
|
|
252
267
|
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 8, true, '-');
|
|
253
268
|
});
|
|
254
269
|
test('SepUn2', async () => {
|
|
255
|
-
await _testPos(readCsv(
|
|
270
|
+
await _testPos(readCsv(csvTests.sepUn2), 'seq',
|
|
256
271
|
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 9, true, '/');
|
|
257
272
|
});
|
|
258
273
|
|
|
259
274
|
test('SepMsaN1', async () => {
|
|
260
|
-
await _testPos(readCsv(
|
|
275
|
+
await _testPos(readCsv(csvTests.sepMsaDna1), 'seq',
|
|
261
276
|
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false, '-');
|
|
262
277
|
});
|
|
263
278
|
|
|
264
|
-
test('
|
|
265
|
-
await
|
|
266
|
-
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false)
|
|
267
|
-
|
|
268
|
-
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
269
|
-
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
270
|
-
});
|
|
271
|
-
test('SamplesFastaCsvNegativeLength', async () => {
|
|
272
|
-
await _testNeg(readSamples(Samples.fastaCsv), 'Length');
|
|
273
|
-
});
|
|
274
|
-
test('SamplesFastaCsvNegativeUniProtKB', async () => {
|
|
275
|
-
await _testNeg(readSamples(Samples.fastaCsv), 'UniProtKB');
|
|
279
|
+
test('samplesFastaCsv', async () => {
|
|
280
|
+
await _testDf(readSamples(Samples.fastaCsv), {
|
|
281
|
+
'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
282
|
+
});
|
|
276
283
|
});
|
|
277
284
|
|
|
278
|
-
test('
|
|
279
|
-
await
|
|
280
|
-
'sequence'
|
|
285
|
+
test('samplesFastaFasta', async () => {
|
|
286
|
+
await _testDf(readSamples(Samples.fastaFasta), {
|
|
287
|
+
'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
288
|
+
});
|
|
281
289
|
});
|
|
282
290
|
|
|
283
291
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
284
292
|
// test('samplesPeptidesComplexPositiveAlignedSequence', async () => {
|
|
285
293
|
// await _testPos(readSamples(Samples.peptidesComplex), 'AlignedSequence', 'separator:SEQ:UN', '-');
|
|
286
294
|
// });
|
|
287
|
-
test('
|
|
288
|
-
await
|
|
289
|
-
});
|
|
290
|
-
test('SamplesPeptidesComplexNegativeMeasured', async () => {
|
|
291
|
-
await _testNeg(readSamples(Samples.peptidesComplex), 'Measured');
|
|
292
|
-
});
|
|
293
|
-
test('SamplesPeptidesComplexNegativeValue', async () => {
|
|
294
|
-
await _testNeg(readSamples(Samples.peptidesComplex), 'Value');
|
|
295
|
+
test('samplesPeptidesComplex', async () => {
|
|
296
|
+
await _testDf(readSamples(Samples.peptidesComplex), {} /* no positive */);
|
|
295
297
|
});
|
|
296
298
|
|
|
297
|
-
test('
|
|
298
|
-
await
|
|
299
|
-
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/')
|
|
300
|
-
|
|
301
|
-
test('samplesMsaComplexNegativeActivity', async () => {
|
|
302
|
-
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
299
|
+
test('samplesMsaComplex', async () => {
|
|
300
|
+
await _testDf(readSamples(Samples.msaComplex), {
|
|
301
|
+
'MSA': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/'),
|
|
302
|
+
});
|
|
303
303
|
});
|
|
304
304
|
|
|
305
|
-
test('
|
|
306
|
-
await
|
|
305
|
+
test('samplesIdCsv', async () => {
|
|
306
|
+
await _testDf(readSamples(Samples.testIdCsv), {} /* no positive */);
|
|
307
307
|
});
|
|
308
308
|
|
|
309
|
-
test('
|
|
310
|
-
await
|
|
309
|
+
test('samplesSarSmallCsv', async () => {
|
|
310
|
+
await _testDf(readSamples(Samples.testSmilesCsv), {} /* nopositive */);
|
|
311
311
|
});
|
|
312
312
|
|
|
313
|
-
test('
|
|
314
|
-
await
|
|
315
|
-
NOTATION.HELM, null, null, 160, true,
|
|
313
|
+
test('samplesHelmCsv', async () => {
|
|
314
|
+
await _testDf(readSamples(Samples.helmCsv), {
|
|
315
|
+
'HELM': new PosCol(NOTATION.HELM, null, null, 160, true),
|
|
316
|
+
});
|
|
316
317
|
});
|
|
317
318
|
|
|
318
|
-
|
|
319
|
-
await _testNeg(readSamples(Samples.helmCsv), 'Activity');
|
|
320
|
-
});
|
|
321
|
-
|
|
322
|
-
// sample_testHelm.csb
|
|
319
|
+
// sample_testHelm.csv
|
|
323
320
|
// columns: ID,Test type,HELM string,Valid?,Mol Weight,Mol Formula,SMILES
|
|
324
|
-
test('
|
|
325
|
-
await
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
329
|
-
});
|
|
330
|
-
test('samplesTestHelmPositiveHelmString', async () => {
|
|
331
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', NOTATION.HELM, null, null, 9, true, null);
|
|
332
|
-
});
|
|
333
|
-
test('samplesTestHelmNegativeValid', async () => {
|
|
334
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
335
|
-
});
|
|
336
|
-
test('samplesTestHelmNegativeMolWeight', async () => {
|
|
337
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Weight');
|
|
338
|
-
});
|
|
339
|
-
test('samplesTestHelmNegativeMolFormula', async () => {
|
|
340
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Formula');
|
|
341
|
-
});
|
|
342
|
-
test('samplesTestHelmNegativeSmiles', async () => {
|
|
343
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Smiles');
|
|
321
|
+
test('samplesTestHelmCsv', async () => {
|
|
322
|
+
await _testDf(readSamples(Samples.testHelmCsv), {
|
|
323
|
+
'HELM string': new PosCol(NOTATION.HELM, null, null, 9, true),
|
|
324
|
+
});
|
|
344
325
|
});
|
|
345
326
|
|
|
346
|
-
test('
|
|
347
|
-
|
|
348
|
-
const df: DG.DataFrame = await dfFunc();
|
|
349
|
-
|
|
350
|
-
for (const col of df.columns.toList())
|
|
351
|
-
await _testNeg(dfFunc, col.name);
|
|
327
|
+
test('samplesTestDemogCsv', async () => {
|
|
328
|
+
await _testDf(readSamples(Samples.testDemogCsv), {} /* no positive */);
|
|
352
329
|
});
|
|
353
330
|
|
|
354
|
-
test('
|
|
355
|
-
await
|
|
331
|
+
test('samplesTestSmiles2Csv', async () => {
|
|
332
|
+
await _testDf(readSamples(Samples.testSmiles2Csv), {} /* no positive */);
|
|
356
333
|
});
|
|
357
334
|
|
|
358
|
-
test('
|
|
359
|
-
await
|
|
335
|
+
test('samplesTestSmilesShort', async () => {
|
|
336
|
+
await _testDf(readSamples(Samples.testSmilesShort), {} /* no positive */);
|
|
360
337
|
});
|
|
361
338
|
|
|
362
339
|
test('samplesTestActivityCliffsNegativeSmiles', async () => {
|
|
363
|
-
await
|
|
340
|
+
await _testDf(readSamples(Samples.testActivityCliffsCsv), {} /* no positive */);
|
|
364
341
|
});
|
|
365
342
|
|
|
366
|
-
test('
|
|
367
|
-
await
|
|
368
|
-
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false)
|
|
343
|
+
test('samplesFastaPtCsv', async () => {
|
|
344
|
+
await _testDf(readSamples(Samples.fastaPtCsv), {
|
|
345
|
+
'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
346
|
+
});
|
|
369
347
|
});
|
|
370
348
|
|
|
371
|
-
test('
|
|
372
|
-
await
|
|
349
|
+
test('samplesTestCerealCsv', async () => {
|
|
350
|
+
await _testDf(readSamples(Samples.testCerealCsv), {} /* no positive */);
|
|
373
351
|
});
|
|
374
352
|
|
|
375
|
-
test('
|
|
376
|
-
await
|
|
377
|
-
});
|
|
378
|
-
test('samplesTestSpgi100NegativeScaffoldNames', async () => {
|
|
379
|
-
await _testNeg(readSamples(Samples.testSpgi100), 'Scaffold Names');
|
|
380
|
-
});
|
|
381
|
-
test('samplesTestSpgi100NegativePrimaryScaffoldName', async () => {
|
|
382
|
-
await _testNeg(readSamples(Samples.testSpgi100), 'Primary Scaffold Name');
|
|
383
|
-
});
|
|
384
|
-
test('samplesTestSpgi100NegativeSampleName', async () => {
|
|
385
|
-
await _testNeg(readSamples(Samples.testSpgi100), 'Sample Name');
|
|
353
|
+
test('samplesTestUnichemSources', async () => {
|
|
354
|
+
await _testDf(readSamples(Samples.testUnichemSources), {} /* no positive */);
|
|
386
355
|
});
|
|
387
356
|
|
|
388
|
-
test('
|
|
389
|
-
await
|
|
390
|
-
});
|
|
391
|
-
test('samplesTestUnichemSourcesNegativeBaseIdUrl', async () => {
|
|
392
|
-
await _testNeg(readSamples(Samples.testUnichemSources), 'base_id_url');
|
|
357
|
+
test('samplesTestDmvOffices', async () => {
|
|
358
|
+
await _testDf(readSamples(Samples.testDmvOffices), {} /* no positive */);
|
|
393
359
|
});
|
|
394
360
|
|
|
395
|
-
test('
|
|
396
|
-
await
|
|
361
|
+
test('samplesTestAlertCollection', async () => {
|
|
362
|
+
await _testDf(readSamples(Samples.testAlertCollection), {} /* no positive */);
|
|
397
363
|
});
|
|
398
|
-
|
|
399
|
-
|
|
364
|
+
|
|
365
|
+
test('samplesTestSpgi', async () => {
|
|
366
|
+
await _testDf(readSamples(Samples.testSpgi), {} /* no positive */);
|
|
400
367
|
});
|
|
401
368
|
|
|
402
|
-
test('
|
|
403
|
-
await
|
|
369
|
+
test('samplesTestSpgi100', async () => {
|
|
370
|
+
await _testDf(readSamples(Samples.testSpgi100), {} /* no positive */);
|
|
404
371
|
});
|
|
405
372
|
|
|
406
|
-
test('
|
|
407
|
-
await
|
|
373
|
+
test('samplesTestUrl', async () => {
|
|
374
|
+
await _testDf(readSamples(Samples.testUrl), {} /* no positive */);
|
|
408
375
|
});
|
|
409
376
|
});
|
|
410
377
|
|
|
411
378
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
412
379
|
const df: DG.DataFrame = await readDf();
|
|
413
|
-
const col: DG.Column = df.
|
|
380
|
+
const col: DG.Column = df.getCol(colName)!;
|
|
414
381
|
const semType: string = await grok.functions
|
|
415
382
|
.call('Bio:detectMacromolecule', {col: col}) as unknown as string;
|
|
416
383
|
if (semType)
|
|
@@ -437,7 +404,7 @@ export async function _testPos(
|
|
|
437
404
|
if (semType)
|
|
438
405
|
col.semType = semType;
|
|
439
406
|
|
|
440
|
-
expect(col.semType
|
|
407
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
441
408
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
442
409
|
expect(col.getTag(bioTAGS.aligned), aligned);
|
|
443
410
|
expect(col.getTag(bioTAGS.alphabet), alphabet);
|
|
@@ -452,3 +419,41 @@ export async function _testPos(
|
|
|
452
419
|
expect(uh.alphabet, alphabet);
|
|
453
420
|
}
|
|
454
421
|
}
|
|
422
|
+
|
|
423
|
+
class PosCol {
|
|
424
|
+
constructor(
|
|
425
|
+
public readonly units: string,
|
|
426
|
+
public readonly aligned: string | null,
|
|
427
|
+
public readonly alphabet: string | null,
|
|
428
|
+
public readonly alphabetSize: number,
|
|
429
|
+
public readonly alphabetIsMultichar: boolean,
|
|
430
|
+
public readonly separator?: string
|
|
431
|
+
) { };
|
|
432
|
+
};
|
|
433
|
+
|
|
434
|
+
export async function _testDf(readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }): Promise<void> {
|
|
435
|
+
const df: DG.DataFrame = await readDf();
|
|
436
|
+
const errList: string[] = [];
|
|
437
|
+
for (const colName of df.columns.names()) {
|
|
438
|
+
if (colName in posCols) {
|
|
439
|
+
const p = posCols[colName];
|
|
440
|
+
try {
|
|
441
|
+
await _testPos(readDf, colName, p.units, p.aligned, p.alphabet,
|
|
442
|
+
p.alphabetSize, p.alphabetIsMultichar, p.separator);
|
|
443
|
+
} catch (err: any) {
|
|
444
|
+
const errMsg: string = err.toString();
|
|
445
|
+
errList.push(`Positive col '${colName}' failed: ${errMsg}`);
|
|
446
|
+
}
|
|
447
|
+
} else {
|
|
448
|
+
try {
|
|
449
|
+
await _testNeg(readDf, colName);
|
|
450
|
+
} catch (err: any) {
|
|
451
|
+
const errMsg: string = err.toString();
|
|
452
|
+
errList.push(`Negative col '${colName}' failed: ${errMsg}`);
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
if (errList.length > 0)
|
|
458
|
+
throw new Error(errList.join('\n'));
|
|
459
|
+
}
|
|
@@ -4,7 +4,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
|
|
5
5
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
|
|
7
|
-
import {splitterAsFasta} from '@datagrok-libraries/bio';
|
|
7
|
+
import {splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
|
|
9
9
|
type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
|
|
10
10
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {test, after, before, category, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
|
|
7
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
8
|
+
import {LIB_STORAGE_NAME} from '../utils/monomer-lib';
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
category('monomerLibraries', () => {
|
|
12
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
13
|
+
/** Backup actual user's monomer libraries settings */
|
|
14
|
+
let userLibrariesSettings: any = null;
|
|
15
|
+
|
|
16
|
+
before(async () => {
|
|
17
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
18
|
+
userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
after(async () => {
|
|
22
|
+
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
test('default', async () => {
|
|
26
|
+
// Clear settings to test default
|
|
27
|
+
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
|
|
28
|
+
await monomerLibHelper.loadLibraries(true); // test defaultLib
|
|
29
|
+
|
|
30
|
+
// Currently default monomer lib is empty
|
|
31
|
+
const currentMonomerLib = monomerLibHelper.getBioLib();
|
|
32
|
+
expect(currentMonomerLib.getTypes().length, 0);
|
|
33
|
+
});
|
|
34
|
+
});
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import {category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {runPepsea} from '../utils/pepsea';
|
|
5
|
+
|
|
6
|
+
category('PepSeA', () => {
|
|
7
|
+
const testCsv = `HELM,MSA
|
|
8
|
+
"PEPTIDE1{F.L.R.G.W.[MeF].Y.S.N.N.C}$$$$","F.L.R.G.W.MeF.Y..S.N.N.C"
|
|
9
|
+
"PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.N.C}$$$$","F.L.R.G.Y.MeF.Y.W...N.C"
|
|
10
|
+
"PEPTIDE1{F.G.Y.[MeF].Y.W.S.D.N.C}$$$$","F...G.Y.MeF.Y.W.S.D.N.C"
|
|
11
|
+
"PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.S.N.D.C}$$$$","F.L.R.G.Y.MeF.Y.W.S.N.D.C"
|
|
12
|
+
"PEPTIDE1{F.V.R.G.Y.[MeF].Y.W.S.N.C}$$$$","F.V.R.G.Y.MeF.Y.W.S..N.C"`;
|
|
13
|
+
|
|
14
|
+
test('Basic alignment', async () => {
|
|
15
|
+
const table = DG.DataFrame.fromCsv(testCsv);
|
|
16
|
+
const alignedCol = await runPepsea(table.getCol('HELM'), 'msa(HELM)');
|
|
17
|
+
const alignedTestCol = table.getCol('MSA');
|
|
18
|
+
for (let i = 0; i < alignedCol.length; ++i)
|
|
19
|
+
expect(alignedCol.get(i) == alignedTestCol.get(i), true);
|
|
20
|
+
}, {skipReason: 'GROK-12764'});
|
|
21
|
+
});
|