@datagrok/bio 2.1.11 → 2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -12
- package/css/helm.css +10 -0
- package/detectors.js +83 -59
- package/dist/package-test.js +2 -68651
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -66040
- package/dist/package.js.map +1 -0
- package/dockerfiles/Dockerfile +86 -0
- package/files/icons/composition-analysis.svg +17 -0
- package/files/icons/sequence-diversity-viewer.svg +4 -0
- package/files/icons/sequence-similarity-viewer.svg +4 -0
- package/files/icons/vdregions-viewer.svg +22 -0
- package/files/icons/weblogo-viewer.svg +7 -0
- package/files/tests/testUrl.csv +11 -0
- package/files/tests/toAtomicLevelTest.csv +4 -0
- package/package.json +29 -32
- package/src/analysis/sequence-activity-cliffs.ts +15 -13
- package/src/analysis/sequence-diversity-viewer.ts +3 -2
- package/src/analysis/sequence-search-base-viewer.ts +4 -2
- package/src/analysis/sequence-similarity-viewer.ts +4 -4
- package/src/analysis/sequence-space.ts +2 -1
- package/src/calculations/monomerLevelMols.ts +6 -6
- package/src/package-test.ts +9 -2
- package/src/package.ts +230 -145
- package/src/substructure-search/substructure-search.ts +25 -22
- package/src/tests/Palettes-test.ts +9 -9
- package/src/tests/WebLogo-positions-test.ts +131 -68
- package/src/tests/_first-tests.ts +9 -0
- package/src/tests/activity-cliffs-tests.ts +8 -7
- package/src/tests/activity-cliffs-utils.ts +17 -9
- package/src/tests/bio-tests.ts +30 -21
- package/src/tests/checkInputColumn-tests.ts +17 -17
- package/src/tests/converters-test.ts +81 -46
- package/src/tests/detectors-benchmark-tests.ts +17 -17
- package/src/tests/detectors-tests.ts +190 -178
- package/src/tests/fasta-export-tests.ts +2 -3
- package/src/tests/monomer-libraries-tests.ts +34 -0
- package/src/tests/pepsea-tests.ts +21 -0
- package/src/tests/renderers-test.ts +33 -29
- package/src/tests/sequence-space-test.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +4 -4
- package/src/tests/splitters-test.ts +6 -7
- package/src/tests/substructure-filters-tests.ts +23 -1
- package/src/tests/utils/sequences-generators.ts +7 -7
- package/src/tests/utils.ts +2 -1
- package/src/tests/viewers.ts +16 -0
- package/src/utils/cell-renderer.ts +116 -54
- package/src/utils/constants.ts +7 -6
- package/src/utils/convert.ts +17 -11
- package/src/utils/monomer-lib.ts +174 -0
- package/src/utils/multiple-sequence-alignment.ts +49 -26
- package/src/utils/pepsea.ts +78 -0
- package/src/utils/save-as-fasta.ts +9 -8
- package/src/utils/ui-utils.ts +15 -3
- package/src/viewers/vd-regions-viewer.ts +125 -83
- package/src/viewers/web-logo-viewer.ts +1031 -0
- package/src/widgets/bio-substructure-filter.ts +38 -24
- package/tsconfig.json +71 -72
- package/webpack.config.js +4 -11
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -8988
- package/jest.config.js +0 -33
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -98
- package/test-Bio-91c83d8913ff-bb573307.html +0 -392
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
6
|
|
|
8
7
|
import {importFasta} from '../package';
|
|
8
|
+
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
10
|
|
|
10
11
|
/*
|
|
11
12
|
// snippet to list df columns of semType='Macromolecule' (false positive)
|
|
@@ -21,103 +22,115 @@ for (let i = 0; i < df.columns.length; i++) {
|
|
|
21
22
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
22
23
|
|
|
23
24
|
category('detectors', () => {
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
const enum csvTests {
|
|
26
|
+
negEmpty = 'negEmpty',
|
|
27
|
+
neg1 = 'neg1',
|
|
28
|
+
neg2 = 'neg2',
|
|
29
|
+
neg3 = 'neg3',
|
|
30
|
+
negSmiles = 'negSmiles',
|
|
31
|
+
fastaDna1 = 'csvFastaDna1',
|
|
32
|
+
fastaRna1 = 'fastaRna1',
|
|
33
|
+
fastaPt1 = 'fastaPt1',
|
|
34
|
+
fastaUn = 'fastaUn',
|
|
35
|
+
sepDna = 'sepDna',
|
|
36
|
+
sepRna = 'sepRna',
|
|
37
|
+
sepPt = 'sepPt',
|
|
38
|
+
sepUn1 = 'sepUn1',
|
|
39
|
+
sepUn2 = 'sepUn2',
|
|
40
|
+
sepMsaDna1 = 'sepMsaDna1',
|
|
41
|
+
fastaMsaDna1 = 'fastaMsaDna1',
|
|
42
|
+
fastaMsaPt1 = 'fastaMsaPt1',
|
|
43
|
+
}
|
|
28
44
|
|
|
29
|
-
const
|
|
45
|
+
const csvData = new class {
|
|
46
|
+
[csvTests.negEmpty]: string = `id,col1
|
|
30
47
|
1,
|
|
31
48
|
2,
|
|
32
49
|
3,
|
|
33
50
|
4,
|
|
34
51
|
5,`;
|
|
35
|
-
|
|
36
|
-
|
|
52
|
+
[csvTests.neg1]: string = `col1
|
|
53
|
+
1
|
|
54
|
+
2
|
|
55
|
+
3`;
|
|
56
|
+
[csvTests.neg2]: string = `col1
|
|
37
57
|
4
|
|
38
58
|
5
|
|
39
59
|
6
|
|
40
60
|
7`;
|
|
41
|
-
|
|
42
|
-
const csvDf3: string = `col1
|
|
61
|
+
[csvTests.neg3]: string = `col1
|
|
43
62
|
8
|
|
44
63
|
9
|
|
45
64
|
10
|
|
46
65
|
11
|
|
47
66
|
12`;
|
|
48
|
-
|
|
49
|
-
const csvDfSmiles: string = `col1
|
|
67
|
+
[csvTests.negSmiles]: string = `col1
|
|
50
68
|
CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
51
69
|
C1CCCCC1
|
|
52
70
|
CCCCCC
|
|
53
71
|
`;
|
|
54
|
-
|
|
55
|
-
const csvDfDna1: string = `seq
|
|
72
|
+
[csvTests.fastaDna1]: string = `seq
|
|
56
73
|
ACGTC
|
|
57
74
|
CAGTGT
|
|
58
75
|
TTCAAC
|
|
59
76
|
`;
|
|
60
|
-
|
|
61
|
-
const csvDfRna1: string = `seq
|
|
77
|
+
[csvTests.fastaRna1]: string = `seq
|
|
62
78
|
ACGUC
|
|
63
79
|
CAGUGU
|
|
64
80
|
UUCAAC
|
|
65
81
|
`;
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
const csvDfPt1: string = `seq
|
|
82
|
+
/** Pure amino acids sequence */
|
|
83
|
+
[csvTests.fastaPt1]: string = `seq
|
|
69
84
|
FWPHEY
|
|
70
85
|
YNRQWYV
|
|
71
86
|
MKPSEYV
|
|
72
87
|
`;
|
|
73
|
-
|
|
74
|
-
|
|
88
|
+
[csvTests.fastaUn]: string = `seq
|
|
89
|
+
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
90
|
+
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
91
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
92
|
+
`;
|
|
93
|
+
[csvTests.sepDna]: string = `seq
|
|
75
94
|
A*C*G*T*C
|
|
76
95
|
C*A*G*T*G*T
|
|
77
96
|
T*T*C*A*A*C
|
|
78
97
|
`;
|
|
79
|
-
|
|
80
|
-
const csvDfSepRna: string = `seq
|
|
98
|
+
[csvTests.sepRna]: string = `seq
|
|
81
99
|
A*C*G*U*C
|
|
82
100
|
C*A*G*U*G*U
|
|
83
101
|
U*U*C*A*A*C
|
|
84
102
|
`;
|
|
85
|
-
|
|
86
|
-
const csvDfSepPt: string = `seq
|
|
103
|
+
[csvTests.sepPt]: string = `seq
|
|
87
104
|
F-W-P-H-E-Y
|
|
88
105
|
Y-N-R-Q-W-Y-V
|
|
89
106
|
M-K-P-S-E-Y-V
|
|
90
107
|
`;
|
|
91
|
-
|
|
92
|
-
const csvDfSepUn1: string = `seq
|
|
108
|
+
[csvTests.sepUn1]: string = `seq
|
|
93
109
|
abc-dfgg-abc1-cfr3-rty-wert
|
|
94
110
|
rut12-her2-rty-wert-abc-abc1-dfgg
|
|
95
111
|
rut12-rty-her2-abc-cfr3-wert-rut12
|
|
96
112
|
`;
|
|
97
|
-
|
|
98
|
-
const csvDfSepUn2: string = `seq
|
|
113
|
+
[csvTests.sepUn2]: string = `seq
|
|
99
114
|
abc/dfgg/abc1/cfr3/rty/wert
|
|
100
115
|
rut12/her2/rty/wert//abc/abc1/dfgg
|
|
101
116
|
rut12/rty/her2/abc/cfr3//wert/rut12
|
|
102
117
|
`;
|
|
103
|
-
|
|
104
|
-
const csvDfSepMsaDna1: string = `seq
|
|
118
|
+
[csvTests.sepMsaDna1]: string = `seq
|
|
105
119
|
A-C--G-T--C-T
|
|
106
120
|
C-A-C--T--G-T
|
|
107
121
|
A-C-C-G-T-A-C-T
|
|
108
122
|
`;
|
|
109
|
-
|
|
110
|
-
const csvDfMsaDna1: string = `seq
|
|
123
|
+
[csvTests.fastaMsaDna1]: string = `seq
|
|
111
124
|
AC-GT-CT
|
|
112
125
|
CAC-T-GT
|
|
113
126
|
ACCGTACT
|
|
114
127
|
`;
|
|
115
|
-
|
|
116
|
-
const csvDfMsaPt1: string = `seq
|
|
128
|
+
[csvTests.fastaMsaPt1]: string = `seq
|
|
117
129
|
FWR-WYV-KHP
|
|
118
130
|
YNR-WYV-KHP
|
|
119
131
|
MWRSWY-CKHP
|
|
120
132
|
`;
|
|
133
|
+
}();
|
|
121
134
|
|
|
122
135
|
const enum Samples {
|
|
123
136
|
peptidesComplex = 'peptidesComplex',
|
|
@@ -135,11 +148,12 @@ MWRSWY-CKHP
|
|
|
135
148
|
testSmilesShort = 'testSmilesShort',
|
|
136
149
|
testCerealCsv = 'testCerealCsv',
|
|
137
150
|
testActivityCliffsCsv = 'testActivityCliffsCsv',
|
|
138
|
-
testSpgi100 = 'testSpgi100',
|
|
139
151
|
testUnichemSources = 'testUnichemSources',
|
|
140
152
|
testDmvOffices = 'testDmvOffices',
|
|
141
153
|
testAlertCollection = 'testAlertCollection',
|
|
142
154
|
testSpgi = 'testSpgi',
|
|
155
|
+
testSpgi100 = 'testSpgi100',
|
|
156
|
+
testUrl = 'testUrl',
|
|
143
157
|
}
|
|
144
158
|
|
|
145
159
|
const samples: { [key: string]: string } = {
|
|
@@ -158,11 +172,12 @@ MWRSWY-CKHP
|
|
|
158
172
|
[Samples.testSmilesShort]: 'System:AppData/Bio/tests/testSmilesShort.csv',
|
|
159
173
|
[Samples.testActivityCliffsCsv]: 'System:AppData/Bio/tests/testActivityCliffs.csv', // smiles
|
|
160
174
|
[Samples.testCerealCsv]: 'System:AppData/Bio/tests/testCereal.csv',
|
|
161
|
-
[Samples.testSpgi100]: 'System:AppData/Bio/tests/testSpgi100.csv',
|
|
162
175
|
[Samples.testUnichemSources]: 'System:AppData/Bio/tests/testUnichemSources.csv',
|
|
163
176
|
[Samples.testDmvOffices]: 'System:AppData/Bio/tests/testDmvOffices.csv',
|
|
164
177
|
[Samples.testAlertCollection]: 'System:AppData/Bio/tests/testAlertCollection.csv',
|
|
178
|
+
[Samples.testSpgi100]: 'System:AppData/Bio/tests/testSpgi100.csv',
|
|
165
179
|
[Samples.testSpgi]: 'System:AppData/Bio/tests/SPGI-derived.csv',
|
|
180
|
+
[Samples.testUrl]: 'System:AppData/Bio/tests/testUrl.csv',
|
|
166
181
|
};
|
|
167
182
|
|
|
168
183
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -192,217 +207,177 @@ MWRSWY-CKHP
|
|
|
192
207
|
return df;
|
|
193
208
|
}
|
|
194
209
|
|
|
195
|
-
const
|
|
196
|
-
const readCsv: (key: string, csv: string) => DfReaderFunc = (key: string, csv: string) => {
|
|
210
|
+
const readCsv: (key: csvTests) => DfReaderFunc = (key: keyof typeof csvData) => {
|
|
197
211
|
return async () => {
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
})();
|
|
204
|
-
}
|
|
205
|
-
return _csvDfs[key];
|
|
212
|
+
// Always recreate test data frame from CSV for reproducible detector behavior in tests.
|
|
213
|
+
const csv: string = csvData[key];
|
|
214
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
215
|
+
await grok.data.detectSemanticTypes(df);
|
|
216
|
+
return df;
|
|
206
217
|
};
|
|
207
218
|
};
|
|
208
219
|
|
|
209
220
|
|
|
210
|
-
test('NegativeEmpty', async () => { await _testNeg(readCsv(
|
|
211
|
-
test('Negative1', async () => { await _testNeg(readCsv(
|
|
212
|
-
test('Negative2', async () => { await _testNeg(readCsv(
|
|
213
|
-
test('Negative3', async () => { await _testNeg(readCsv(
|
|
214
|
-
test('NegativeSmiles', async () => { await _testNeg(readCsv(
|
|
221
|
+
test('NegativeEmpty', async () => { await _testNeg(readCsv(csvTests.negEmpty), 'col1'); });
|
|
222
|
+
test('Negative1', async () => { await _testNeg(readCsv(csvTests.neg1), 'col1'); });
|
|
223
|
+
test('Negative2', async () => { await _testNeg(readCsv(csvTests.neg2), 'col1'); });
|
|
224
|
+
test('Negative3', async () => { await _testNeg(readCsv(csvTests.neg3), 'col1'); });
|
|
225
|
+
test('NegativeSmiles', async () => { await _testNeg(readCsv(csvTests.negSmiles), 'col1'); });
|
|
215
226
|
|
|
216
|
-
test('
|
|
217
|
-
await _testPos(readCsv(
|
|
227
|
+
test('FastaDna1', async () => {
|
|
228
|
+
await _testPos(readCsv(csvTests.fastaDna1), 'seq',
|
|
229
|
+
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false);
|
|
218
230
|
});
|
|
219
|
-
test('
|
|
220
|
-
await _testPos(readCsv(
|
|
231
|
+
test('FastaRna1', async () => {
|
|
232
|
+
await _testPos(readCsv(csvTests.fastaRna1), 'seq',
|
|
233
|
+
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false);
|
|
221
234
|
});
|
|
222
|
-
test('
|
|
223
|
-
await _testPos(readCsv(
|
|
235
|
+
test('FastaPt1', async () => {
|
|
236
|
+
await _testPos(readCsv(csvTests.fastaPt1), 'seq',
|
|
237
|
+
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
224
238
|
});
|
|
225
|
-
test('
|
|
226
|
-
await _testPos(readCsv(
|
|
239
|
+
test('FastaUn', async () => {
|
|
240
|
+
await _testPos(readCsv(csvTests.fastaUn), 'seq',
|
|
241
|
+
NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 12, true);
|
|
242
|
+
});
|
|
243
|
+
test('FastaMsaDna1', async () => {
|
|
244
|
+
await _testPos(readCsv(csvTests.fastaMsaDna1), 'seq',
|
|
245
|
+
NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
|
|
227
246
|
});
|
|
228
247
|
|
|
229
|
-
test('
|
|
230
|
-
await _testPos(readCsv(
|
|
231
|
-
|
|
248
|
+
test('FastaMsaPt1', async () => {
|
|
249
|
+
await _testPos(readCsv(csvTests.fastaMsaPt1), 'seq',
|
|
250
|
+
NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
|
|
232
251
|
});
|
|
233
252
|
|
|
234
253
|
test('SepDna', async () => {
|
|
235
|
-
await _testPos(readCsv(
|
|
254
|
+
await _testPos(readCsv(csvTests.sepDna), 'seq',
|
|
255
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false, '*');
|
|
236
256
|
});
|
|
237
257
|
test('SepRna', async () => {
|
|
238
|
-
await _testPos(readCsv(
|
|
258
|
+
await _testPos(readCsv(csvTests.sepRna), 'seq',
|
|
259
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false, '*');
|
|
239
260
|
});
|
|
240
261
|
test('SepPt', async () => {
|
|
241
|
-
await _testPos(readCsv(
|
|
242
|
-
|
|
262
|
+
await _testPos(readCsv(csvTests.sepPt), 'seq',
|
|
263
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.PT, 20, false, '-');
|
|
243
264
|
});
|
|
244
265
|
test('SepUn1', async () => {
|
|
245
|
-
await _testPos(readCsv(
|
|
246
|
-
|
|
266
|
+
await _testPos(readCsv(csvTests.sepUn1), 'seq',
|
|
267
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 8, true, '-');
|
|
247
268
|
});
|
|
248
269
|
test('SepUn2', async () => {
|
|
249
|
-
await _testPos(readCsv(
|
|
250
|
-
|
|
270
|
+
await _testPos(readCsv(csvTests.sepUn2), 'seq',
|
|
271
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 9, true, '/');
|
|
251
272
|
});
|
|
252
273
|
|
|
253
274
|
test('SepMsaN1', async () => {
|
|
254
|
-
await _testPos(readCsv(
|
|
255
|
-
|
|
275
|
+
await _testPos(readCsv(csvTests.sepMsaDna1), 'seq',
|
|
276
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false, '-');
|
|
256
277
|
});
|
|
257
278
|
|
|
258
|
-
test('
|
|
259
|
-
await
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
263
|
-
});
|
|
264
|
-
test('SamplesFastaCsvNegativeLength', async () => {
|
|
265
|
-
await _testNeg(readSamples(Samples.fastaCsv), 'Length');
|
|
266
|
-
});
|
|
267
|
-
test('SamplesFastaCsvNegativeUniProtKB', async () => {
|
|
268
|
-
await _testNeg(readSamples(Samples.fastaCsv), 'UniProtKB');
|
|
279
|
+
test('samplesFastaCsv', async () => {
|
|
280
|
+
await _testDf(readSamples(Samples.fastaCsv), {
|
|
281
|
+
'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
282
|
+
});
|
|
269
283
|
});
|
|
270
284
|
|
|
271
|
-
test('
|
|
272
|
-
await
|
|
273
|
-
'sequence'
|
|
285
|
+
test('samplesFastaFasta', async () => {
|
|
286
|
+
await _testDf(readSamples(Samples.fastaFasta), {
|
|
287
|
+
'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
288
|
+
});
|
|
274
289
|
});
|
|
275
290
|
|
|
276
291
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
277
292
|
// test('samplesPeptidesComplexPositiveAlignedSequence', async () => {
|
|
278
293
|
// await _testPos(readSamples(Samples.peptidesComplex), 'AlignedSequence', 'separator:SEQ:UN', '-');
|
|
279
294
|
// });
|
|
280
|
-
test('
|
|
281
|
-
await
|
|
282
|
-
});
|
|
283
|
-
test('SamplesPeptidesComplexNegativeMeasured', async () => {
|
|
284
|
-
await _testNeg(readSamples(Samples.peptidesComplex), 'Measured');
|
|
285
|
-
});
|
|
286
|
-
test('SamplesPeptidesComplexNegativeValue', async () => {
|
|
287
|
-
await _testNeg(readSamples(Samples.peptidesComplex), 'Value');
|
|
288
|
-
});
|
|
289
|
-
|
|
290
|
-
test('samplesMsaComplexUn', async () => {
|
|
291
|
-
await _testPos(readSamples(Samples.msaComplex), 'MSA',
|
|
292
|
-
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.UN, 161, true, '/');
|
|
293
|
-
});
|
|
294
|
-
test('samplesMsaComplexNegativeActivity', async () => {
|
|
295
|
-
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
295
|
+
test('samplesPeptidesComplex', async () => {
|
|
296
|
+
await _testDf(readSamples(Samples.peptidesComplex), {} /* no positive */);
|
|
296
297
|
});
|
|
297
298
|
|
|
298
|
-
test('
|
|
299
|
-
await
|
|
299
|
+
test('samplesMsaComplex', async () => {
|
|
300
|
+
await _testDf(readSamples(Samples.msaComplex), {
|
|
301
|
+
'MSA': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/'),
|
|
302
|
+
});
|
|
300
303
|
});
|
|
301
304
|
|
|
302
|
-
test('
|
|
303
|
-
await
|
|
305
|
+
test('samplesIdCsv', async () => {
|
|
306
|
+
await _testDf(readSamples(Samples.testIdCsv), {} /* no positive */);
|
|
304
307
|
});
|
|
305
308
|
|
|
306
|
-
test('
|
|
307
|
-
await
|
|
309
|
+
test('samplesSarSmallCsv', async () => {
|
|
310
|
+
await _testDf(readSamples(Samples.testSmilesCsv), {} /* nopositive */);
|
|
308
311
|
});
|
|
309
312
|
|
|
310
|
-
test('
|
|
311
|
-
await
|
|
313
|
+
test('samplesHelmCsv', async () => {
|
|
314
|
+
await _testDf(readSamples(Samples.helmCsv), {
|
|
315
|
+
'HELM': new PosCol(NOTATION.HELM, null, null, 160, true),
|
|
316
|
+
});
|
|
312
317
|
});
|
|
313
318
|
|
|
314
|
-
// sample_testHelm.
|
|
319
|
+
// sample_testHelm.csv
|
|
315
320
|
// columns: ID,Test type,HELM string,Valid?,Mol Weight,Mol Formula,SMILES
|
|
316
|
-
test('
|
|
317
|
-
await
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
321
|
-
});
|
|
322
|
-
test('samplesTestHelmPositiveHelmString', async () => {
|
|
323
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', bio.NOTATION.HELM, null, null, 9, true, null);
|
|
324
|
-
});
|
|
325
|
-
test('samplesTestHelmNegativeValid', async () => {
|
|
326
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
327
|
-
});
|
|
328
|
-
test('samplesTestHelmNegativeMolWeight', async () => {
|
|
329
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Weight');
|
|
330
|
-
});
|
|
331
|
-
test('samplesTestHelmNegativeMolFormula', async () => {
|
|
332
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Formula');
|
|
321
|
+
test('samplesTestHelmCsv', async () => {
|
|
322
|
+
await _testDf(readSamples(Samples.testHelmCsv), {
|
|
323
|
+
'HELM string': new PosCol(NOTATION.HELM, null, null, 9, true),
|
|
324
|
+
});
|
|
333
325
|
});
|
|
334
|
-
test('samplesTestHelmNegativeSmiles', async () => {
|
|
335
|
-
await _testNeg(readSamples(Samples.testHelmCsv), 'Smiles');
|
|
336
|
-
});
|
|
337
|
-
|
|
338
|
-
test('samplesTestDemogNegativeAll', async () => {
|
|
339
|
-
const dfFunc: DfReaderFunc = readSamples(Samples.testDemogCsv);
|
|
340
|
-
const df: DG.DataFrame = await dfFunc();
|
|
341
326
|
|
|
342
|
-
|
|
343
|
-
|
|
327
|
+
test('samplesTestDemogCsv', async () => {
|
|
328
|
+
await _testDf(readSamples(Samples.testDemogCsv), {} /* no positive */);
|
|
344
329
|
});
|
|
345
330
|
|
|
346
|
-
test('
|
|
347
|
-
await
|
|
331
|
+
test('samplesTestSmiles2Csv', async () => {
|
|
332
|
+
await _testDf(readSamples(Samples.testSmiles2Csv), {} /* no positive */);
|
|
348
333
|
});
|
|
349
334
|
|
|
350
|
-
test('
|
|
351
|
-
await
|
|
335
|
+
test('samplesTestSmilesShort', async () => {
|
|
336
|
+
await _testDf(readSamples(Samples.testSmilesShort), {} /* no positive */);
|
|
352
337
|
});
|
|
353
338
|
|
|
354
339
|
test('samplesTestActivityCliffsNegativeSmiles', async () => {
|
|
355
|
-
await
|
|
340
|
+
await _testDf(readSamples(Samples.testActivityCliffsCsv), {} /* no positive */);
|
|
356
341
|
});
|
|
357
342
|
|
|
358
|
-
test('
|
|
359
|
-
await
|
|
360
|
-
|
|
343
|
+
test('samplesFastaPtCsv', async () => {
|
|
344
|
+
await _testDf(readSamples(Samples.fastaPtCsv), {
|
|
345
|
+
'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
346
|
+
});
|
|
361
347
|
});
|
|
362
348
|
|
|
363
|
-
test('
|
|
364
|
-
await
|
|
349
|
+
test('samplesTestCerealCsv', async () => {
|
|
350
|
+
await _testDf(readSamples(Samples.testCerealCsv), {} /* no positive */);
|
|
365
351
|
});
|
|
366
352
|
|
|
367
|
-
test('
|
|
368
|
-
await
|
|
369
|
-
});
|
|
370
|
-
test('samplesTestSpgi100NegativeScaffoldNames', async () => {
|
|
371
|
-
await _testNeg(readSamples(Samples.testSpgi100), 'Scaffold Names');
|
|
372
|
-
});
|
|
373
|
-
test('samplesTestSpgi100NegativePrimaryScaffoldName', async () => {
|
|
374
|
-
await _testNeg(readSamples(Samples.testSpgi100), 'Primary Scaffold Name');
|
|
375
|
-
});
|
|
376
|
-
test('samplesTestSpgi100NegativeSampleName', async () => {
|
|
377
|
-
await _testNeg(readSamples(Samples.testSpgi100), 'Sample Name');
|
|
353
|
+
test('samplesTestUnichemSources', async () => {
|
|
354
|
+
await _testDf(readSamples(Samples.testUnichemSources), {} /* no positive */);
|
|
378
355
|
});
|
|
379
356
|
|
|
380
|
-
test('
|
|
381
|
-
await
|
|
382
|
-
});
|
|
383
|
-
test('samplesTestUnichemSourcesNegativeBaseIdUrl', async () => {
|
|
384
|
-
await _testNeg(readSamples(Samples.testUnichemSources), 'base_id_url');
|
|
357
|
+
test('samplesTestDmvOffices', async () => {
|
|
358
|
+
await _testDf(readSamples(Samples.testDmvOffices), {} /* no positive */);
|
|
385
359
|
});
|
|
386
360
|
|
|
387
|
-
test('
|
|
388
|
-
await
|
|
361
|
+
test('samplesTestAlertCollection', async () => {
|
|
362
|
+
await _testDf(readSamples(Samples.testAlertCollection), {} /* no positive */);
|
|
389
363
|
});
|
|
390
|
-
|
|
391
|
-
|
|
364
|
+
|
|
365
|
+
test('samplesTestSpgi', async () => {
|
|
366
|
+
await _testDf(readSamples(Samples.testSpgi), {} /* no positive */);
|
|
392
367
|
});
|
|
393
368
|
|
|
394
|
-
test('
|
|
395
|
-
await
|
|
369
|
+
test('samplesTestSpgi100', async () => {
|
|
370
|
+
await _testDf(readSamples(Samples.testSpgi100), {} /* no positive */);
|
|
396
371
|
});
|
|
397
372
|
|
|
398
|
-
test('
|
|
399
|
-
await
|
|
373
|
+
test('samplesTestUrl', async () => {
|
|
374
|
+
await _testDf(readSamples(Samples.testUrl), {} /* no positive */);
|
|
400
375
|
});
|
|
401
376
|
});
|
|
402
377
|
|
|
403
378
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
404
379
|
const df: DG.DataFrame = await readDf();
|
|
405
|
-
const col: DG.Column = df.
|
|
380
|
+
const col: DG.Column = df.getCol(colName)!;
|
|
406
381
|
const semType: string = await grok.functions
|
|
407
382
|
.call('Bio:detectMacromolecule', {col: col}) as unknown as string;
|
|
408
383
|
if (semType)
|
|
@@ -429,14 +404,14 @@ export async function _testPos(
|
|
|
429
404
|
if (semType)
|
|
430
405
|
col.semType = semType;
|
|
431
406
|
|
|
432
|
-
expect(col.semType
|
|
407
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
433
408
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
434
|
-
expect(col.getTag(
|
|
435
|
-
expect(col.getTag(
|
|
409
|
+
expect(col.getTag(bioTAGS.aligned), aligned);
|
|
410
|
+
expect(col.getTag(bioTAGS.alphabet), alphabet);
|
|
436
411
|
if (separator)
|
|
437
|
-
expect(col.getTag(
|
|
412
|
+
expect(col.getTag(bioTAGS.separator), separator);
|
|
438
413
|
|
|
439
|
-
const uh = new
|
|
414
|
+
const uh = new UnitsHandler(col);
|
|
440
415
|
expect(uh.getAlphabetSize(), alphabetSize);
|
|
441
416
|
expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
442
417
|
if (!uh.isHelm()) {
|
|
@@ -445,3 +420,40 @@ export async function _testPos(
|
|
|
445
420
|
}
|
|
446
421
|
}
|
|
447
422
|
|
|
423
|
+
class PosCol {
|
|
424
|
+
constructor(
|
|
425
|
+
public readonly units: string,
|
|
426
|
+
public readonly aligned: string | null,
|
|
427
|
+
public readonly alphabet: string | null,
|
|
428
|
+
public readonly alphabetSize: number,
|
|
429
|
+
public readonly alphabetIsMultichar: boolean,
|
|
430
|
+
public readonly separator?: string
|
|
431
|
+
) { };
|
|
432
|
+
};
|
|
433
|
+
|
|
434
|
+
export async function _testDf(readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }): Promise<void> {
|
|
435
|
+
const df: DG.DataFrame = await readDf();
|
|
436
|
+
const errList: string[] = [];
|
|
437
|
+
for (const colName of df.columns.names()) {
|
|
438
|
+
if (colName in posCols) {
|
|
439
|
+
const p = posCols[colName];
|
|
440
|
+
try {
|
|
441
|
+
await _testPos(readDf, colName, p.units, p.aligned, p.alphabet,
|
|
442
|
+
p.alphabetSize, p.alphabetIsMultichar, p.separator);
|
|
443
|
+
} catch (err: any) {
|
|
444
|
+
const errMsg: string = err.toString();
|
|
445
|
+
errList.push(`Positive col '${colName}' failed: ${errMsg}`);
|
|
446
|
+
}
|
|
447
|
+
} else {
|
|
448
|
+
try {
|
|
449
|
+
await _testNeg(readDf, colName);
|
|
450
|
+
} catch (err: any) {
|
|
451
|
+
const errMsg: string = err.toString();
|
|
452
|
+
errList.push(`Negative col '${colName}' failed: ${errMsg}`);
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
if (errList.length > 0)
|
|
458
|
+
throw new Error(errList.join('\n'));
|
|
459
|
+
}
|
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
7
6
|
import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
|
|
7
|
+
import {splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
|
|
9
9
|
type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
|
|
10
10
|
|
|
11
11
|
category('fastaExport', () => {
|
|
12
|
-
|
|
13
12
|
enum WrapDataTest {
|
|
14
13
|
single = 'single',
|
|
15
14
|
multi = 'multi'
|
|
@@ -88,7 +87,7 @@ MRGGL
|
|
|
88
87
|
});
|
|
89
88
|
|
|
90
89
|
function _testWrapSequence(testKey: string, lineWidth: number = 10) {
|
|
91
|
-
const splitter =
|
|
90
|
+
const splitter = splitterAsFasta;
|
|
92
91
|
|
|
93
92
|
const srcSeq: string = wrapData[testKey].src;
|
|
94
93
|
const wrapRes: string[] = wrapSequence(srcSeq, splitter, lineWidth);
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {test, after, before, category, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
|
|
7
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
8
|
+
import {LIB_STORAGE_NAME} from '../utils/monomer-lib';
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
category('monomerLibraries', () => {
|
|
12
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
13
|
+
/** Backup actual user's monomer libraries settings */
|
|
14
|
+
let userLibrariesSettings: any = null;
|
|
15
|
+
|
|
16
|
+
before(async () => {
|
|
17
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
18
|
+
userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
after(async () => {
|
|
22
|
+
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
test('default', async () => {
|
|
26
|
+
// Clear settings to test default
|
|
27
|
+
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
|
|
28
|
+
await monomerLibHelper.loadLibraries(true); // test defaultLib
|
|
29
|
+
|
|
30
|
+
// Currently default monomer lib is empty
|
|
31
|
+
const currentMonomerLib = monomerLibHelper.getBioLib();
|
|
32
|
+
expect(currentMonomerLib.getTypes().length, 0);
|
|
33
|
+
});
|
|
34
|
+
});
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import {category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {runPepsea} from '../utils/pepsea';
|
|
5
|
+
|
|
6
|
+
category('PepSeA', () => {
|
|
7
|
+
const testCsv = `HELM,MSA
|
|
8
|
+
"PEPTIDE1{F.L.R.G.W.[MeF].Y.S.N.N.C}$$$$","F.L.R.G.W.MeF.Y..S.N.N.C"
|
|
9
|
+
"PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.N.C}$$$$","F.L.R.G.Y.MeF.Y.W...N.C"
|
|
10
|
+
"PEPTIDE1{F.G.Y.[MeF].Y.W.S.D.N.C}$$$$","F...G.Y.MeF.Y.W.S.D.N.C"
|
|
11
|
+
"PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.S.N.D.C}$$$$","F.L.R.G.Y.MeF.Y.W.S.N.D.C"
|
|
12
|
+
"PEPTIDE1{F.V.R.G.Y.[MeF].Y.W.S.N.C}$$$$","F.V.R.G.Y.MeF.Y.W.S..N.C"`;
|
|
13
|
+
|
|
14
|
+
test('Basic alignment', async () => {
|
|
15
|
+
const table = DG.DataFrame.fromCsv(testCsv);
|
|
16
|
+
const alignedCol = await runPepsea(table.getCol('HELM'), 'msa(HELM)');
|
|
17
|
+
const alignedTestCol = table.getCol('MSA');
|
|
18
|
+
for (let i = 0; i < alignedCol.length; ++i)
|
|
19
|
+
expect(alignedCol.get(i) == alignedTestCol.get(i), true);
|
|
20
|
+
}, {skipReason: 'GROK-12764'});
|
|
21
|
+
});
|