@datagrok/bio 2.1.11 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +11 -12
  2. package/css/helm.css +10 -0
  3. package/detectors.js +83 -59
  4. package/dist/package-test.js +2 -68651
  5. package/dist/package-test.js.map +1 -0
  6. package/dist/package.js +2 -66040
  7. package/dist/package.js.map +1 -0
  8. package/dockerfiles/Dockerfile +86 -0
  9. package/files/icons/composition-analysis.svg +17 -0
  10. package/files/icons/sequence-diversity-viewer.svg +4 -0
  11. package/files/icons/sequence-similarity-viewer.svg +4 -0
  12. package/files/icons/vdregions-viewer.svg +22 -0
  13. package/files/icons/weblogo-viewer.svg +7 -0
  14. package/files/tests/testUrl.csv +11 -0
  15. package/files/tests/toAtomicLevelTest.csv +4 -0
  16. package/package.json +29 -32
  17. package/src/analysis/sequence-activity-cliffs.ts +15 -13
  18. package/src/analysis/sequence-diversity-viewer.ts +3 -2
  19. package/src/analysis/sequence-search-base-viewer.ts +4 -2
  20. package/src/analysis/sequence-similarity-viewer.ts +4 -4
  21. package/src/analysis/sequence-space.ts +2 -1
  22. package/src/calculations/monomerLevelMols.ts +6 -6
  23. package/src/package-test.ts +9 -2
  24. package/src/package.ts +230 -145
  25. package/src/substructure-search/substructure-search.ts +25 -22
  26. package/src/tests/Palettes-test.ts +9 -9
  27. package/src/tests/WebLogo-positions-test.ts +131 -68
  28. package/src/tests/_first-tests.ts +9 -0
  29. package/src/tests/activity-cliffs-tests.ts +8 -7
  30. package/src/tests/activity-cliffs-utils.ts +17 -9
  31. package/src/tests/bio-tests.ts +30 -21
  32. package/src/tests/checkInputColumn-tests.ts +17 -17
  33. package/src/tests/converters-test.ts +81 -46
  34. package/src/tests/detectors-benchmark-tests.ts +17 -17
  35. package/src/tests/detectors-tests.ts +190 -178
  36. package/src/tests/fasta-export-tests.ts +2 -3
  37. package/src/tests/monomer-libraries-tests.ts +34 -0
  38. package/src/tests/pepsea-tests.ts +21 -0
  39. package/src/tests/renderers-test.ts +33 -29
  40. package/src/tests/sequence-space-test.ts +6 -4
  41. package/src/tests/similarity-diversity-tests.ts +4 -4
  42. package/src/tests/splitters-test.ts +6 -7
  43. package/src/tests/substructure-filters-tests.ts +23 -1
  44. package/src/tests/utils/sequences-generators.ts +7 -7
  45. package/src/tests/utils.ts +2 -1
  46. package/src/tests/viewers.ts +16 -0
  47. package/src/utils/cell-renderer.ts +116 -54
  48. package/src/utils/constants.ts +7 -6
  49. package/src/utils/convert.ts +17 -11
  50. package/src/utils/monomer-lib.ts +174 -0
  51. package/src/utils/multiple-sequence-alignment.ts +49 -26
  52. package/src/utils/pepsea.ts +78 -0
  53. package/src/utils/save-as-fasta.ts +9 -8
  54. package/src/utils/ui-utils.ts +15 -3
  55. package/src/viewers/vd-regions-viewer.ts +125 -83
  56. package/src/viewers/web-logo-viewer.ts +1031 -0
  57. package/src/widgets/bio-substructure-filter.ts +38 -24
  58. package/tsconfig.json +71 -72
  59. package/webpack.config.js +4 -11
  60. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -8988
  61. package/jest.config.js +0 -33
  62. package/src/__jest__/remote.test.ts +0 -77
  63. package/src/__jest__/test-node.ts +0 -98
  64. package/test-Bio-91c83d8913ff-bb573307.html +0 -392
@@ -1,11 +1,12 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
- import * as bio from '@datagrok-libraries/bio';
5
4
 
6
5
  import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
7
6
 
8
7
  import {importFasta} from '../package';
8
+ import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
9
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
10
 
10
11
  /*
11
12
  // snippet to list df columns of semType='Macromolecule' (false positive)
@@ -21,103 +22,115 @@ for (let i = 0; i < df.columns.length; i++) {
21
22
  type DfReaderFunc = () => Promise<DG.DataFrame>;
22
23
 
23
24
  category('detectors', () => {
24
- const csvDf1: string = `col1
25
- 1
26
- 2
27
- 3`;
25
+ const enum csvTests {
26
+ negEmpty = 'negEmpty',
27
+ neg1 = 'neg1',
28
+ neg2 = 'neg2',
29
+ neg3 = 'neg3',
30
+ negSmiles = 'negSmiles',
31
+ fastaDna1 = 'csvFastaDna1',
32
+ fastaRna1 = 'fastaRna1',
33
+ fastaPt1 = 'fastaPt1',
34
+ fastaUn = 'fastaUn',
35
+ sepDna = 'sepDna',
36
+ sepRna = 'sepRna',
37
+ sepPt = 'sepPt',
38
+ sepUn1 = 'sepUn1',
39
+ sepUn2 = 'sepUn2',
40
+ sepMsaDna1 = 'sepMsaDna1',
41
+ fastaMsaDna1 = 'fastaMsaDna1',
42
+ fastaMsaPt1 = 'fastaMsaPt1',
43
+ }
28
44
 
29
- const csvDfEmpty: string = `id,col1
45
+ const csvData = new class {
46
+ [csvTests.negEmpty]: string = `id,col1
30
47
  1,
31
48
  2,
32
49
  3,
33
50
  4,
34
51
  5,`;
35
-
36
- const csvDf2: string = `col1
52
+ [csvTests.neg1]: string = `col1
53
+ 1
54
+ 2
55
+ 3`;
56
+ [csvTests.neg2]: string = `col1
37
57
  4
38
58
  5
39
59
  6
40
60
  7`;
41
-
42
- const csvDf3: string = `col1
61
+ [csvTests.neg3]: string = `col1
43
62
  8
44
63
  9
45
64
  10
46
65
  11
47
66
  12`;
48
-
49
- const csvDfSmiles: string = `col1
67
+ [csvTests.negSmiles]: string = `col1
50
68
  CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
51
69
  C1CCCCC1
52
70
  CCCCCC
53
71
  `;
54
-
55
- const csvDfDna1: string = `seq
72
+ [csvTests.fastaDna1]: string = `seq
56
73
  ACGTC
57
74
  CAGTGT
58
75
  TTCAAC
59
76
  `;
60
-
61
- const csvDfRna1: string = `seq
77
+ [csvTests.fastaRna1]: string = `seq
62
78
  ACGUC
63
79
  CAGUGU
64
80
  UUCAAC
65
81
  `;
66
-
67
- /** Pure amino acids sequence */
68
- const csvDfPt1: string = `seq
82
+ /** Pure amino acids sequence */
83
+ [csvTests.fastaPt1]: string = `seq
69
84
  FWPHEY
70
85
  YNRQWYV
71
86
  MKPSEYV
72
87
  `;
73
-
74
- const csvDfSepDna: string = `seq
88
+ [csvTests.fastaUn]: string = `seq
89
+ [meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
90
+ [meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
91
+ [Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
92
+ `;
93
+ [csvTests.sepDna]: string = `seq
75
94
  A*C*G*T*C
76
95
  C*A*G*T*G*T
77
96
  T*T*C*A*A*C
78
97
  `;
79
-
80
- const csvDfSepRna: string = `seq
98
+ [csvTests.sepRna]: string = `seq
81
99
  A*C*G*U*C
82
100
  C*A*G*U*G*U
83
101
  U*U*C*A*A*C
84
102
  `;
85
-
86
- const csvDfSepPt: string = `seq
103
+ [csvTests.sepPt]: string = `seq
87
104
  F-W-P-H-E-Y
88
105
  Y-N-R-Q-W-Y-V
89
106
  M-K-P-S-E-Y-V
90
107
  `;
91
-
92
- const csvDfSepUn1: string = `seq
108
+ [csvTests.sepUn1]: string = `seq
93
109
  abc-dfgg-abc1-cfr3-rty-wert
94
110
  rut12-her2-rty-wert-abc-abc1-dfgg
95
111
  rut12-rty-her2-abc-cfr3-wert-rut12
96
112
  `;
97
-
98
- const csvDfSepUn2: string = `seq
113
+ [csvTests.sepUn2]: string = `seq
99
114
  abc/dfgg/abc1/cfr3/rty/wert
100
115
  rut12/her2/rty/wert//abc/abc1/dfgg
101
116
  rut12/rty/her2/abc/cfr3//wert/rut12
102
117
  `;
103
-
104
- const csvDfSepMsaDna1: string = `seq
118
+ [csvTests.sepMsaDna1]: string = `seq
105
119
  A-C--G-T--C-T
106
120
  C-A-C--T--G-T
107
121
  A-C-C-G-T-A-C-T
108
122
  `;
109
-
110
- const csvDfMsaDna1: string = `seq
123
+ [csvTests.fastaMsaDna1]: string = `seq
111
124
  AC-GT-CT
112
125
  CAC-T-GT
113
126
  ACCGTACT
114
127
  `;
115
-
116
- const csvDfMsaPt1: string = `seq
128
+ [csvTests.fastaMsaPt1]: string = `seq
117
129
  FWR-WYV-KHP
118
130
  YNR-WYV-KHP
119
131
  MWRSWY-CKHP
120
132
  `;
133
+ }();
121
134
 
122
135
  const enum Samples {
123
136
  peptidesComplex = 'peptidesComplex',
@@ -135,11 +148,12 @@ MWRSWY-CKHP
135
148
  testSmilesShort = 'testSmilesShort',
136
149
  testCerealCsv = 'testCerealCsv',
137
150
  testActivityCliffsCsv = 'testActivityCliffsCsv',
138
- testSpgi100 = 'testSpgi100',
139
151
  testUnichemSources = 'testUnichemSources',
140
152
  testDmvOffices = 'testDmvOffices',
141
153
  testAlertCollection = 'testAlertCollection',
142
154
  testSpgi = 'testSpgi',
155
+ testSpgi100 = 'testSpgi100',
156
+ testUrl = 'testUrl',
143
157
  }
144
158
 
145
159
  const samples: { [key: string]: string } = {
@@ -158,11 +172,12 @@ MWRSWY-CKHP
158
172
  [Samples.testSmilesShort]: 'System:AppData/Bio/tests/testSmilesShort.csv',
159
173
  [Samples.testActivityCliffsCsv]: 'System:AppData/Bio/tests/testActivityCliffs.csv', // smiles
160
174
  [Samples.testCerealCsv]: 'System:AppData/Bio/tests/testCereal.csv',
161
- [Samples.testSpgi100]: 'System:AppData/Bio/tests/testSpgi100.csv',
162
175
  [Samples.testUnichemSources]: 'System:AppData/Bio/tests/testUnichemSources.csv',
163
176
  [Samples.testDmvOffices]: 'System:AppData/Bio/tests/testDmvOffices.csv',
164
177
  [Samples.testAlertCollection]: 'System:AppData/Bio/tests/testAlertCollection.csv',
178
+ [Samples.testSpgi100]: 'System:AppData/Bio/tests/testSpgi100.csv',
165
179
  [Samples.testSpgi]: 'System:AppData/Bio/tests/SPGI-derived.csv',
180
+ [Samples.testUrl]: 'System:AppData/Bio/tests/testUrl.csv',
166
181
  };
167
182
 
168
183
  const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
@@ -192,217 +207,177 @@ MWRSWY-CKHP
192
207
  return df;
193
208
  }
194
209
 
195
- const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
196
- const readCsv: (key: string, csv: string) => DfReaderFunc = (key: string, csv: string) => {
210
+ const readCsv: (key: csvTests) => DfReaderFunc = (key: keyof typeof csvData) => {
197
211
  return async () => {
198
- if (!(key in _csvDfs)) {
199
- _csvDfs[key] = (async (): Promise<DG.DataFrame> => {
200
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
201
- await grok.data.detectSemanticTypes(df);
202
- return df;
203
- })();
204
- }
205
- return _csvDfs[key];
212
+ // Always recreate test data frame from CSV for reproducible detector behavior in tests.
213
+ const csv: string = csvData[key];
214
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
215
+ await grok.data.detectSemanticTypes(df);
216
+ return df;
206
217
  };
207
218
  };
208
219
 
209
220
 
210
- test('NegativeEmpty', async () => { await _testNeg(readCsv('csvDfEmpty', csvDfEmpty), 'col1'); });
211
- test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
212
- test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
213
- test('Negative3', async () => { await _testNeg(readCsv('csvDf3', csvDf3), 'col1'); });
214
- test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
221
+ test('NegativeEmpty', async () => { await _testNeg(readCsv(csvTests.negEmpty), 'col1'); });
222
+ test('Negative1', async () => { await _testNeg(readCsv(csvTests.neg1), 'col1'); });
223
+ test('Negative2', async () => { await _testNeg(readCsv(csvTests.neg2), 'col1'); });
224
+ test('Negative3', async () => { await _testNeg(readCsv(csvTests.neg3), 'col1'); });
225
+ test('NegativeSmiles', async () => { await _testNeg(readCsv(csvTests.negSmiles), 'col1'); });
215
226
 
216
- test('Dna1', async () => {
217
- await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.DNA, 4, false);
227
+ test('FastaDna1', async () => {
228
+ await _testPos(readCsv(csvTests.fastaDna1), 'seq',
229
+ NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false);
218
230
  });
219
- test('Rna1', async () => {
220
- await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.RNA, 4, false);
231
+ test('FastaRna1', async () => {
232
+ await _testPos(readCsv(csvTests.fastaRna1), 'seq',
233
+ NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false);
221
234
  });
222
- test('AA1', async () => {
223
- await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
235
+ test('FastaPt1', async () => {
236
+ await _testPos(readCsv(csvTests.fastaPt1), 'seq',
237
+ NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
224
238
  });
225
- test('MsaDna1', async () => {
226
- await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.DNA, 4, false);
239
+ test('FastaUn', async () => {
240
+ await _testPos(readCsv(csvTests.fastaUn), 'seq',
241
+ NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 12, true);
242
+ });
243
+ test('FastaMsaDna1', async () => {
244
+ await _testPos(readCsv(csvTests.fastaMsaDna1), 'seq',
245
+ NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
227
246
  });
228
247
 
229
- test('MsaAA1', async () => {
230
- await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', bio.NOTATION.FASTA,
231
- bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.PT, 20, false);
248
+ test('FastaMsaPt1', async () => {
249
+ await _testPos(readCsv(csvTests.fastaMsaPt1), 'seq',
250
+ NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
232
251
  });
233
252
 
234
253
  test('SepDna', async () => {
235
- await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.DNA, 4, false, '*');
254
+ await _testPos(readCsv(csvTests.sepDna), 'seq',
255
+ NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false, '*');
236
256
  });
237
257
  test('SepRna', async () => {
238
- await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.RNA, 4, false, '*');
258
+ await _testPos(readCsv(csvTests.sepRna), 'seq',
259
+ NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false, '*');
239
260
  });
240
261
  test('SepPt', async () => {
241
- await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq',
242
- bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false, '-');
262
+ await _testPos(readCsv(csvTests.sepPt), 'seq',
263
+ NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.PT, 20, false, '-');
243
264
  });
244
265
  test('SepUn1', async () => {
245
- await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq',
246
- bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.UN, 8, true, '-');
266
+ await _testPos(readCsv(csvTests.sepUn1), 'seq',
267
+ NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 8, true, '-');
247
268
  });
248
269
  test('SepUn2', async () => {
249
- await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq',
250
- bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.UN, 9, true, '/');
270
+ await _testPos(readCsv(csvTests.sepUn2), 'seq',
271
+ NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 9, true, '/');
251
272
  });
252
273
 
253
274
  test('SepMsaN1', async () => {
254
- await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq',
255
- bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.DNA, 4, false, '-');
275
+ await _testPos(readCsv(csvTests.sepMsaDna1), 'seq',
276
+ NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false, '-');
256
277
  });
257
278
 
258
- test('SamplesFastaCsvPt', async () => {
259
- await _testPos(readSamples(Samples.fastaCsv), 'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
260
- });
261
- test('SamplesFastaCsvNegativeEntry', async () => {
262
- await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
263
- });
264
- test('SamplesFastaCsvNegativeLength', async () => {
265
- await _testNeg(readSamples(Samples.fastaCsv), 'Length');
266
- });
267
- test('SamplesFastaCsvNegativeUniProtKB', async () => {
268
- await _testNeg(readSamples(Samples.fastaCsv), 'UniProtKB');
279
+ test('samplesFastaCsv', async () => {
280
+ await _testDf(readSamples(Samples.fastaCsv), {
281
+ 'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
282
+ });
269
283
  });
270
284
 
271
- test('SamplesFastaFastaPt', async () => {
272
- await _testPos(readSamples(Samples.fastaFasta, readFileFasta),
273
- 'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
285
+ test('samplesFastaFasta', async () => {
286
+ await _testDf(readSamples(Samples.fastaFasta), {
287
+ 'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
288
+ });
274
289
  });
275
290
 
276
291
  // peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
277
292
  // test('samplesPeptidesComplexPositiveAlignedSequence', async () => {
278
293
  // await _testPos(readSamples(Samples.peptidesComplex), 'AlignedSequence', 'separator:SEQ:UN', '-');
279
294
  // });
280
- test('samplesPeptidesComplexNegativeID', async () => {
281
- await _testNeg(readSamples(Samples.peptidesComplex), 'ID');
282
- });
283
- test('SamplesPeptidesComplexNegativeMeasured', async () => {
284
- await _testNeg(readSamples(Samples.peptidesComplex), 'Measured');
285
- });
286
- test('SamplesPeptidesComplexNegativeValue', async () => {
287
- await _testNeg(readSamples(Samples.peptidesComplex), 'Value');
288
- });
289
-
290
- test('samplesMsaComplexUn', async () => {
291
- await _testPos(readSamples(Samples.msaComplex), 'MSA',
292
- bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.UN, 161, true, '/');
293
- });
294
- test('samplesMsaComplexNegativeActivity', async () => {
295
- await _testNeg(readSamples(Samples.msaComplex), 'Activity');
295
+ test('samplesPeptidesComplex', async () => {
296
+ await _testDf(readSamples(Samples.peptidesComplex), {} /* no positive */);
296
297
  });
297
298
 
298
- test('samplesIdCsvNegativeID', async () => {
299
- await _testNeg(readSamples(Samples.testIdCsv), 'ID');
299
+ test('samplesMsaComplex', async () => {
300
+ await _testDf(readSamples(Samples.msaComplex), {
301
+ 'MSA': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/'),
302
+ });
300
303
  });
301
304
 
302
- test('samplesSarSmallCsvNegativeSmiles', async () => {
303
- await _testNeg(readSamples(Samples.testSmilesCsv), 'smiles');
305
+ test('samplesIdCsv', async () => {
306
+ await _testDf(readSamples(Samples.testIdCsv), {} /* no positive */);
304
307
  });
305
308
 
306
- test('samplesHelmCsvHELM', async () => {
307
- await _testPos(readSamples(Samples.helmCsv), 'HELM', bio.NOTATION.HELM, null, null, 160, true, null);
309
+ test('samplesSarSmallCsv', async () => {
310
+ await _testDf(readSamples(Samples.testSmilesCsv), {} /* no positive */);
308
311
  });
309
312
 
310
- test('samplesHelmCsvNegativeActivity', async () => {
311
- await _testNeg(readSamples(Samples.helmCsv), 'Activity');
313
+ test('samplesHelmCsv', async () => {
314
+ await _testDf(readSamples(Samples.helmCsv), {
315
+ 'HELM': new PosCol(NOTATION.HELM, null, null, 160, true),
316
+ });
312
317
  });
313
318
 
314
- // sample_testHelm.csb
319
+ // sample_testHelm.csv
315
320
  // columns: ID,Test type,HELM string,Valid?,Mol Weight,Mol Formula,SMILES
316
- test('samplesTestHelmNegativeID', async () => {
317
- await _testNeg(readSamples(Samples.testHelmCsv), 'ID');
318
- });
319
- test('samplesTestHelmNegativeTestType', async () => {
320
- await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
321
- });
322
- test('samplesTestHelmPositiveHelmString', async () => {
323
- await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', bio.NOTATION.HELM, null, null, 9, true, null);
324
- });
325
- test('samplesTestHelmNegativeValid', async () => {
326
- await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
327
- });
328
- test('samplesTestHelmNegativeMolWeight', async () => {
329
- await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Weight');
330
- });
331
- test('samplesTestHelmNegativeMolFormula', async () => {
332
- await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Formula');
321
+ test('samplesTestHelmCsv', async () => {
322
+ await _testDf(readSamples(Samples.testHelmCsv), {
323
+ 'HELM string': new PosCol(NOTATION.HELM, null, null, 9, true),
324
+ });
333
325
  });
334
- test('samplesTestHelmNegativeSmiles', async () => {
335
- await _testNeg(readSamples(Samples.testHelmCsv), 'Smiles');
336
- });
337
-
338
- test('samplesTestDemogNegativeAll', async () => {
339
- const dfFunc: DfReaderFunc = readSamples(Samples.testDemogCsv);
340
- const df: DG.DataFrame = await dfFunc();
341
326
 
342
- for (const col of df.columns.toList())
343
- await _testNeg(dfFunc, col.name);
327
+ test('samplesTestDemogCsv', async () => {
328
+ await _testDf(readSamples(Samples.testDemogCsv), {} /* no positive */);
344
329
  });
345
330
 
346
- test('samplesTestSmiles2NegativeSmiles', async () => {
347
- await _testNeg(readSamples(Samples.testSmiles2Csv), 'SMILES');
331
+ test('samplesTestSmiles2Csv', async () => {
332
+ await _testDf(readSamples(Samples.testSmiles2Csv), {} /* no positive */);
348
333
  });
349
334
 
350
- test('samplesTestSmilesShortNegativeSmiles', async () => {
351
- await _testNeg(readSamples(Samples.testSmilesShort), 'SMILES');
335
+ test('samplesTestSmilesShort', async () => {
336
+ await _testDf(readSamples(Samples.testSmilesShort), {} /* no positive */);
352
337
  });
353
338
 
354
339
  test('samplesTestActivityCliffsNegativeSmiles', async () => {
355
- await _testNeg(readSamples(Samples.testActivityCliffsCsv), 'smiles');
340
+ await _testDf(readSamples(Samples.testActivityCliffsCsv), {} /* no positive */);
356
341
  });
357
342
 
358
- test('samplesFastaPtPosSequence', async () => {
359
- await _testPos(readSamples(Samples.fastaPtCsv), 'sequence',
360
- bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
343
+ test('samplesFastaPtCsv', async () => {
344
+ await _testDf(readSamples(Samples.fastaPtCsv), {
345
+ 'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
346
+ });
361
347
  });
362
348
 
363
- test('samplesTestCerealNegativeCerealName', async () => {
364
- await _testNeg(readSamples(Samples.testCerealCsv), 'cereal_name');
349
+ test('samplesTestCerealCsv', async () => {
350
+ await _testDf(readSamples(Samples.testCerealCsv), {} /* no positive */);
365
351
  });
366
352
 
367
- test('samplesTestSpgi100NegativeStereoCategory', async () => {
368
- await _testNeg(readSamples(Samples.testSpgi100), 'Stereo Category');
369
- });
370
- test('samplesTestSpgi100NegativeScaffoldNames', async () => {
371
- await _testNeg(readSamples(Samples.testSpgi100), 'Scaffold Names');
372
- });
373
- test('samplesTestSpgi100NegativePrimaryScaffoldName', async () => {
374
- await _testNeg(readSamples(Samples.testSpgi100), 'Primary Scaffold Name');
375
- });
376
- test('samplesTestSpgi100NegativeSampleName', async () => {
377
- await _testNeg(readSamples(Samples.testSpgi100), 'Sample Name');
353
+ test('samplesTestUnichemSources', async () => {
354
+ await _testDf(readSamples(Samples.testUnichemSources), {} /* no positive */);
378
355
  });
379
356
 
380
- test('samplesTestUnichemSourcesNegativeSrcUrl', async () => {
381
- await _testNeg(readSamples(Samples.testUnichemSources), 'src_url');
382
- });
383
- test('samplesTestUnichemSourcesNegativeBaseIdUrl', async () => {
384
- await _testNeg(readSamples(Samples.testUnichemSources), 'base_id_url');
357
+ test('samplesTestDmvOffices', async () => {
358
+ await _testDf(readSamples(Samples.testDmvOffices), {} /* no positive */);
385
359
  });
386
360
 
387
- test('samplesTestDmvOfficesNegativeOfficeName', async () => {
388
- await _testNeg(readSamples(Samples.testDmvOffices), 'Office Name');
361
+ test('samplesTestAlertCollection', async () => {
362
+ await _testDf(readSamples(Samples.testAlertCollection), {} /* no positive */);
389
363
  });
390
- test('samplesTestDmvOfficesNegativeCity', async () => {
391
- await _testNeg(readSamples(Samples.testDmvOffices), 'City');
364
+
365
+ test('samplesTestSpgi', async () => {
366
+ await _testDf(readSamples(Samples.testSpgi), {} /* no positive */);
392
367
  });
393
368
 
394
- test('samplesTestAlertCollectionNegativeSmarts', async () => {
395
- await _testNeg(readSamples(Samples.testAlertCollection), 'smarts');
369
+ test('samplesTestSpgi100', async () => {
370
+ await _testDf(readSamples(Samples.testSpgi100), {} /* no positive */);
396
371
  });
397
372
 
398
- test('samplesTestSpgiNegativeVals', async () => {
399
- await _testNeg(readSamples(Samples.testSpgi), 'vals');
373
+ test('samplesTestUrl', async () => {
374
+ await _testDf(readSamples(Samples.testUrl), {} /* no positive */);
400
375
  });
401
376
  });
402
377
 
403
378
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
404
379
  const df: DG.DataFrame = await readDf();
405
- const col: DG.Column = df.col(colName)!;
380
+ const col: DG.Column = df.getCol(colName)!;
406
381
  const semType: string = await grok.functions
407
382
  .call('Bio:detectMacromolecule', {col: col}) as unknown as string;
408
383
  if (semType)
@@ -429,14 +404,14 @@ export async function _testPos(
429
404
  if (semType)
430
405
  col.semType = semType;
431
406
 
432
- expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
407
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
433
408
  expect(col.getTag(DG.TAGS.UNITS), units);
434
- expect(col.getTag(bio.TAGS.aligned), aligned);
435
- expect(col.getTag(bio.TAGS.alphabet), alphabet);
409
+ expect(col.getTag(bioTAGS.aligned), aligned);
410
+ expect(col.getTag(bioTAGS.alphabet), alphabet);
436
411
  if (separator)
437
- expect(col.getTag(bio.TAGS.separator), separator);
412
+ expect(col.getTag(bioTAGS.separator), separator);
438
413
 
439
- const uh = new bio.UnitsHandler(col);
414
+ const uh = new UnitsHandler(col);
440
415
  expect(uh.getAlphabetSize(), alphabetSize);
441
416
  expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
442
417
  if (!uh.isHelm()) {
@@ -445,3 +420,40 @@ export async function _testPos(
445
420
  }
446
421
  }
447
422
 
423
/**
 * Expected detection result for a column that must be recognized as a Macromolecule.
 *
 * Instances are the values of the `posCols` map given to `_testDf`, which forwards
 * the fields, in declaration order, to `_testPos`.
 */
class PosCol {
  /**
   * @param units               Expected value of the column's units tag (a notation such as FASTA,
   *                            SEPARATOR or HELM).
   * @param aligned             Expected value of the `aligned` tag, or `null` when none is expected.
   * @param alphabet            Expected value of the `alphabet` tag, or `null` when none is expected.
   * @param alphabetSize        Expected alphabet size reported by the units handler.
   * @param alphabetIsMultichar Expected multichar flag reported by the units handler.
   * @param separator           Expected `separator` tag; `_testPos` checks it only when it is set.
   */
  constructor(
    public readonly units: string,
    public readonly aligned: string | null,
    public readonly alphabet: string | null,
    public readonly alphabetSize: number,
    public readonly alphabetIsMultichar: boolean,
    public readonly separator?: string,
  ) {}
}
433
+
434
/**
 * Checks macromolecule detection for every column of a data frame.
 *
 * Columns listed in `posCols` are checked with `_testPos` against the expected
 * values; every other column is checked with `_testNeg`. Failures are collected
 * across all columns and reported together, so a single run shows every
 * mis-detected column instead of only the first one.
 *
 * @param readDf  Factory producing the data frame under test; it is also handed on
 *                to `_testPos` / `_testNeg`.
 * @param posCols Expected detection results keyed by column name. Pass `{}` when no
 *                column is expected to be detected.
 * @throws Error  With one line per failed column, or per `posCols` key that names
 *                no column of the data frame.
 */
export async function _testDf(readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }): Promise<void> {
  const df: DG.DataFrame = await readDf();
  const colNames: string[] = df.columns.names();
  const errList: string[] = [];

  // An expectation for a column the frame does not contain would never be evaluated by
  // the loop below, letting a renamed or missing column pass unnoticed.
  for (const expectedName of Object.keys(posCols)) {
    if (!colNames.includes(expectedName))
      errList.push(`Positive col '${expectedName}' is missing from the data frame`);
  }

  for (const colName of colNames) {
    // Own-property check: `in` would also match inherited keys such as 'constructor'.
    const isPositive: boolean = Object.prototype.hasOwnProperty.call(posCols, colName);
    try {
      if (isPositive) {
        const p = posCols[colName];
        await _testPos(readDf, colName, p.units, p.aligned, p.alphabet,
          p.alphabetSize, p.alphabetIsMultichar, p.separator);
      } else {
        await _testNeg(readDf, colName);
      }
    } catch (err: unknown) {
      errList.push(`${isPositive ? 'Positive' : 'Negative'} col '${colName}' failed: ${String(err)}`);
    }
  }

  if (errList.length > 0)
    throw new Error(errList.join('\n'));
}
@@ -1,15 +1,14 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as grok from 'datagrok-api/grok';
4
- import * as bio from '@datagrok-libraries/bio';
5
4
 
6
5
  import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
7
6
  import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
7
+ import {splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
8
 
9
9
  type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
10
10
 
11
11
  category('fastaExport', () => {
12
-
13
12
  enum WrapDataTest {
14
13
  single = 'single',
15
14
  multi = 'multi'
@@ -88,7 +87,7 @@ MRGGL
88
87
  });
89
88
 
90
89
  function _testWrapSequence(testKey: string, lineWidth: number = 10) {
91
- const splitter = bio.splitterAsFasta;
90
+ const splitter = splitterAsFasta;
92
91
 
93
92
  const srcSeq: string = wrapData[testKey].src;
94
93
  const wrapRes: string[] = wrapSequence(srcSeq, splitter, lineWidth);
@@ -0,0 +1,34 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+
5
+ import {test, after, before, category, expect} from '@datagrok-libraries/utils/src/test';
6
+
7
+ import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
8
+ import {LIB_STORAGE_NAME} from '../utils/monomer-lib';
9
+
10
+
11
/**
 * Tests of monomer library loading.
 *
 * The tests overwrite the user's stored library settings, so the category saves
 * them beforehand and puts them back afterwards.
 */
category('monomerLibraries', () => {
  let monomerLibHelper: IMonomerLibHelper;
  /** Snapshot of the user's monomer library settings taken before the tests change them */
  let userLibrariesSettings: any = null;

  before(async () => {
    monomerLibHelper = await getMonomerLibHelper();
    userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
  });

  after(async () => {
    await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
    // Restoring the stored settings alone leaves the helper holding whatever the tests
    // loaded from the cleared settings; load again so later code sees the user's libraries.
    // NOTE(review): `true` is the same argument the test below passes; presumably it forces a reload — confirm.
    await monomerLibHelper.loadLibraries(true);
  });

  test('default', async () => {
    // Clear settings to test default
    await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
    await monomerLibHelper.loadLibraries(true); // test defaultLib

    // Currently the default monomer lib is empty
    const currentMonomerLib = monomerLibHelper.getBioLib();
    expect(currentMonomerLib.getTypes().length, 0);
  });
});
@@ -0,0 +1,21 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ import {category, expect, test} from '@datagrok-libraries/utils/src/test';
4
+ import {runPepsea} from '../utils/pepsea';
5
+
6
/** Tests of the PepSeA alignment wrapper `runPepsea`. */
category('PepSeA', () => {
  // Column HELM is the input; column MSA holds the alignment expected for the same row.
  const testCsv = `HELM,MSA
"PEPTIDE1{F.L.R.G.W.[MeF].Y.S.N.N.C}$$$$","F.L.R.G.W.MeF.Y..S.N.N.C"
"PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.N.C}$$$$","F.L.R.G.Y.MeF.Y.W...N.C"
"PEPTIDE1{F.G.Y.[MeF].Y.W.S.D.N.C}$$$$","F...G.Y.MeF.Y.W.S.D.N.C"
"PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.S.N.D.C}$$$$","F.L.R.G.Y.MeF.Y.W.S.N.D.C"
"PEPTIDE1{F.V.R.G.Y.[MeF].Y.W.S.N.C}$$$$","F.V.R.G.Y.MeF.Y.W.S..N.C"`;

  test('Basic alignment', async () => {
    const table = DG.DataFrame.fromCsv(testCsv);
    const alignedCol = await runPepsea(table.getCol('HELM'), 'msa(HELM)');
    const alignedTestCol = table.getCol('MSA');
    // A result with fewer rows than the reference would otherwise pass the loop below.
    expect(alignedCol.length, alignedTestCol.length);
    // Pass actual and expected separately so a failure reports both values.
    for (let i = 0; i < alignedCol.length; ++i)
      expect(alignedCol.get(i), alignedTestCol.get(i));
  }, {skipReason: 'GROK-12764'});
});
+ });