@datagrok/bio 2.1.12 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/README.md +11 -12
  2. package/css/helm.css +10 -0
  3. package/detectors.js +83 -59
  4. package/dist/package-test.js +2 -13168
  5. package/dist/package-test.js.map +1 -0
  6. package/dist/package.js +2 -10560
  7. package/dist/package.js.map +1 -0
  8. package/dockerfiles/Dockerfile +86 -0
  9. package/files/icons/composition-analysis.svg +17 -0
  10. package/files/icons/sequence-diversity-viewer.svg +4 -0
  11. package/files/icons/sequence-similarity-viewer.svg +4 -0
  12. package/files/icons/vdregions-viewer.svg +22 -0
  13. package/files/icons/weblogo-viewer.svg +7 -0
  14. package/files/tests/testUrl.csv +11 -0
  15. package/files/tests/toAtomicLevelTest.csv +4 -0
  16. package/package.json +24 -25
  17. package/src/analysis/sequence-activity-cliffs.ts +11 -9
  18. package/src/analysis/sequence-search-base-viewer.ts +2 -1
  19. package/src/analysis/sequence-similarity-viewer.ts +3 -3
  20. package/src/analysis/sequence-space.ts +2 -1
  21. package/src/calculations/monomerLevelMols.ts +4 -4
  22. package/src/package-test.ts +9 -2
  23. package/src/package.ts +215 -131
  24. package/src/substructure-search/substructure-search.ts +19 -16
  25. package/src/tests/Palettes-test.ts +1 -1
  26. package/src/tests/WebLogo-positions-test.ts +113 -57
  27. package/src/tests/_first-tests.ts +9 -0
  28. package/src/tests/activity-cliffs-tests.ts +8 -7
  29. package/src/tests/activity-cliffs-utils.ts +17 -9
  30. package/src/tests/bio-tests.ts +4 -5
  31. package/src/tests/checkInputColumn-tests.ts +1 -1
  32. package/src/tests/converters-test.ts +52 -17
  33. package/src/tests/detectors-benchmark-tests.ts +3 -2
  34. package/src/tests/detectors-tests.ts +177 -172
  35. package/src/tests/fasta-export-tests.ts +1 -1
  36. package/src/tests/monomer-libraries-tests.ts +34 -0
  37. package/src/tests/pepsea-tests.ts +21 -0
  38. package/src/tests/renderers-test.ts +21 -19
  39. package/src/tests/sequence-space-test.ts +6 -4
  40. package/src/tests/similarity-diversity-tests.ts +4 -4
  41. package/src/tests/splitters-test.ts +4 -5
  42. package/src/tests/substructure-filters-tests.ts +23 -1
  43. package/src/tests/utils/sequences-generators.ts +1 -1
  44. package/src/tests/utils.ts +2 -1
  45. package/src/tests/viewers.ts +16 -0
  46. package/src/utils/cell-renderer.ts +88 -35
  47. package/src/utils/constants.ts +7 -6
  48. package/src/utils/convert.ts +8 -2
  49. package/src/utils/monomer-lib.ts +174 -0
  50. package/src/utils/multiple-sequence-alignment.ts +44 -20
  51. package/src/utils/pepsea.ts +78 -0
  52. package/src/utils/save-as-fasta.ts +2 -1
  53. package/src/utils/ui-utils.ts +15 -3
  54. package/src/viewers/vd-regions-viewer.ts +113 -72
  55. package/src/viewers/web-logo-viewer.ts +1031 -0
  56. package/src/widgets/bio-substructure-filter.ts +38 -24
  57. package/tsconfig.json +71 -72
  58. package/webpack.config.js +4 -11
  59. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -9039
@@ -5,7 +5,8 @@ import * as DG from 'datagrok-api/dg';
5
5
  import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
6
6
 
7
7
  import {importFasta} from '../package';
8
- import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS, UnitsHandler} from '@datagrok-libraries/bio';
8
+ import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
9
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
10
 
10
11
  /*
11
12
  // snippet to list df columns of semType='Macromolecule' (false positive)
@@ -21,103 +22,115 @@ for (let i = 0; i < df.columns.length; i++) {
21
22
  type DfReaderFunc = () => Promise<DG.DataFrame>;
22
23
 
23
24
  category('detectors', () => {
24
- const csvDf1: string = `col1
25
- 1
26
- 2
27
- 3`;
25
+ const enum csvTests {
26
+ negEmpty = 'negEmpty',
27
+ neg1 = 'neg1',
28
+ neg2 = 'neg2',
29
+ neg3 = 'neg3',
30
+ negSmiles = 'negSmiles',
31
+ fastaDna1 = 'csvFastaDna1',
32
+ fastaRna1 = 'fastaRna1',
33
+ fastaPt1 = 'fastaPt1',
34
+ fastaUn = 'fastaUn',
35
+ sepDna = 'sepDna',
36
+ sepRna = 'sepRna',
37
+ sepPt = 'sepPt',
38
+ sepUn1 = 'sepUn1',
39
+ sepUn2 = 'sepUn2',
40
+ sepMsaDna1 = 'sepMsaDna1',
41
+ fastaMsaDna1 = 'fastaMsaDna1',
42
+ fastaMsaPt1 = 'fastaMsaPt1',
43
+ }
28
44
 
29
- const csvDfEmpty: string = `id,col1
45
+ const csvData = new class {
46
+ [csvTests.negEmpty]: string = `id,col1
30
47
  1,
31
48
  2,
32
49
  3,
33
50
  4,
34
51
  5,`;
35
-
36
- const csvDf2: string = `col1
52
+ [csvTests.neg1]: string = `col1
53
+ 1
54
+ 2
55
+ 3`;
56
+ [csvTests.neg2]: string = `col1
37
57
  4
38
58
  5
39
59
  6
40
60
  7`;
41
-
42
- const csvDf3: string = `col1
61
+ [csvTests.neg3]: string = `col1
43
62
  8
44
63
  9
45
64
  10
46
65
  11
47
66
  12`;
48
-
49
- const csvDfSmiles: string = `col1
67
+ [csvTests.negSmiles]: string = `col1
50
68
  CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
51
69
  C1CCCCC1
52
70
  CCCCCC
53
71
  `;
54
-
55
- const csvDfDna1: string = `seq
72
+ [csvTests.fastaDna1]: string = `seq
56
73
  ACGTC
57
74
  CAGTGT
58
75
  TTCAAC
59
76
  `;
60
-
61
- const csvDfRna1: string = `seq
77
+ [csvTests.fastaRna1]: string = `seq
62
78
  ACGUC
63
79
  CAGUGU
64
80
  UUCAAC
65
81
  `;
66
-
67
- /** Pure amino acids sequence */
68
- const csvDfPt1: string = `seq
82
+ /** Pure amino acids sequence */
83
+ [csvTests.fastaPt1]: string = `seq
69
84
  FWPHEY
70
85
  YNRQWYV
71
86
  MKPSEYV
72
87
  `;
73
-
74
- const csvDfSepDna: string = `seq
88
+ [csvTests.fastaUn]: string = `seq
89
+ [meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
90
+ [meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
91
+ [Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
92
+ `;
93
+ [csvTests.sepDna]: string = `seq
75
94
  A*C*G*T*C
76
95
  C*A*G*T*G*T
77
96
  T*T*C*A*A*C
78
97
  `;
79
-
80
- const csvDfSepRna: string = `seq
98
+ [csvTests.sepRna]: string = `seq
81
99
  A*C*G*U*C
82
100
  C*A*G*U*G*U
83
101
  U*U*C*A*A*C
84
102
  `;
85
-
86
- const csvDfSepPt: string = `seq
103
+ [csvTests.sepPt]: string = `seq
87
104
  F-W-P-H-E-Y
88
105
  Y-N-R-Q-W-Y-V
89
106
  M-K-P-S-E-Y-V
90
107
  `;
91
-
92
- const csvDfSepUn1: string = `seq
108
+ [csvTests.sepUn1]: string = `seq
93
109
  abc-dfgg-abc1-cfr3-rty-wert
94
110
  rut12-her2-rty-wert-abc-abc1-dfgg
95
111
  rut12-rty-her2-abc-cfr3-wert-rut12
96
112
  `;
97
-
98
- const csvDfSepUn2: string = `seq
113
+ [csvTests.sepUn2]: string = `seq
99
114
  abc/dfgg/abc1/cfr3/rty/wert
100
115
  rut12/her2/rty/wert//abc/abc1/dfgg
101
116
  rut12/rty/her2/abc/cfr3//wert/rut12
102
117
  `;
103
-
104
- const csvDfSepMsaDna1: string = `seq
118
+ [csvTests.sepMsaDna1]: string = `seq
105
119
  A-C--G-T--C-T
106
120
  C-A-C--T--G-T
107
121
  A-C-C-G-T-A-C-T
108
122
  `;
109
-
110
- const csvDfMsaDna1: string = `seq
123
+ [csvTests.fastaMsaDna1]: string = `seq
111
124
  AC-GT-CT
112
125
  CAC-T-GT
113
126
  ACCGTACT
114
127
  `;
115
-
116
- const csvDfMsaPt1: string = `seq
128
+ [csvTests.fastaMsaPt1]: string = `seq
117
129
  FWR-WYV-KHP
118
130
  YNR-WYV-KHP
119
131
  MWRSWY-CKHP
120
132
  `;
133
+ }();
121
134
 
122
135
  const enum Samples {
123
136
  peptidesComplex = 'peptidesComplex',
@@ -135,11 +148,12 @@ MWRSWY-CKHP
135
148
  testSmilesShort = 'testSmilesShort',
136
149
  testCerealCsv = 'testCerealCsv',
137
150
  testActivityCliffsCsv = 'testActivityCliffsCsv',
138
- testSpgi100 = 'testSpgi100',
139
151
  testUnichemSources = 'testUnichemSources',
140
152
  testDmvOffices = 'testDmvOffices',
141
153
  testAlertCollection = 'testAlertCollection',
142
154
  testSpgi = 'testSpgi',
155
+ testSpgi100 = 'testSpgi100',
156
+ testUrl = 'testUrl',
143
157
  }
144
158
 
145
159
  const samples: { [key: string]: string } = {
@@ -158,11 +172,12 @@ MWRSWY-CKHP
158
172
  [Samples.testSmilesShort]: 'System:AppData/Bio/tests/testSmilesShort.csv',
159
173
  [Samples.testActivityCliffsCsv]: 'System:AppData/Bio/tests/testActivityCliffs.csv', // smiles
160
174
  [Samples.testCerealCsv]: 'System:AppData/Bio/tests/testCereal.csv',
161
- [Samples.testSpgi100]: 'System:AppData/Bio/tests/testSpgi100.csv',
162
175
  [Samples.testUnichemSources]: 'System:AppData/Bio/tests/testUnichemSources.csv',
163
176
  [Samples.testDmvOffices]: 'System:AppData/Bio/tests/testDmvOffices.csv',
164
177
  [Samples.testAlertCollection]: 'System:AppData/Bio/tests/testAlertCollection.csv',
178
+ [Samples.testSpgi100]: 'System:AppData/Bio/tests/testSpgi100.csv',
165
179
  [Samples.testSpgi]: 'System:AppData/Bio/tests/SPGI-derived.csv',
180
+ [Samples.testUrl]: 'System:AppData/Bio/tests/testUrl.csv',
166
181
  };
167
182
 
168
183
  const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
@@ -192,225 +207,177 @@ MWRSWY-CKHP
192
207
  return df;
193
208
  }
194
209
 
195
- const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
196
- const readCsv: (key: string, csv: string) => DfReaderFunc = (key: string, csv: string) => {
210
+ const readCsv: (key: csvTests) => DfReaderFunc = (key: keyof typeof csvData) => {
197
211
  return async () => {
198
- if (!(key in _csvDfs)) {
199
- _csvDfs[key] = (async (): Promise<DG.DataFrame> => {
200
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
201
- await grok.data.detectSemanticTypes(df);
202
- return df;
203
- })();
204
- }
205
- return _csvDfs[key];
212
+ // Always recreate test data frame from CSV for reproducible detector behavior in tests.
213
+ const csv: string = csvData[key];
214
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
215
+ await grok.data.detectSemanticTypes(df);
216
+ return df;
206
217
  };
207
218
  };
208
219
 
209
220
 
210
- test('NegativeEmpty', async () => { await _testNeg(readCsv('csvDfEmpty', csvDfEmpty), 'col1'); });
211
- test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
212
- test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
213
- test('Negative3', async () => { await _testNeg(readCsv('csvDf3', csvDf3), 'col1'); });
214
- test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
221
+ test('NegativeEmpty', async () => { await _testNeg(readCsv(csvTests.negEmpty), 'col1'); });
222
+ test('Negative1', async () => { await _testNeg(readCsv(csvTests.neg1), 'col1'); });
223
+ test('Negative2', async () => { await _testNeg(readCsv(csvTests.neg2), 'col1'); });
224
+ test('Negative3', async () => { await _testNeg(readCsv(csvTests.neg3), 'col1'); });
225
+ test('NegativeSmiles', async () => { await _testNeg(readCsv(csvTests.negSmiles), 'col1'); });
215
226
 
216
- test('Dna1', async () => {
217
- await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq',
227
+ test('FastaDna1', async () => {
228
+ await _testPos(readCsv(csvTests.fastaDna1), 'seq',
218
229
  NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false);
219
230
  });
220
- test('Rna1', async () => {
221
- await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq',
231
+ test('FastaRna1', async () => {
232
+ await _testPos(readCsv(csvTests.fastaRna1), 'seq',
222
233
  NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false);
223
234
  });
224
- test('AA1', async () => {
225
- await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq',
235
+ test('FastaPt1', async () => {
236
+ await _testPos(readCsv(csvTests.fastaPt1), 'seq',
226
237
  NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
227
238
  });
228
- test('MsaDna1', async () => {
229
- await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq',
239
+ test('FastaUn', async () => {
240
+ await _testPos(readCsv(csvTests.fastaUn), 'seq',
241
+ NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 12, true);
242
+ });
243
+ test('FastaMsaDna1', async () => {
244
+ await _testPos(readCsv(csvTests.fastaMsaDna1), 'seq',
230
245
  NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
231
246
  });
232
247
 
233
- test('MsaAA1', async () => {
234
- await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', NOTATION.FASTA,
235
- ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
248
+ test('FastaMsaPt1', async () => {
249
+ await _testPos(readCsv(csvTests.fastaMsaPt1), 'seq',
250
+ NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
236
251
  });
237
252
 
238
253
  test('SepDna', async () => {
239
- await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq',
254
+ await _testPos(readCsv(csvTests.sepDna), 'seq',
240
255
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false, '*');
241
256
  });
242
257
  test('SepRna', async () => {
243
- await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq',
258
+ await _testPos(readCsv(csvTests.sepRna), 'seq',
244
259
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false, '*');
245
260
  });
246
261
  test('SepPt', async () => {
247
- await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq',
262
+ await _testPos(readCsv(csvTests.sepPt), 'seq',
248
263
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.PT, 20, false, '-');
249
264
  });
250
265
  test('SepUn1', async () => {
251
- await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq',
266
+ await _testPos(readCsv(csvTests.sepUn1), 'seq',
252
267
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 8, true, '-');
253
268
  });
254
269
  test('SepUn2', async () => {
255
- await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq',
270
+ await _testPos(readCsv(csvTests.sepUn2), 'seq',
256
271
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 9, true, '/');
257
272
  });
258
273
 
259
274
  test('SepMsaN1', async () => {
260
- await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq',
275
+ await _testPos(readCsv(csvTests.sepMsaDna1), 'seq',
261
276
  NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false, '-');
262
277
  });
263
278
 
264
- test('SamplesFastaCsvPt', async () => {
265
- await _testPos(readSamples(Samples.fastaCsv), 'sequence',
266
- NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
267
- });
268
- test('SamplesFastaCsvNegativeEntry', async () => {
269
- await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
270
- });
271
- test('SamplesFastaCsvNegativeLength', async () => {
272
- await _testNeg(readSamples(Samples.fastaCsv), 'Length');
273
- });
274
- test('SamplesFastaCsvNegativeUniProtKB', async () => {
275
- await _testNeg(readSamples(Samples.fastaCsv), 'UniProtKB');
279
+ test('samplesFastaCsv', async () => {
280
+ await _testDf(readSamples(Samples.fastaCsv), {
281
+ 'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
282
+ });
276
283
  });
277
284
 
278
- test('SamplesFastaFastaPt', async () => {
279
- await _testPos(readSamples(Samples.fastaFasta, readFileFasta),
280
- 'sequence', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
285
+ test('samplesFastaFasta', async () => {
286
+ await _testDf(readSamples(Samples.fastaFasta), {
287
+ 'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
288
+ });
281
289
  });
282
290
 
283
291
  // peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
284
292
  // test('samplesPeptidesComplexPositiveAlignedSequence', async () => {
285
293
  // await _testPos(readSamples(Samples.peptidesComplex), 'AlignedSequence', 'separator:SEQ:UN', '-');
286
294
  // });
287
- test('samplesPeptidesComplexNegativeID', async () => {
288
- await _testNeg(readSamples(Samples.peptidesComplex), 'ID');
289
- });
290
- test('SamplesPeptidesComplexNegativeMeasured', async () => {
291
- await _testNeg(readSamples(Samples.peptidesComplex), 'Measured');
292
- });
293
- test('SamplesPeptidesComplexNegativeValue', async () => {
294
- await _testNeg(readSamples(Samples.peptidesComplex), 'Value');
295
+ test('samplesPeptidesComplex', async () => {
296
+ await _testDf(readSamples(Samples.peptidesComplex), {} /* no positive */);
295
297
  });
296
298
 
297
- test('samplesMsaComplexUn', async () => {
298
- await _testPos(readSamples(Samples.msaComplex), 'MSA',
299
- NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/');
300
- });
301
- test('samplesMsaComplexNegativeActivity', async () => {
302
- await _testNeg(readSamples(Samples.msaComplex), 'Activity');
299
+ test('samplesMsaComplex', async () => {
300
+ await _testDf(readSamples(Samples.msaComplex), {
301
+ 'MSA': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/'),
302
+ });
303
303
  });
304
304
 
305
- test('samplesIdCsvNegativeID', async () => {
306
- await _testNeg(readSamples(Samples.testIdCsv), 'ID');
305
+ test('samplesIdCsv', async () => {
306
+ await _testDf(readSamples(Samples.testIdCsv), {} /* no positive */);
307
307
  });
308
308
 
309
- test('samplesSarSmallCsvNegativeSmiles', async () => {
310
- await _testNeg(readSamples(Samples.testSmilesCsv), 'smiles');
309
+ test('samplesSarSmallCsv', async () => {
310
+ await _testDf(readSamples(Samples.testSmilesCsv), {} /* no positive */);
311
311
  });
312
312
 
313
- test('samplesHelmCsvHELM', async () => {
314
- await _testPos(readSamples(Samples.helmCsv), 'HELM',
315
- NOTATION.HELM, null, null, 160, true, null);
313
+ test('samplesHelmCsv', async () => {
314
+ await _testDf(readSamples(Samples.helmCsv), {
315
+ 'HELM': new PosCol(NOTATION.HELM, null, null, 160, true),
316
+ });
316
317
  });
317
318
 
318
- test('samplesHelmCsvNegativeActivity', async () => {
319
- await _testNeg(readSamples(Samples.helmCsv), 'Activity');
320
- });
321
-
322
- // sample_testHelm.csb
319
+ // sample_testHelm.csv
323
320
  // columns: ID,Test type,HELM string,Valid?,Mol Weight,Mol Formula,SMILES
324
- test('samplesTestHelmNegativeID', async () => {
325
- await _testNeg(readSamples(Samples.testHelmCsv), 'ID');
326
- });
327
- test('samplesTestHelmNegativeTestType', async () => {
328
- await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
329
- });
330
- test('samplesTestHelmPositiveHelmString', async () => {
331
- await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', NOTATION.HELM, null, null, 9, true, null);
332
- });
333
- test('samplesTestHelmNegativeValid', async () => {
334
- await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
335
- });
336
- test('samplesTestHelmNegativeMolWeight', async () => {
337
- await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Weight');
338
- });
339
- test('samplesTestHelmNegativeMolFormula', async () => {
340
- await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Formula');
341
- });
342
- test('samplesTestHelmNegativeSmiles', async () => {
343
- await _testNeg(readSamples(Samples.testHelmCsv), 'Smiles');
321
+ test('samplesTestHelmCsv', async () => {
322
+ await _testDf(readSamples(Samples.testHelmCsv), {
323
+ 'HELM string': new PosCol(NOTATION.HELM, null, null, 9, true),
324
+ });
344
325
  });
345
326
 
346
- test('samplesTestDemogNegativeAll', async () => {
347
- const dfFunc: DfReaderFunc = readSamples(Samples.testDemogCsv);
348
- const df: DG.DataFrame = await dfFunc();
349
-
350
- for (const col of df.columns.toList())
351
- await _testNeg(dfFunc, col.name);
327
+ test('samplesTestDemogCsv', async () => {
328
+ await _testDf(readSamples(Samples.testDemogCsv), {} /* no positive */);
352
329
  });
353
330
 
354
- test('samplesTestSmiles2NegativeSmiles', async () => {
355
- await _testNeg(readSamples(Samples.testSmiles2Csv), 'SMILES');
331
+ test('samplesTestSmiles2Csv', async () => {
332
+ await _testDf(readSamples(Samples.testSmiles2Csv), {} /* no positive */);
356
333
  });
357
334
 
358
- test('samplesTestSmilesShortNegativeSmiles', async () => {
359
- await _testNeg(readSamples(Samples.testSmilesShort), 'SMILES');
335
+ test('samplesTestSmilesShort', async () => {
336
+ await _testDf(readSamples(Samples.testSmilesShort), {} /* no positive */);
360
337
  });
361
338
 
362
339
  test('samplesTestActivityCliffsNegativeSmiles', async () => {
363
- await _testNeg(readSamples(Samples.testActivityCliffsCsv), 'smiles');
340
+ await _testDf(readSamples(Samples.testActivityCliffsCsv), {} /* no positive */);
364
341
  });
365
342
 
366
- test('samplesFastaPtPosSequence', async () => {
367
- await _testPos(readSamples(Samples.fastaPtCsv), 'sequence',
368
- NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
343
+ test('samplesFastaPtCsv', async () => {
344
+ await _testDf(readSamples(Samples.fastaPtCsv), {
345
+ 'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
346
+ });
369
347
  });
370
348
 
371
- test('samplesTestCerealNegativeCerealName', async () => {
372
- await _testNeg(readSamples(Samples.testCerealCsv), 'cereal_name');
349
+ test('samplesTestCerealCsv', async () => {
350
+ await _testDf(readSamples(Samples.testCerealCsv), {} /* no positive */);
373
351
  });
374
352
 
375
- test('samplesTestSpgi100NegativeStereoCategory', async () => {
376
- await _testNeg(readSamples(Samples.testSpgi100), 'Stereo Category');
377
- });
378
- test('samplesTestSpgi100NegativeScaffoldNames', async () => {
379
- await _testNeg(readSamples(Samples.testSpgi100), 'Scaffold Names');
380
- });
381
- test('samplesTestSpgi100NegativePrimaryScaffoldName', async () => {
382
- await _testNeg(readSamples(Samples.testSpgi100), 'Primary Scaffold Name');
383
- });
384
- test('samplesTestSpgi100NegativeSampleName', async () => {
385
- await _testNeg(readSamples(Samples.testSpgi100), 'Sample Name');
353
+ test('samplesTestUnichemSources', async () => {
354
+ await _testDf(readSamples(Samples.testUnichemSources), {} /* no positive */);
386
355
  });
387
356
 
388
- test('samplesTestUnichemSourcesNegativeSrcUrl', async () => {
389
- await _testNeg(readSamples(Samples.testUnichemSources), 'src_url');
390
- });
391
- test('samplesTestUnichemSourcesNegativeBaseIdUrl', async () => {
392
- await _testNeg(readSamples(Samples.testUnichemSources), 'base_id_url');
357
+ test('samplesTestDmvOffices', async () => {
358
+ await _testDf(readSamples(Samples.testDmvOffices), {} /* no positive */);
393
359
  });
394
360
 
395
- test('samplesTestDmvOfficesNegativeOfficeName', async () => {
396
- await _testNeg(readSamples(Samples.testDmvOffices), 'Office Name');
361
+ test('samplesTestAlertCollection', async () => {
362
+ await _testDf(readSamples(Samples.testAlertCollection), {} /* no positive */);
397
363
  });
398
- test('samplesTestDmvOfficesNegativeCity', async () => {
399
- await _testNeg(readSamples(Samples.testDmvOffices), 'City');
364
+
365
+ test('samplesTestSpgi', async () => {
366
+ await _testDf(readSamples(Samples.testSpgi), {} /* no positive */);
400
367
  });
401
368
 
402
- test('samplesTestAlertCollectionNegativeSmarts', async () => {
403
- await _testNeg(readSamples(Samples.testAlertCollection), 'smarts');
369
+ test('samplesTestSpgi100', async () => {
370
+ await _testDf(readSamples(Samples.testSpgi100), {} /* no positive */);
404
371
  });
405
372
 
406
- test('samplesTestSpgiNegativeVals', async () => {
407
- await _testNeg(readSamples(Samples.testSpgi), 'vals');
373
+ test('samplesTestUrl', async () => {
374
+ await _testDf(readSamples(Samples.testUrl), {} /* no positive */);
408
375
  });
409
376
  });
410
377
 
411
378
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
412
379
  const df: DG.DataFrame = await readDf();
413
- const col: DG.Column = df.col(colName)!;
380
+ const col: DG.Column = df.getCol(colName)!;
414
381
  const semType: string = await grok.functions
415
382
  .call('Bio:detectMacromolecule', {col: col}) as unknown as string;
416
383
  if (semType)
@@ -437,7 +404,7 @@ export async function _testPos(
437
404
  if (semType)
438
405
  col.semType = semType;
439
406
 
440
- expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
407
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
441
408
  expect(col.getTag(DG.TAGS.UNITS), units);
442
409
  expect(col.getTag(bioTAGS.aligned), aligned);
443
410
  expect(col.getTag(bioTAGS.alphabet), alphabet);
@@ -452,3 +419,41 @@ export async function _testPos(
452
419
  expect(uh.alphabet, alphabet);
453
420
  }
454
421
  }
422
+
423
+ class PosCol {
424
+ constructor(
425
+ public readonly units: string,
426
+ public readonly aligned: string | null,
427
+ public readonly alphabet: string | null,
428
+ public readonly alphabetSize: number,
429
+ public readonly alphabetIsMultichar: boolean,
430
+ public readonly separator?: string
431
+ ) { };
432
+ };
433
+
434
+ export async function _testDf(readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }): Promise<void> {
435
+ const df: DG.DataFrame = await readDf();
436
+ const errList: string[] = [];
437
+ for (const colName of df.columns.names()) {
438
+ if (colName in posCols) {
439
+ const p = posCols[colName];
440
+ try {
441
+ await _testPos(readDf, colName, p.units, p.aligned, p.alphabet,
442
+ p.alphabetSize, p.alphabetIsMultichar, p.separator);
443
+ } catch (err: any) {
444
+ const errMsg: string = err.toString();
445
+ errList.push(`Positive col '${colName}' failed: ${errMsg}`);
446
+ }
447
+ } else {
448
+ try {
449
+ await _testNeg(readDf, colName);
450
+ } catch (err: any) {
451
+ const errMsg: string = err.toString();
452
+ errList.push(`Negative col '${colName}' failed: ${errMsg}`);
453
+ }
454
+ }
455
+ }
456
+
457
+ if (errList.length > 0)
458
+ throw new Error(errList.join('\n'));
459
+ }
@@ -4,7 +4,7 @@ import * as grok from 'datagrok-api/grok';
4
4
 
5
5
  import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
6
6
  import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
7
- import {splitterAsFasta} from '@datagrok-libraries/bio';
7
+ import {splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
8
 
9
9
  type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
10
10
 
@@ -0,0 +1,34 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+
5
+ import {test, after, before, category, expect} from '@datagrok-libraries/utils/src/test';
6
+
7
+ import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
8
+ import {LIB_STORAGE_NAME} from '../utils/monomer-lib';
9
+
10
+
11
+ category('monomerLibraries', () => {
12
+ let monomerLibHelper: IMonomerLibHelper;
13
+ /** Backup of the user's actual monomer library settings (restored in after()) */
14
+ let userLibrariesSettings: any = null;
15
+
16
+ before(async () => {
17
+ monomerLibHelper = await getMonomerLibHelper();
18
+ userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
19
+ });
20
+
21
+ after(async () => {
22
+ await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
23
+ });
24
+
25
+ test('default', async () => {
26
+ // Clear settings to test default
27
+ await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
28
+ await monomerLibHelper.loadLibraries(true); // test defaultLib
29
+
30
+ // Currently default monomer lib is empty
31
+ const currentMonomerLib = monomerLibHelper.getBioLib();
32
+ expect(currentMonomerLib.getTypes().length, 0);
33
+ });
34
+ });
@@ -0,0 +1,21 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ import {category, expect, test} from '@datagrok-libraries/utils/src/test';
4
+ import {runPepsea} from '../utils/pepsea';
5
+
6
+ category('PepSeA', () => {
7
+ const testCsv = `HELM,MSA
8
+ "PEPTIDE1{F.L.R.G.W.[MeF].Y.S.N.N.C}$$$$","F.L.R.G.W.MeF.Y..S.N.N.C"
9
+ "PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.N.C}$$$$","F.L.R.G.Y.MeF.Y.W...N.C"
10
+ "PEPTIDE1{F.G.Y.[MeF].Y.W.S.D.N.C}$$$$","F...G.Y.MeF.Y.W.S.D.N.C"
11
+ "PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.S.N.D.C}$$$$","F.L.R.G.Y.MeF.Y.W.S.N.D.C"
12
+ "PEPTIDE1{F.V.R.G.Y.[MeF].Y.W.S.N.C}$$$$","F.V.R.G.Y.MeF.Y.W.S..N.C"`;
13
+
14
+ test('Basic alignment', async () => {
15
+ const table = DG.DataFrame.fromCsv(testCsv);
16
+ const alignedCol = await runPepsea(table.getCol('HELM'), 'msa(HELM)');
17
+ const alignedTestCol = table.getCol('MSA');
18
+ for (let i = 0; i < alignedCol.length; ++i)
19
+ expect(alignedCol.get(i) == alignedTestCol.get(i), true);
20
+ }, {skipReason: 'GROK-12764'});
21
+ });