@datagrok/bio 2.4.30 → 2.4.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/.eslintrc.json +6 -8
  2. package/README.md +22 -7
  3. package/detectors.js +21 -12
  4. package/dist/1.js +2 -0
  5. package/dist/1.js.map +1 -0
  6. package/dist/18.js +2 -0
  7. package/dist/18.js.map +1 -0
  8. package/dist/190.js +2 -0
  9. package/dist/190.js.map +1 -0
  10. package/dist/452.js +2 -0
  11. package/dist/452.js.map +1 -0
  12. package/dist/729.js +2 -0
  13. package/dist/729.js.map +1 -0
  14. package/dist/package-test.js +1 -1
  15. package/dist/package-test.js.map +1 -1
  16. package/dist/package.js +1 -1
  17. package/dist/package.js.map +1 -1
  18. package/files/libraries/broken-lib.sdf +136 -0
  19. package/files/libraries/group1/mock-lib-3.json +74 -0
  20. package/files/libraries/mock-lib-2.json +48 -0
  21. package/files/tests/100_3_clustests.csv +100 -0
  22. package/files/tests/100_3_clustests_empty_vals.csv +100 -0
  23. package/files/tests/peptides_motif-with-random_10000.csv +9998 -0
  24. package/package.json +4 -4
  25. package/scripts/sequence_generator.py +185 -48
  26. package/src/analysis/sequence-activity-cliffs.ts +9 -11
  27. package/src/analysis/sequence-diversity-viewer.ts +8 -3
  28. package/src/analysis/sequence-search-base-viewer.ts +4 -3
  29. package/src/analysis/sequence-similarity-viewer.ts +13 -7
  30. package/src/analysis/sequence-space.ts +15 -12
  31. package/src/analysis/workers/mm-distance-array-service.ts +48 -0
  32. package/src/analysis/workers/mm-distance-array-worker.ts +29 -0
  33. package/src/analysis/workers/mm-distance-worker-creator.ts +6 -9
  34. package/src/apps/web-logo-app.ts +34 -0
  35. package/src/calculations/monomerLevelMols.ts +10 -12
  36. package/src/demo/bio01-similarity-diversity.ts +4 -5
  37. package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +6 -7
  38. package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +8 -8
  39. package/src/demo/bio03-atomic-level.ts +1 -4
  40. package/src/demo/bio05-helm-msa-sequence-space.ts +8 -5
  41. package/src/demo/utils.ts +4 -3
  42. package/src/package-test.ts +1 -2
  43. package/src/package.ts +138 -83
  44. package/src/seq_align.ts +482 -483
  45. package/src/substructure-search/substructure-search.ts +3 -3
  46. package/src/tests/Palettes-test.ts +1 -1
  47. package/src/tests/WebLogo-positions-test.ts +12 -35
  48. package/src/tests/_first-tests.ts +1 -1
  49. package/src/tests/activity-cliffs-tests.ts +10 -6
  50. package/src/tests/activity-cliffs-utils.ts +6 -4
  51. package/src/tests/bio-tests.ts +20 -25
  52. package/src/tests/checkInputColumn-tests.ts +5 -11
  53. package/src/tests/converters-test.ts +19 -37
  54. package/src/tests/detectors-benchmark-tests.ts +35 -37
  55. package/src/tests/detectors-tests.ts +29 -34
  56. package/src/tests/detectors-weak-and-likely-tests.ts +11 -21
  57. package/src/tests/fasta-export-tests.ts +3 -3
  58. package/src/tests/fasta-handler-test.ts +2 -3
  59. package/src/tests/lib-tests.ts +2 -4
  60. package/src/tests/mm-distance-tests.ts +25 -17
  61. package/src/tests/monomer-libraries-tests.ts +1 -1
  62. package/src/tests/msa-tests.ts +12 -9
  63. package/src/tests/pepsea-tests.ts +6 -3
  64. package/src/tests/renderers-test.ts +13 -11
  65. package/src/tests/sequence-space-test.ts +10 -7
  66. package/src/tests/sequence-space-utils.ts +7 -3
  67. package/src/tests/similarity-diversity-tests.ts +47 -61
  68. package/src/tests/splitters-test.ts +14 -20
  69. package/src/tests/to-atomic-level-tests.ts +9 -17
  70. package/src/tests/units-handler-splitted-tests.ts +106 -0
  71. package/src/tests/units-handler-tests.ts +22 -26
  72. package/src/tests/utils/sequences-generators.ts +6 -2
  73. package/src/tests/utils.ts +10 -4
  74. package/src/tests/viewers.ts +1 -1
  75. package/src/utils/atomic-works.ts +49 -57
  76. package/src/utils/cell-renderer.ts +25 -8
  77. package/src/utils/check-input-column.ts +19 -4
  78. package/src/utils/constants.ts +3 -3
  79. package/src/utils/convert.ts +56 -23
  80. package/src/utils/monomer-lib.ts +83 -64
  81. package/src/utils/multiple-sequence-alignment-ui.ts +24 -21
  82. package/src/utils/multiple-sequence-alignment.ts +2 -2
  83. package/src/utils/pepsea.ts +17 -7
  84. package/src/utils/save-as-fasta.ts +11 -4
  85. package/src/utils/ui-utils.ts +1 -1
  86. package/src/viewers/vd-regions-viewer.ts +21 -22
  87. package/src/viewers/web-logo-viewer.ts +189 -154
  88. package/src/widgets/bio-substructure-filter.ts +9 -6
  89. package/src/widgets/representations.ts +11 -12
  90. package/tsconfig.json +1 -1
  91. package/dist/258.js +0 -2
  92. package/dist/258.js.map +0 -1
  93. package/dist/562.js +0 -2
  94. package/dist/562.js.map +0 -1
  95. package/dist/705.js +0 -2
  96. package/dist/705.js.map +0 -1
  97. package/dist/925.js +0 -2
  98. package/dist/925.js.map +0 -1
  99. package/src/analysis/workers/mm-distance-worker.ts +0 -16
@@ -2,13 +2,11 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
5
+ import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
6
6
  import {ALPHABET, getAlphabet, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
- import {Column} from 'datagrok-api/dg';
8
7
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
8
 
10
9
  category('detectorsBenchmark', () => {
11
-
12
10
  let detectFunc: DG.Func;
13
11
 
14
12
  before(async () => {
@@ -23,38 +21,38 @@ category('detectorsBenchmark', () => {
23
21
  // -- fasta --
24
22
 
25
23
  test('fastaDnaShorts50Few50', async () => {
26
- const et: number = await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 50);
24
+ await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 50);
27
25
  },
28
26
  {skipReason: '#1192'});
29
27
 
30
28
  test('fastaDnaShorts50Many1E6', async () => {
31
- const et: number = await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 1E6);
29
+ await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 1E6);
32
30
  },
33
31
  {skipReason: '#1192'});
34
32
 
35
33
  test('fastaDnaLong1e6Few50', async () => {
36
- const et: number = await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 1E6, 50);
34
+ await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 1E6, 50);
37
35
  },
38
36
  {skipReason: '#1192'});
39
37
 
40
38
  // -- separator --
41
39
 
42
40
  test('separatorDnaShorts50Few50', async () => {
43
- const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 50, '/');
41
+ detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 50, '/');
44
42
  }, {skipReason: '#1192'});
45
43
 
46
44
  test('separatorDnaShorts50Many1E6', async () => {
47
- const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 1E6, '/');
45
+ detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 1E6, '/');
48
46
  },
49
47
  { /* skipReason: 'slow transmit large dataset to detector' */});
50
48
 
51
49
  test('separatorDnaLong1e6Few50', async () => {
52
- const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 1E6, 50, '/');
50
+ detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 1E6, 50, '/');
53
51
  },
54
52
  {skipReason: '#1192'});
55
53
 
56
54
  async function detectMacromoleculeBenchmark(
57
- maxET: number, notation: NOTATION, alphabet: ALPHABET, length: number, count: number, separator?: string
55
+ maxET: number, notation: NOTATION, alphabet: ALPHABET, length: number, count: number, separator?: string,
58
56
  ): Promise<number> {
59
57
  return await benchmark<DG.FuncCall, DG.Column>(10,
60
58
  (): DG.FuncCall => {
@@ -70,48 +68,48 @@ category('detectorsBenchmark', () => {
70
68
  semType: DG.SEMTYPE.MACROMOLECULE,
71
69
  notation: notation,
72
70
  alphabet: alphabet,
73
- separator: separator
71
+ separator: separator,
74
72
  });
75
73
  });
76
74
  }
77
75
 
78
76
  function generate(
79
- notation: NOTATION, alphabet: string[], length: number, count: number, separator?: string
77
+ notation: NOTATION, alphabet: string[], length: number, count: number, separator?: string,
80
78
  ): DG.Column {
81
79
  let seqMerger: (seqMList: string[], separator?: string) => string;
82
80
 
83
81
  switch (notation) {
84
- case NOTATION.FASTA:
85
- seqMerger = (seqMList: string[]): string => {
86
- let res: string = '';
87
- for (let j = 0; j < seqMList.length; j++) {
88
- const m = seqMList[j];
89
- res += m.length == 1 ? m : `[${m}]`;
90
- }
91
- return res;
92
- };
93
- break;
94
- case NOTATION.SEPARATOR:
95
- seqMerger = (seqMList: string[], separator?: string): string => {
96
- return seqMList.join(separator);
97
- };
98
- break;
99
- default:
100
- throw new Error(`Not supported notation '${notation}'.`);
82
+ case NOTATION.FASTA:
83
+ seqMerger = (seqMList: string[]): string => {
84
+ let res: string = '';
85
+ for (let j = 0; j < seqMList.length; j++) {
86
+ const m = seqMList[j];
87
+ res += m.length == 1 ? m : `[${m}]`;
88
+ }
89
+ return res;
90
+ };
91
+ break;
92
+ case NOTATION.SEPARATOR:
93
+ seqMerger = (seqMList: string[], separator?: string): string => {
94
+ return seqMList.join(separator);
95
+ };
96
+ break;
97
+ default:
98
+ throw new Error(`Not supported notation '${notation}'.`);
101
99
  }
102
100
 
103
101
  const buildSeq = (alphabet: string[], length: number): string => {
104
102
  const seqMList = new Array<string>(length);
105
- for (let j = 0; j < length; j++) {
103
+ for (let j = 0; j < length; j++)
106
104
  seqMList[j] = alphabet[Math.floor(Math.random() * alphabet.length)];
107
- }
105
+
108
106
  return seqMerger(seqMList, separator);
109
107
  };
110
108
 
111
109
  const seqList: string[] = Array(count);
112
- for (let i = 0; i < count; i++) {
110
+ for (let i = 0; i < count; i++)
113
111
  seqList[i] = buildSeq(alphabet, length);
114
- }
112
+
115
113
 
116
114
  return DG.Column.fromStrings('seq', seqList);
117
115
  }
@@ -123,13 +121,13 @@ category('detectorsBenchmark', () => {
123
121
  funcCall.callSync();
124
122
  const semType = funcCall.getOutputParamValue();
125
123
 
126
- const col: DG.Column = funcCall.inputs.col;
124
+ const col: DG.Column = funcCall.inputs.col as unknown as DG.Column;
127
125
  if (semType) col.semType = semType;
128
126
  return col;
129
127
  }
130
128
 
131
129
  function checkDetectorRes(col: DG.Column, tgt: TgtType): void {
132
- const uh = new UnitsHandler(col);
130
+ const uh = UnitsHandler.getOrCreate(col);
133
131
  expect(col.semType, tgt.semType);
134
132
  expect(uh.notation, tgt.notation);
135
133
  expect(uh.alphabet, tgt.alphabet);
@@ -138,9 +136,9 @@ category('detectorsBenchmark', () => {
138
136
  });
139
137
 
140
138
 
141
- /** Returns ET [ms] of test() */
139
+ //Returns ET [ms] of test()
142
140
  async function benchmark<TData, TRes>(
143
- maxET: number, prepare: () => TData, test: (data: TData) => Promise<TRes>, check: (res: TRes) => void
141
+ maxET: number, prepare: () => TData, test: (data: TData) => Promise<TRes>, check: (res: TRes) => void,
144
142
  ): Promise<number> {
145
143
  const data: TData = prepare();
146
144
 
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
5
+ import {category, test, expect} from '@datagrok-libraries/utils/src/test';
6
6
 
7
7
  import {importFasta} from '../package';
8
8
  import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
@@ -67,69 +67,56 @@ category('detectors', () => {
67
67
  [csvTests.negSmiles]: string = `col1
68
68
  CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
69
69
  C1CCCCC1
70
- CCCCCC
71
- `;
70
+ CCCCCC`;
72
71
  [csvTests.fastaDna1]: string = `seq
73
72
  ACGTC
74
73
  CAGTGT
75
- TTCAAC
76
- `;
74
+ TTCAAC`;
77
75
  [csvTests.fastaRna1]: string = `seq
78
76
  ACGUC
79
77
  CAGUGU
80
- UUCAAC
81
- `;
78
+ UUCAAC`;
82
79
  /** Pure amino acids sequence */
83
80
  [csvTests.fastaPt1]: string = `seq
84
81
  FWPHEY
85
82
  YNRQWYV
86
- MKPSEYV
87
- `;
83
+ MKPSEYV`;
88
84
  [csvTests.fastaUn]: string = `seq
89
85
  [meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
90
86
  [meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
91
- [Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
92
- `;
87
+ [Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`;
93
88
  [csvTests.sepDna]: string = `seq
94
89
  A*C*G*T*C
95
90
  C*A*G*T*G*T
96
- T*T*C*A*A*C
97
- `;
91
+ T*T*C*A*A*C`;
98
92
  [csvTests.sepRna]: string = `seq
99
93
  A*C*G*U*C
100
94
  C*A*G*U*G*U
101
- U*U*C*A*A*C
102
- `;
95
+ U*U*C*A*A*C`;
103
96
  [csvTests.sepPt]: string = `seq
104
97
  F-W-P-H-E-Y
105
98
  Y-N-R-Q-W-Y-V
106
- M-K-P-S-E-Y-V
107
- `;
99
+ M-K-P-S-E-Y-V`;
108
100
  [csvTests.sepUn1]: string = `seq
109
101
  abc-dfgg-abc1-cfr3-rty-wert
110
102
  rut12-her2-rty-wert-abc-abc1-dfgg
111
- rut12-rty-her2-abc-cfr3-wert-rut12
112
- `;
103
+ rut12-rty-her2-abc-cfr3-wert-rut12`;
113
104
  [csvTests.sepUn2]: string = `seq
114
105
  abc/dfgg/abc1/cfr3/rty/wert
115
106
  rut12/her2/rty/wert//abc/abc1/dfgg
116
- rut12/rty/her2/abc/cfr3//wert/rut12
117
- `;
107
+ rut12/rty/her2/abc/cfr3//wert/rut12`;
118
108
  [csvTests.sepMsaDna1]: string = `seq
119
109
  A-C--G-T--C-T
120
110
  C-A-C--T--G-T
121
- A-C-C-G-T-A-C-T
122
- `;
111
+ A-C-C-G-T-A-C-T`;
123
112
  [csvTests.fastaMsaDna1]: string = `seq
124
113
  AC-GT-CT
125
114
  CAC-T-GT
126
- ACCGTACT
127
- `;
115
+ ACCGTACT`;
128
116
  [csvTests.fastaMsaPt1]: string = `seq
129
117
  FWR-WYV-KHP
130
118
  YNR-WYV-KHP
131
- MWRSWY-CKHP
132
- `;
119
+ MWRSWY-CKHP`;
133
120
  }();
134
121
 
135
122
  const enum Samples {
@@ -201,7 +188,7 @@ MWRSWY-CKHP
201
188
  return df;
202
189
  }
203
190
 
204
- async function readFileFasta(file: string): Promise<DG.DataFrame> {
191
+ async function _readFileFasta(file: string): Promise<DG.DataFrame> {
205
192
  const txt: string = await grok.dapi.files.readAsText(file);
206
193
  const df: DG.DataFrame = importFasta(txt)[0];
207
194
  return df;
@@ -223,6 +210,8 @@ MWRSWY-CKHP
223
210
  test('Negative2', async () => { await _testNeg(readCsv(csvTests.neg2), 'col1'); });
224
211
  test('Negative3', async () => { await _testNeg(readCsv(csvTests.neg3), 'col1'); });
225
212
  test('NegativeSmiles', async () => { await _testNeg(readCsv(csvTests.negSmiles), 'col1'); });
213
+ test('NegativeStartEnd', async () => { await _testNegList(['START', 'END']); });
214
+ test('NegativeStartEndIntermediate', async () => { await _testNegList(['START', 'END', 'INTERMEDIATE']); });
226
215
 
227
216
  test('FastaDna1', async () => {
228
217
  await _testPos(readCsv(csvTests.fastaDna1), 'seq',
@@ -375,6 +364,15 @@ MWRSWY-CKHP
375
364
  });
376
365
  });
377
366
 
367
+ export async function _testNegList(list: string[]): Promise<void> {
368
+ const col: DG.Column = DG.Column.fromList(DG.TYPE.STRING, 'col1', list);
369
+ const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
370
+ if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
371
+ const msg = `Negative test detected semType='${col.semType}', units='${col.getTag(DG.TAGS.UNITS)}'.`;
372
+ throw new Error(msg);
373
+ }
374
+ }
375
+
378
376
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
379
377
  const df: DG.DataFrame = await readDf();
380
378
  const col: DG.Column = df.getCol(colName)!;
@@ -386,16 +384,13 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
386
384
  if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
387
385
  const msg = `Negative test detected semType='${col.semType}', units='${col.getTag(DG.TAGS.UNITS)}'.`;
388
386
  throw new Error(msg);
389
- // col.semType = '';
390
- // col.setTag(DG.TAGS.UNITS, '');
391
- // col.setTag(NOTATION.SEPARATOR, '');
392
387
  }
393
388
  }
394
389
 
395
390
  export async function _testPos(
396
391
  readDf: DfReaderFunc, colName: string, units: string,
397
392
  aligned: string | null, alphabet: string | null, alphabetSize: number, alphabetIsMultichar: boolean,
398
- separator: string | null = null
393
+ separator: string | null = null,
399
394
  ) {
400
395
  const df: DG.DataFrame = await readDf();
401
396
  const col: DG.Column = df.col(colName)!;
@@ -411,7 +406,7 @@ export async function _testPos(
411
406
  if (separator)
412
407
  expect(col.getTag(bioTAGS.separator), separator);
413
408
 
414
- const uh = new UnitsHandler(col);
409
+ const uh = UnitsHandler.getOrCreate(col);
415
410
  expect(uh.getAlphabetSize(), alphabetSize);
416
411
  expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
417
412
  if (!uh.isHelm()) {
@@ -427,7 +422,7 @@ class PosCol {
427
422
  public readonly alphabet: string | null,
428
423
  public readonly alphabetSize: number,
429
424
  public readonly alphabetIsMultichar: boolean,
430
- public readonly separator?: string
425
+ public readonly separator?: string,
431
426
  ) { };
432
427
  }
433
428
 
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
5
+ import {category, test} from '@datagrok-libraries/utils/src/test';
6
6
  import {ALIGNMENT, ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
7
  import {_testNeg, _testPos} from './detectors-tests';
8
8
  import {DfReaderFunc} from './types';
@@ -31,61 +31,51 @@ category('detectors:weak-and-likely', () => {
31
31
  1,TTTTT
32
32
  2,TTTTT
33
33
  3,TTTTT
34
- 4,TTTTT
35
- `,
34
+ 4,TTTTT`,
36
35
  [csvTests.fastaDnaWeak1LikelyName]: `id,seq
37
36
  1,TTTTT
38
37
  2,TTTTT
39
38
  3,TTTTT
40
- 4,TTTTT
41
- `,
39
+ 4,TTTTT`,
42
40
  [csvTests.fastaRnaWeak1]: `id,colName
43
41
  1,UUUUU
44
42
  2,UUUUU
45
43
  3,UUUUU
46
- 4,UUUUU
47
- `,
44
+ 4,UUUUU`,
48
45
  [csvTests.fastaRnaWeak1LikelyName]: `id,seq
49
46
  1,UUUUU
50
47
  2,UUUUU
51
48
  3,UUUUU
52
- 4,UUUUU
53
- `,
49
+ 4,UUUUU`,
54
50
  [csvTests.fastaPtWeak1]: `id,colName
55
51
  1,SLSLSPGK
56
52
  2,SLSLSPGK
57
53
  3,SLSLSPGK
58
- 4,SLSLSPGK
59
- `,
54
+ 4,SLSLSPGK`,
60
55
  [csvTests.fastaPtWeak1LikelyName]: `id,seq
61
56
  1,SLSLSPGK
62
57
  2,SLSLSPGK
63
58
  3,SLSLSPGK
64
- 4,SLSLSPGK
65
- `,
59
+ 4,SLSLSPGK`,
66
60
  [csvTests.fastaUn1]: `id,colName
67
61
  1,word
68
62
  2,other
69
63
  3,some
70
- 4,another
71
- `,
64
+ 4,another`,
72
65
  [csvTests.fastaUn1LikelyName]: `id,seq
73
66
  1,word
74
67
  2,other
75
68
  3,some
76
- 4,another
77
- `,
69
+ 4,another`,
78
70
  [csvTests.fastaUn2LikelyName]: `protein
79
71
  Boombastic
80
72
  Megafantastic
81
- "just-a-random-thought,oy!"
82
- `,
73
+ "just-a-random-thought,oy!"`,
83
74
  [csvTests.fastaUnMsa1LikelyName]: `id,seq
84
75
  1,word
85
76
  2,male
86
77
  3,bare
87
- 4,core
88
- `,
78
+ 4,core`,
89
79
  };
90
80
 
91
81
  const readCsv: (key: csvTests) => DfReaderFunc = (key: keyof typeof csvData) => {
@@ -47,7 +47,7 @@ MDYKETLLMP
47
47
  KTDFPMRGGL
48
48
  >3
49
49
  P
50
- `
50
+ `,
51
51
  },
52
52
  [SaveAsFastaTests.test2]: {
53
53
  srcCsv: `id,id2,seq
@@ -66,8 +66,8 @@ KTDFP
66
66
  MRGGL
67
67
  >seqC|3
68
68
  [MeA]
69
- `
70
- }
69
+ `,
70
+ },
71
71
  };
72
72
 
73
73
  test('wrapSequenceSingle', async () => {
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  import {category, expectArray, test} from '@datagrok-libraries/utils/src/test';
7
7
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
8
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
8
 
10
9
 
11
10
  category('fastaFileHandler', () => {
@@ -71,7 +70,7 @@ YHSPFHN
71
70
  const descriptionsArray = [
72
71
  'description:1', 'description:2', 'description:3', 'description:4',
73
72
  ];
74
- const descriptionCol = DG.Column.fromStrings('description', descriptionsArray);
73
+ const _descriptionCol = DG.Column.fromStrings('description', descriptionsArray);
75
74
 
76
75
  const sequencesArray = [
77
76
  'MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW',
@@ -86,7 +85,7 @@ YHSPFHN
86
85
  const parsedSequencesArray = ffh.sequencesArray;
87
86
  expectArray(
88
87
  [parsedDescriptionsArray, parsedSequencesArray],
89
- [descriptionsArray, sequencesArray]
88
+ [descriptionsArray, sequencesArray],
90
89
  );
91
90
  }
92
91
 
@@ -3,11 +3,9 @@ import * as grok from 'datagrok-api/grok';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
  import * as DG from 'datagrok-api/dg';
5
5
 
6
- import {category, expectArray, test} from '@datagrok-libraries/utils/src/test';
7
- import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
8
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
6
+ import {category} from '@datagrok-libraries/utils/src/test';
9
7
 
10
- category('monomer lib', () => {
8
+ category('monomer lib', () => {
11
9
  // test('monomerManager', async() => {
12
10
  // const df: DG.DataFrame = DG.DataFrame.fromCsv(await _package.files.readAsText('tests/test.csv'));
13
11
  // grok.shell.addTableView(df);
@@ -60,63 +60,67 @@ category('Distance', async () => {
60
60
 
61
61
  test('levenstein-sub', async () => {
62
62
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.LEVENSHTEIN]();
63
- _testDistance(prot1, prot2, df, 1);
63
+ _testDistance(prot1, prot2, df, 0.2);
64
64
  });
65
65
  test('levenstein-del', async () => {
66
66
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.LEVENSHTEIN]();
67
- _testDistance(prot3, prot4, df, 2);
67
+ _testDistance(prot3, prot4, df, 0.4);
68
68
  });
69
69
 
70
70
  test('hamming', async () => {
71
71
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.HAMMING]();
72
- _testDistance(prot3, prot4, df, 3);
72
+ _testDistance(prot3, prot4, df, 0.6);
73
73
  });
74
74
 
75
75
  // Note that here the result is actually an inverted value of alignment score, which is coorelated with distance
76
76
  // tests using default BLOSUM62 matrix are in agreement with the results of the online tool
77
77
  test('needleman-blosum62', async () => {
78
78
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH]();
79
- _testDistance(prot1, prot2, df, -35);
79
+ _testDistance(prot1, prot2, df, 0.205);
80
80
  });
81
81
 
82
82
  test('needleman-blosum62-del', async () => {
83
83
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH]();
84
- _testDistance(prot3, prot4, df, -14);
84
+ _testDistance(prot3, prot4, df, 0.65);
85
85
  });
86
86
 
87
87
  test('needleman-custom-sub', async () => {
88
88
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
89
- {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
89
+ {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1},
90
90
  );
91
- _testDistance(prot1, prot2, df, -4);
91
+ _testDistance(prot1, prot2, df, 0.2);
92
92
  });
93
93
 
94
94
  test('needleman-custom-del', async () => {
95
95
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
96
- {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
96
+ {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1},
97
97
  );
98
- _testDistance(prot3, prot4, df, -1);
98
+ _testDistance(prot3, prot4, df, 0.8);
99
99
  });
100
100
 
101
101
  test('needleman-custom-zero-extend', async () => {
102
102
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
103
- {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 0}
103
+ {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 0},
104
104
  );
105
- _testDistance(prot5, prot6, df, -2);
105
+ _testDistance(prot5, prot6, df, 0.714);
106
106
  });
107
107
 
108
108
  test('needleman-custom-half-extend', async () => {
109
109
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
110
- {scoringMatrix, alphabetIndexes, gapOpen: 2, gapExtend: 1}
110
+ {scoringMatrix, alphabetIndexes, gapOpen: 2, gapExtend: 1},
111
111
  );
112
- _testDistance(prot5, prot6, df, 2);
112
+ _testDistance(prot5, prot6, df, 1.286);
113
113
  });
114
114
 
115
115
  test('needleman-custom-same-extend', async () => {
116
116
  const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
117
- {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
117
+ {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1},
118
118
  );
119
- _testDistance(prot5, prot6, df, 1);
119
+ if (DG.Test.isInBenchmark) {
120
+ const seq1 = Array(10000).fill('FWRY').join('');
121
+ const seq2 = Array(10000).fill('FYWRRY').join('');
122
+ _testDistance(seq1, seq2, df, 0.667);
123
+ } else { _testDistance(prot5, prot6, df, 1.143); }
120
124
  });
121
125
  });
122
126
 
@@ -128,11 +132,15 @@ async function _initMacromoleculeColumn(csv: string): Promise<UnitsHandler> {
128
132
  if (semType)
129
133
  seqCol.semType = semType;
130
134
  await grok.data.detectSemanticTypes(srcDf);
131
- const uh = new UnitsHandler(seqCol);
135
+ const uh = UnitsHandler.getOrCreate(seqCol);
132
136
  return uh;
133
137
  }
134
138
 
135
139
  function _testDistance(seq1: string, seq2: string, df: (a: string, b: string) => number, expected: number) {
136
140
  const d = df(seq1, seq2);
137
- expect(d, expected);
141
+ expect(Number(d.toFixed(3)), Number(expected.toFixed(3)));
142
+ }
143
+
144
+ export function mapToFixed(ar: Float32Array | number[]) {
145
+ return Array.from(ar).map((d) => Number(d.toFixed(3)));
138
146
  }
@@ -29,6 +29,6 @@ category('monomerLibraries', () => {
29
29
 
30
30
  // Currently default monomer lib set is of all files at LIB_PATH (at least HELMCoreLibrary.json)
31
31
  const currentMonomerLib = monomerLibHelper.getBioLib();
32
- expect(currentMonomerLib.getTypes().length > 0, true);
32
+ expect(currentMonomerLib.getPolymerTypes().length > 0, true);
33
33
  });
34
34
  });
@@ -6,6 +6,7 @@ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src
6
6
  import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
7
  import {runKalign} from '../utils/multiple-sequence-alignment';
8
8
  import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
9
+ import {awaitContainerStart} from './utils';
9
10
  //import * as grok from 'datagrok-api/grok';
10
11
 
11
12
  export const _package = new DG.Package();
@@ -75,31 +76,33 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
75
76
 
76
77
  test('isCorrect', async () => {
77
78
  await _testMsaIsCorrect(fromCsv, toCsv);
78
- });
79
+ }, {skipReason: 'GROK-13221'});
79
80
 
80
81
  test('isCorrectLong', async () => {
81
82
  await _testMsaIsCorrect(longFromCsv, longToCsv);
82
- });
83
+ }, {skipReason: 'GROK-13221'});
83
84
 
84
85
  test('isCorrectHelm', async () => {
86
+ await awaitContainerStart();
85
87
  await _testMSAOnColumn(helmFromCsv, helmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
86
- }, {skipReason: 'GROK-13053'});
88
+ }, {skipReason: 'GROK-13221'});
87
89
 
88
90
  test('isCorrectHelmLong', async () => {
91
+ await awaitContainerStart();
89
92
  await _testMSAOnColumn(longHelmFromCsv, longHelmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
90
- }, {skipReason: 'GROK-13053'});
93
+ }, {skipReason: 'GROK-13221'});
91
94
 
92
95
  test('isCorrectSeparator', async () => {
93
96
  await _testMSAOnColumn(
94
- SeparatorFromCsv, SeparatorToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
97
+ SeparatorFromCsv, SeparatorToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT,
95
98
  );
96
- });
99
+ }, {skipReason: 'GROK-13221'});
97
100
 
98
101
  test('isCorrectSeparatorLong', async () => {
99
102
  await _testMSAOnColumn(
100
- SeparatorLongFromCsv, SeparatorLongToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
103
+ SeparatorLongFromCsv, SeparatorLongToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT,
101
104
  );
102
- });
105
+ }, {skipReason: 'GROK-13221'});
103
106
  });
104
107
 
105
108
  async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void> {
@@ -119,7 +122,7 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
119
122
 
120
123
  async function _testMSAOnColumn(
121
124
  srcCsv: string, tgtCsv: string,
122
- srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string
125
+ srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string,
123
126
  ): Promise<void> {
124
127
  const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
125
128
  const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
@@ -2,6 +2,7 @@ import * as DG from 'datagrok-api/dg';
2
2
 
3
3
  import {category, expect, test} from '@datagrok-libraries/utils/src/test';
4
4
  import {runPepsea} from '../utils/pepsea';
5
+ import {awaitContainerStart} from './utils';
5
6
 
6
7
  category('PepSeA', () => {
7
8
  const testCsv = `HELM,MSA
@@ -12,10 +13,12 @@ category('PepSeA', () => {
12
13
  "PEPTIDE1{F.V.R.G.Y.[MeF].Y.W.S.N.C}$$$$","F.V.R.G.Y.MeF.Y.W.S..N.C"`;
13
14
 
14
15
  test('Basic alignment', async () => {
16
+ await awaitContainerStart();
15
17
  const table = DG.DataFrame.fromCsv(testCsv);
16
18
  const alignedCol = await runPepsea(table.getCol('HELM'), 'msa(HELM)');
19
+ expect(alignedCol !== null, true, 'PepSeA conainter has not started');
17
20
  const alignedTestCol = table.getCol('MSA');
18
- for (let i = 0; i < alignedCol.length; ++i)
19
- expect(alignedCol.get(i) == alignedTestCol.get(i), true);
20
- }, {skipReason: 'GROK-12764'});
21
+ for (let i = 0; i < alignedCol!.length; ++i)
22
+ expect(alignedCol!.get(i) == alignedTestCol.get(i), true);
23
+ }, {skipReason: 'GROK-13221'});
21
24
  });