@datagrok/bio 2.15.13 → 2.16.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +25 -0
  2. package/detectors.js +16 -11
  3. package/dist/455.js.map +1 -1
  4. package/dist/980.js +1 -1
  5. package/dist/980.js.map +1 -1
  6. package/dist/package-test.js +6 -6
  7. package/dist/package-test.js.map +1 -1
  8. package/dist/package.js +3 -3
  9. package/dist/package.js.map +1 -1
  10. package/package.json +14 -14
  11. package/src/analysis/sequence-activity-cliffs.ts +9 -8
  12. package/src/analysis/sequence-diversity-viewer.ts +6 -4
  13. package/src/analysis/sequence-similarity-viewer.ts +9 -6
  14. package/src/analysis/sequence-space.ts +3 -2
  15. package/src/calculations/monomerLevelMols.ts +4 -5
  16. package/src/demo/bio01-similarity-diversity.ts +4 -1
  17. package/src/package-test.ts +1 -1
  18. package/src/package-types.ts +34 -2
  19. package/src/package.ts +60 -76
  20. package/src/substructure-search/substructure-search.ts +15 -9
  21. package/src/tests/WebLogo-layout-tests.ts +1 -1
  22. package/src/tests/WebLogo-positions-test.ts +11 -5
  23. package/src/tests/WebLogo-project-tests.ts +1 -1
  24. package/src/tests/activity-cliffs-utils.ts +11 -14
  25. package/src/tests/bio-tests.ts +85 -79
  26. package/src/tests/checkInputColumn-tests.ts +15 -10
  27. package/src/tests/converters-test.ts +12 -5
  28. package/src/tests/detectors-benchmark-tests.ts +5 -2
  29. package/src/tests/detectors-tests.ts +51 -44
  30. package/src/tests/detectors-weak-and-likely-tests.ts +12 -5
  31. package/src/tests/fasta-export-tests.ts +13 -5
  32. package/src/tests/helm-tests.ts +85 -0
  33. package/src/tests/mm-distance-tests.ts +14 -7
  34. package/src/tests/monomer-libraries-tests.ts +1 -1
  35. package/src/tests/msa-tests.ts +33 -24
  36. package/src/tests/renderers-monomer-placer-tests.ts +2 -5
  37. package/src/tests/renderers-test.ts +15 -9
  38. package/src/tests/scoring.ts +9 -6
  39. package/src/tests/seq-handler-get-helm-tests.ts +7 -5
  40. package/src/tests/seq-handler-get-region-tests.ts +9 -3
  41. package/src/tests/seq-handler-splitted-tests.ts +11 -5
  42. package/src/tests/seq-handler-tests.ts +17 -10
  43. package/src/tests/sequence-space-utils.ts +9 -4
  44. package/src/tests/splitters-test.ts +5 -4
  45. package/src/tests/substructure-filters-tests.ts +22 -23
  46. package/src/tests/to-atomic-level-tests.ts +5 -3
  47. package/src/tests/to-atomic-level-ui-tests.ts +4 -1
  48. package/src/tests/utils/detectors-utils.ts +4 -4
  49. package/src/utils/calculate-scores.ts +11 -9
  50. package/src/utils/cell-renderer-custom.ts +27 -17
  51. package/src/utils/cell-renderer.ts +14 -8
  52. package/src/utils/check-input-column.ts +13 -9
  53. package/src/utils/context-menu.ts +4 -4
  54. package/src/utils/convert.ts +21 -14
  55. package/src/utils/get-region-func-editor.ts +8 -5
  56. package/src/utils/get-region.ts +4 -5
  57. package/src/utils/helm-to-molfile/converter/helm.ts +4 -4
  58. package/src/utils/helm-to-molfile/utils.ts +5 -6
  59. package/src/utils/macromolecule-column-widget.ts +6 -7
  60. package/src/utils/monomer-cell-renderer-base.ts +8 -1
  61. package/src/utils/monomer-lib/lib-manager.ts +3 -2
  62. package/src/utils/monomer-lib/monomer-colors.ts +10 -10
  63. package/src/utils/monomer-lib/monomer-lib-base.ts +6 -1
  64. package/src/utils/monomer-lib/monomer-lib.ts +15 -9
  65. package/src/utils/multiple-sequence-alignment-ui.ts +30 -30
  66. package/src/utils/save-as-fasta.ts +19 -12
  67. package/src/utils/seq-helper/seq-handler.ts +836 -0
  68. package/src/utils/seq-helper/seq-helper.ts +43 -19
  69. package/src/utils/sequence-to-mol.ts +7 -8
  70. package/src/utils/split-to-monomers.ts +7 -2
  71. package/src/utils/types.ts +8 -7
  72. package/src/utils/ui-utils.ts +2 -2
  73. package/src/viewers/web-logo-viewer.ts +18 -16
  74. package/src/widgets/bio-substructure-filter-helm.ts +5 -2
  75. package/src/widgets/bio-substructure-filter.ts +14 -24
  76. package/src/widgets/composition-analysis-widget.ts +6 -6
  77. package/src/widgets/representations.ts +7 -4
  78. package/src/tests/detectors-custom-notation-tests.ts +0 -37
  79. package/src/utils/cyclized.ts +0 -89
  80. package/src/utils/dimerized.ts +0 -10
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {category, test, expect, expectObject, expectArray} from '@datagrok-libraries/utils/src/test';
5
+ import {category, test, expect, expectObject, expectArray, before} from '@datagrok-libraries/utils/src/test';
6
6
  import {
7
7
  NOTATION, getAlphabetSimilarity, monomerToShort, pickUpPalette, splitterAsFasta, splitterAsHelm,
8
8
  } from '@datagrok-libraries/bio/src/utils/macromolecule';
@@ -11,11 +11,18 @@ import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
11
11
  import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
12
12
  import {getStatsForCol} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
13
13
  import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
14
+ import {ISeqHelper, getSeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
14
15
 
15
16
  /** GAP_SYMBOL */
16
17
  const g: string = GAP_SYMBOL;
17
18
 
18
19
  category('bio', () => {
20
+ let seqHelper: ISeqHelper;
21
+
22
+ before(async () => {
23
+ seqHelper = await getSeqHelper();
24
+ });
25
+
19
26
  const csvDfN1: string = `seq
20
27
  ACGTCT
21
28
  CAGTGT
@@ -74,6 +81,83 @@ PEPTIDE1{meI}$$$$`;
74
81
  test('testPickupPaletteN1e', async () => { await _testPickupPaletteN1e(csvDfN1e); });
75
82
  test('testPickupPaletteAA1', async () => { await _testPickupPaletteAA1(csvDfAA1); });
76
83
  test('testPickupPaletteX', async () => { await _testPickupPaletteX(csvDfX); });
84
+
85
+ function _testGetStats(csvDfN1: string) {
86
+ const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
87
+ const seqCol: DG.Column = dfN1.col('seq')!;
88
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
89
+ seqCol.meta.units = NOTATION.FASTA;
90
+ const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
91
+
92
+ expectObject(stats.freq, {
93
+ 'A': 4,
94
+ 'C': 5,
95
+ 'G': 3,
96
+ 'T': 6,
97
+ });
98
+ expect(stats.sameLength, true);
99
+ }
100
+
101
+ async function _testGetAlphabetSimilarity() {
102
+ const freq: { [m: string]: number } = {
103
+ 'A': 2041,
104
+ 'C': 3015,
105
+ 'G': 3015,
106
+ 'T': 2048,
107
+ [g]: 1000,
108
+ };
109
+ const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
110
+ const res = getAlphabetSimilarity(freq, alphabet);
111
+
112
+ expect(res > 0.6, true);
113
+ }
114
+
115
+ async function _testPickupPaletteN1(csvDfN1: string) {
116
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
117
+ const col: DG.Column = df.col('seq')!;
118
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
119
+ col.meta.units = NOTATION.FASTA;
120
+ const cp = pickUpPalette(col, seqHelper);
121
+
122
+ expect(cp instanceof NucleotidesPalettes, true);
123
+ }
124
+
125
+ async function _testPickupPaletteN1e(csvDfN1e: string) {
126
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
127
+ const col: DG.Column = df.col('seq')!;
128
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
129
+ col.meta.units = NOTATION.FASTA;
130
+ const cp = pickUpPalette(col, seqHelper);
131
+
132
+ expect(cp instanceof NucleotidesPalettes, true);
133
+ }
134
+
135
+ async function _testPickupPaletteAA1(csvDfAA1: string) {
136
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
137
+ const col: DG.Column = df.col('seq')!;
138
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
139
+ col.meta.units = NOTATION.FASTA;
140
+ const cp = pickUpPalette(col, seqHelper);
141
+
142
+ expect(cp instanceof AminoacidsPalettes, true);
143
+ }
144
+
145
+ async function _testPickupPaletteX(csvDfX: string) {
146
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
147
+ const col: DG.Column = df.col('seq')!;
148
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
149
+ col.meta.units = NOTATION.FASTA;
150
+ const cp = pickUpPalette(col, seqHelper);
151
+
152
+ expect(cp instanceof UnknownSeqPalette, true);
153
+ }
154
+
155
+ async function _testPickupPaletteAA2(dfAA2: DG.DataFrame) {
156
+ const seqCol: DG.Column = dfAA2.col('seq')!;
157
+ const cp = pickUpPalette(seqCol, seqHelper);
158
+
159
+ expect(cp instanceof AminoacidsPalettes, true);
160
+ }
77
161
  });
78
162
 
79
163
  category('WebLogo.monomerToShort', () => {
@@ -123,81 +207,3 @@ category('WebLogo.monomerToShort', () => {
123
207
  expectArray(res, tgt);
124
208
  });
125
209
  });
126
-
127
-
128
- export async function _testGetStats(csvDfN1: string) {
129
- const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
130
- const seqCol: DG.Column = dfN1.col('seq')!;
131
- seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
132
- seqCol.meta.units = NOTATION.FASTA;
133
- const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
134
-
135
- expectObject(stats.freq, {
136
- 'A': 4,
137
- 'C': 5,
138
- 'G': 3,
139
- 'T': 6,
140
- });
141
- expect(stats.sameLength, true);
142
- }
143
-
144
- export async function _testGetAlphabetSimilarity() {
145
- const freq: { [m: string]: number } = {
146
- 'A': 2041,
147
- 'C': 3015,
148
- 'G': 3015,
149
- 'T': 2048,
150
- [g]: 1000,
151
- };
152
- const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
153
- const res = getAlphabetSimilarity(freq, alphabet);
154
-
155
- expect(res > 0.6, true);
156
- }
157
-
158
- export async function _testPickupPaletteN1(csvDfN1: string) {
159
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
160
- const col: DG.Column = df.col('seq')!;
161
- col.semType = DG.SEMTYPE.MACROMOLECULE;
162
- col.meta.units = NOTATION.FASTA;
163
- const cp = pickUpPalette(col);
164
-
165
- expect(cp instanceof NucleotidesPalettes, true);
166
- }
167
-
168
- export async function _testPickupPaletteN1e(csvDfN1e: string) {
169
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
170
- const col: DG.Column = df.col('seq')!;
171
- col.semType = DG.SEMTYPE.MACROMOLECULE;
172
- col.meta.units = NOTATION.FASTA;
173
- const cp = pickUpPalette(col);
174
-
175
- expect(cp instanceof NucleotidesPalettes, true);
176
- }
177
-
178
- export async function _testPickupPaletteAA1(csvDfAA1: string) {
179
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
180
- const col: DG.Column = df.col('seq')!;
181
- col.semType = DG.SEMTYPE.MACROMOLECULE;
182
- col.meta.units = NOTATION.FASTA;
183
- const cp = pickUpPalette(col);
184
-
185
- expect(cp instanceof AminoacidsPalettes, true);
186
- }
187
-
188
- export async function _testPickupPaletteX(csvDfX: string) {
189
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
190
- const col: DG.Column = df.col('seq')!;
191
- col.semType = DG.SEMTYPE.MACROMOLECULE;
192
- col.meta.units = NOTATION.FASTA;
193
- const cp = pickUpPalette(col);
194
-
195
- expect(cp instanceof UnknownSeqPalette, true);
196
- }
197
-
198
- export async function _testPickupPaletteAA2(dfAA2: DG.DataFrame) {
199
- const seqCol: DG.Column = dfAA2.col('seq')!;
200
- const cp = pickUpPalette(seqCol);
201
-
202
- expect(cp instanceof AminoacidsPalettes, true);
203
- }
@@ -2,12 +2,20 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {category, test, expect} from '@datagrok-libraries/utils/src/test';
5
+ import {category, test, expect, before} from '@datagrok-libraries/utils/src/test';
6
6
 
7
7
  import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
+ import {getSeqHelper, ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
9
+
8
10
  import {checkInputColumn} from '../utils/check-input-column';
9
11
 
10
12
  category('checkInputColumn', () => {
13
+ let seqHelper: ISeqHelper;
14
+
15
+ before(async () => {
16
+ seqHelper = await getSeqHelper();
17
+ });
18
+
11
19
  const csv = `seq
12
20
  seq1,
13
21
  seq2,
@@ -22,9 +30,8 @@ seq4`;
22
30
  col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
23
31
  col.setTag(bioTAGS.aligned, 'SEQ');
24
32
 
25
- const [res, _msg]: [boolean, string] = checkInputColumn(
26
- col, 'Test', [NOTATION.FASTA],
27
- [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
33
+ const [res, _msg]: [boolean, string] = checkInputColumn(col, 'Test', seqHelper,
34
+ [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
28
35
 
29
36
  expect(res, true);
30
37
  });
@@ -37,9 +44,8 @@ seq4`;
37
44
  // col.setTag(bio.TAGS.alphabetSize, '11');
38
45
  col.setTag(bioTAGS.alphabetIsMultichar, 'true');
39
46
 
40
- const [res, _msg]: [boolean, string] = checkInputColumn(
41
- col, 'Test', [NOTATION.FASTA],
42
- [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
47
+ const [res, _msg]: [boolean, string] = checkInputColumn(col, 'Test', seqHelper,
48
+ [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
43
49
 
44
50
  expect(res, false);
45
51
  });
@@ -54,9 +60,8 @@ seq4`;
54
60
  col.setTag(bioTAGS.alphabetIsMultichar, 'true');
55
61
  col.setTag(bioTAGS.aligned, 'SEQ');
56
62
 
57
- const [res, _msg]: [boolean, string] = checkInputColumn(
58
- col, 'Test', [NOTATION.FASTA],
59
- [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
63
+ const [res, _msg]: [boolean, string] = checkInputColumn(col, 'Test', seqHelper,
64
+ [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
60
65
 
61
66
  expect(res, false);
62
67
  });
@@ -1,14 +1,21 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
 
4
- import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
4
+ import {before, category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
5
5
  import {NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
- import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
6
+ import {ISeqHelper, getSeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
7
+ import {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
7
8
 
8
9
  import {ConverterFunc} from './types';
9
10
 
10
11
 
11
12
  category('converters', () => {
13
+ let seqHelper: ISeqHelper;
14
+
15
+ before(async () => {
16
+ seqHelper = await getSeqHelper();
17
+ });
18
+
12
19
  enum Samples {
13
20
  fastaPt = 'fastaPt',
14
21
  separatorPt = 'separatorPt',
@@ -133,7 +140,7 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
133
140
  throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
134
141
 
135
142
  return function(srcCol: DG.Column): DG.Column {
136
- const converterSh = SeqHandler.forColumn(srcCol);
143
+ const converterSh = seqHelper.getSeqHandler(srcCol);
137
144
  const resCol = converterSh.convert(tgtNotation, tgtSeparator);
138
145
  expect(resCol.meta.units, tgtNotation);
139
146
  return resCol;
@@ -152,8 +159,8 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
152
159
  const tgtCol: DG.Column = tgtDf.getCol('seq');
153
160
 
154
161
  expectArray(resCol.toList(), tgtCol.toList());
155
- const srcSh: SeqHandler = SeqHandler.forColumn(srcCol);
156
- const resSh: SeqHandler = SeqHandler.forColumn(resCol);
162
+ const srcSh: ISeqHandler = seqHelper.getSeqHandler(srcCol);
163
+ const resSh: ISeqHandler = seqHelper.getSeqHandler(resCol);
157
164
  for (const [tagName, tgtTagValue] of Object.entries(tgtCol.tags)) {
158
165
  if (
159
166
  !bioTagsSet.has(tagName) ||
@@ -4,14 +4,17 @@ import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
6
6
  import {ALPHABET, getAlphabet, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
- import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
7
+ import {ISeqHelper, getSeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
8
8
 
9
9
  import {_package} from '../package-test';
10
10
 
11
+
11
12
  category('detectorsBenchmark', () => {
13
+ let seqHelper: ISeqHelper;
12
14
  let detectFunc: DG.Func;
13
15
 
14
16
  before(async () => {
17
+ seqHelper = await getSeqHelper();
15
18
  const funcList: DG.Func[] = DG.Func.find({package: 'Bio', name: 'detectMacromolecule'});
16
19
  detectFunc = funcList[0];
17
20
 
@@ -125,7 +128,7 @@ category('detectorsBenchmark', () => {
125
128
  }
126
129
 
127
130
  function checkDetectorRes(col: DG.Column, tgt: TgtType): void {
128
- const sh = SeqHandler.forColumn(col);
131
+ const sh = seqHelper.getSeqHandler(col);
129
132
  expect(col.semType === tgt.semType, true);
130
133
  expect(sh.notation === tgt.notation, true);
131
134
  expect(sh.alphabet === tgt.alphabet, true);
@@ -2,13 +2,12 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {category, test, expect} from '@datagrok-libraries/utils/src/test';
5
+ import {category, test, expect, before} from '@datagrok-libraries/utils/src/test';
6
6
 
7
7
  import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
- import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
8
+ import {ISeqHelper, getSeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
9
9
 
10
10
  import {_testNeg, _testPos, DetectorTestData, DfReaderFunc, PosCol} from './utils/detectors-utils';
11
- import {importFasta} from '../package';
12
11
 
13
12
  /*
14
13
  // snippet to list df columns of semType='Macromolecule' (false positive)
@@ -22,6 +21,12 @@ for (let i = 0; i < df.columns.length; i++) {
22
21
  */
23
22
 
24
23
  category('detectors', () => {
24
+ let seqHelper: ISeqHelper;
25
+
26
+ before(async () => {
27
+ seqHelper = await getSeqHelper();
28
+ });
29
+
25
30
  const enum csvTests {
26
31
  fastaDna1 = 'csvFastaDna1',
27
32
  fastaRna1 = 'fastaRna1',
@@ -177,7 +182,7 @@ PEPTIDE1{Ad(1).S.W.Y.C.K.H.P.M.W.A.A.A.A.C(1)-G-NH2}$$$$`,
177
182
  for (const negColName of testData.neg ?? [])
178
183
  await _testNeg(reader, negColName);
179
184
  for (const [posColName, posCol] of Object.entries(testData.pos ?? {})) {
180
- await _testPos(reader, posColName, posCol.units, posCol.aligned,
185
+ await _testPos(reader, posColName, seqHelper, posCol.units, posCol.aligned,
181
186
  posCol.alphabet, posCol.alphabetSize, posCol.alphabetIsMultichar, posCol.separator);
182
187
  }
183
188
  });
@@ -318,7 +323,7 @@ MWRSWY-CKHPMWRSWY-CKHP`;
318
323
 
319
324
  async function _readFileFasta(file: string): Promise<DG.DataFrame> {
320
325
  const txt: string = await grok.dapi.files.readAsText(file);
321
- const df: DG.DataFrame = importFasta(txt)[0];
326
+ const df: DG.DataFrame = (await grok.functions.call('Bio.importFasta', {fileContent: txt}))[0] as DG.DataFrame;
322
327
  return df;
323
328
  }
324
329
 
@@ -336,77 +341,77 @@ MWRSWY-CKHPMWRSWY-CKHP`;
336
341
  test('NegativeStartEndIntermediate', async () => { await _testNegList(['START', 'END', 'INTERMEDIATE']); });
337
342
 
338
343
  test('FastaDna1', async () => {
339
- await _testPos(readCsv(csvTests.fastaDna1), 'seq',
344
+ await _testPos(readCsv(csvTests.fastaDna1), 'seq', seqHelper,
340
345
  NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false);
341
346
  });
342
347
  test('FastaRna1', async () => {
343
- await _testPos(readCsv(csvTests.fastaRna1), 'seq',
348
+ await _testPos(readCsv(csvTests.fastaRna1), 'seq', seqHelper,
344
349
  NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false);
345
350
  });
346
351
  test('FastaPt1', async () => {
347
- await _testPos(readCsv(csvTests.fastaPt1), 'seq',
352
+ await _testPos(readCsv(csvTests.fastaPt1), 'seq', seqHelper,
348
353
  NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
349
354
  });
350
- test('FastaPtGaps', () => _testPosList(['FW-PH-EYY', 'FYNRQWYV-', 'FKP-Q-SEYV'],
355
+ test('FastaPtGaps', () => _testPosList(['FW-PH-EYY', 'FYNRQWYV-', 'FKP-Q-SEYV'], seqHelper,
351
356
  NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false));
352
- test('FastaPtGapsMsa', () => _testPosList(['FW-PH-EYY', 'FYNRQWYV-', 'FKP-Q-SEY'],
357
+ test('FastaPtGapsMsa', () => _testPosList(['FW-PH-EYY', 'FYNRQWYV-', 'FKP-Q-SEY'], seqHelper,
353
358
  NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false));
354
359
 
355
360
  test('FastaUn', async () => {
356
- await _testPos(readCsv(csvTests.fastaUn), 'seq',
361
+ await _testPos(readCsv(csvTests.fastaUn), 'seq', seqHelper,
357
362
  NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 12, true);
358
363
  });
359
364
 
360
365
  test('FastaMsaDna1', async () => {
361
- await _testPos(readCsv(csvTests.fastaMsaDna1), 'seq',
366
+ await _testPos(readCsv(csvTests.fastaMsaDna1), 'seq', seqHelper,
362
367
  NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
363
368
  });
364
369
 
365
370
  test('FastaMsaPt1', async () => {
366
- await _testPos(readCsv(csvTests.fastaMsaPt1), 'seq',
371
+ await _testPos(readCsv(csvTests.fastaMsaPt1), 'seq', seqHelper,
367
372
  NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
368
373
  });
369
374
 
370
375
  test('SepDna', async () => {
371
- await _testPos(readCsv(csvTests.sepDna), 'seq',
376
+ await _testPos(readCsv(csvTests.sepDna), 'seq', seqHelper,
372
377
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false, '*');
373
378
  });
374
379
  test('SepRna', async () => {
375
- await _testPos(readCsv(csvTests.sepRna), 'seq',
380
+ await _testPos(readCsv(csvTests.sepRna), 'seq', seqHelper,
376
381
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false, '*');
377
382
  });
378
383
  test('SepPt', async () => {
379
- await _testPos(readCsv(csvTests.sepPt), 'seq',
384
+ await _testPos(readCsv(csvTests.sepPt), 'seq', seqHelper,
380
385
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.PT, 20, false, '-');
381
386
  });
382
387
  test('SepUn1', async () => {
383
- await _testPos(readCsv(csvTests.sepUn1), 'seq',
388
+ await _testPos(readCsv(csvTests.sepUn1), 'seq', seqHelper,
384
389
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 8, true, '-');
385
390
  });
386
391
  test('SepUn2', async () => {
387
- await _testPos(readCsv(csvTests.sepUn2), 'seq',
392
+ await _testPos(readCsv(csvTests.sepUn2), 'seq', seqHelper,
388
393
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 9, true, '/');
389
394
  });
390
395
 
391
396
  test('SepMsaN1', async () => {
392
- await _testPos(readCsv(csvTests.sepMsaDna1), 'seq',
397
+ await _testPos(readCsv(csvTests.sepMsaDna1), 'seq', seqHelper,
393
398
  NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false, '-');
394
399
  });
395
400
 
396
401
  test('SepMsaUnWEmpty', async () => {
397
- await _testPos(readCsv(csvTests.sepMsaUnWEmpty), 'seq',
402
+ await _testPos(readCsv(csvTests.sepMsaUnWEmpty), 'seq', seqHelper,
398
403
  NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 14, true);
399
404
  });
400
405
 
401
406
  test('SepComplex', async () => {
402
- await _testPos(readCsv(csvTests.sepComplex), 'seq',
407
+ await _testPos(readCsv(csvTests.sepComplex), 'seq', seqHelper,
403
408
  NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 17, true);
404
409
  });
405
410
 
406
411
  test('samplesFastaCsv', async () => {
407
412
  await _testDf(readSamples(Samples.fastaCsv), {
408
413
  'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
409
- });
414
+ }, seqHelper);
410
415
  });
411
416
 
412
417
  // test('samplesFastaFasta', async () => {
@@ -420,27 +425,27 @@ MWRSWY-CKHPMWRSWY-CKHP`;
420
425
  // await _testPos(readSamples(Samples.peptidesComplex), 'AlignedSequence', 'separator:SEQ:UN', '-');
421
426
  // });
422
427
  test('samplesPeptidesComplex', async () => {
423
- await _testDf(readSamples(Samples.peptidesComplex), {} /* no positive */);
428
+ await _testDf(readSamples(Samples.peptidesComplex), {} /* no positive */, seqHelper);
424
429
  });
425
430
 
426
431
  test('samplesMsaComplex', async () => {
427
432
  await _testDf(readSamples(Samples.msaComplex), {
428
433
  'MSA': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/'),
429
- });
434
+ }, seqHelper);
430
435
  });
431
436
 
432
437
  test('samplesIdCsv', async () => {
433
- await _testDf(readSamples(Samples.testIdCsv), {} /* no positive */);
438
+ await _testDf(readSamples(Samples.testIdCsv), {} /* no positive */, seqHelper);
434
439
  });
435
440
 
436
441
  test('samplesSarSmallCsv', async () => {
437
- await _testDf(readSamples(Samples.testSmilesCsv), {} /* nopositive */);
442
+ await _testDf(readSamples(Samples.testSmilesCsv), {} /* nopositive */, seqHelper);
438
443
  });
439
444
 
440
445
  test('samplesHelmCsv', async () => {
441
446
  await _testDf(readSamples(Samples.helmCsv), {
442
447
  'HELM': new PosCol(NOTATION.HELM, null, null, 160, true),
443
- });
448
+ }, seqHelper);
444
449
  });
445
450
 
446
451
  // sample_testHelm.csv
@@ -448,57 +453,57 @@ MWRSWY-CKHPMWRSWY-CKHP`;
448
453
  test('samplesTestHelmCsv', async () => {
449
454
  await _testDf(readSamples(Samples.testHelmCsv), {
450
455
  'HELM string': new PosCol(NOTATION.HELM, null, null, 9, true),
451
- });
456
+ }, seqHelper);
452
457
  });
453
458
 
454
459
  test('samplesTestDemogCsv', async () => {
455
- await _testDf(readSamples(Samples.testDemogCsv), {} /* no positive */);
460
+ await _testDf(readSamples(Samples.testDemogCsv), {} /* no positive */, seqHelper);
456
461
  });
457
462
 
458
463
  test('samplesTestSmiles2Csv', async () => {
459
- await _testDf(readSamples(Samples.testSmiles2Csv), {} /* no positive */);
464
+ await _testDf(readSamples(Samples.testSmiles2Csv), {} /* no positive */, seqHelper);
460
465
  });
461
466
 
462
467
  test('samplesTestSmilesShort', async () => {
463
- await _testDf(readSamples(Samples.testSmilesShort), {} /* no positive */);
468
+ await _testDf(readSamples(Samples.testSmilesShort), {} /* no positive */, seqHelper);
464
469
  });
465
470
 
466
471
  test('samplesTestActivityCliffsNegativeSmiles', async () => {
467
- await _testDf(readSamples(Samples.testActivityCliffsCsv), {} /* no positive */);
472
+ await _testDf(readSamples(Samples.testActivityCliffsCsv), {} /* no positive */, seqHelper);
468
473
  });
469
474
 
470
475
  test('samplesFastaPtCsv', async () => {
471
476
  await _testDf(readSamples(Samples.fastaPtCsv), {
472
477
  'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
473
- });
478
+ }, seqHelper);
474
479
  });
475
480
 
476
481
  test('samplesTestCerealCsv', async () => {
477
- await _testDf(readSamples(Samples.testCerealCsv), {} /* no positive */);
482
+ await _testDf(readSamples(Samples.testCerealCsv), {} /* no positive */, seqHelper);
478
483
  });
479
484
 
480
485
  test('samplesTestUnichemSources', async () => {
481
- await _testDf(readSamples(Samples.testUnichemSources), {} /* no positive */);
486
+ await _testDf(readSamples(Samples.testUnichemSources), {} /* no positive */, seqHelper);
482
487
  });
483
488
 
484
489
  test('samplesTestDmvOffices', async () => {
485
- await _testDf(readSamples(Samples.testDmvOffices), {} /* no positive */);
490
+ await _testDf(readSamples(Samples.testDmvOffices), {} /* no positive */, seqHelper);
486
491
  });
487
492
 
488
493
  test('samplesTestAlertCollection', async () => {
489
- await _testDf(readSamples(Samples.testAlertCollection), {} /* no positive */);
494
+ await _testDf(readSamples(Samples.testAlertCollection), {} /* no positive */, seqHelper);
490
495
  });
491
496
 
492
497
  test('samplesTestSpgi', async () => {
493
- await _testDf(readSamples(Samples.testSpgi), {} /* no positive */);
498
+ await _testDf(readSamples(Samples.testSpgi), {} /* no positive */, seqHelper);
494
499
  });
495
500
 
496
501
  test('samplesTestSpgi100', async () => {
497
- await _testDf(readSamples(Samples.testSpgi100), {} /* no positive */);
502
+ await _testDf(readSamples(Samples.testSpgi100), {} /* no positive */, seqHelper);
498
503
  });
499
504
 
500
505
  test('samplesTestUrl', async () => {
501
- await _testDf(readSamples(Samples.testUrl), {} /* no positive */);
506
+ await _testDf(readSamples(Samples.testUrl), {} /* no positive */, seqHelper);
502
507
  });
503
508
  });
504
509
 
@@ -511,7 +516,7 @@ export async function _testNegList(list: string[]): Promise<void> {
511
516
  }
512
517
  }
513
518
 
514
- export async function _testPosList(list: string[], units: NOTATION,
519
+ export async function _testPosList(list: string[], seqHelper: ISeqHelper, units: NOTATION,
515
520
  aligned: ALIGNMENT, alphabet: ALPHABET, alphabetSize: number, alphabetIsMultichar: boolean,
516
521
  separator: string | null = null
517
522
  ): Promise<void> {
@@ -527,7 +532,7 @@ export async function _testPosList(list: string[], units: NOTATION,
527
532
  if (separator)
528
533
  expect(col.getTag(bioTAGS.separator), separator);
529
534
 
530
- const sh = SeqHandler.forColumn(col);
535
+ const sh = seqHelper.getSeqHandler(col);
531
536
  expect(sh.getAlphabetSize(), alphabetSize);
532
537
  expect(sh.getAlphabetIsMultichar(), alphabetIsMultichar);
533
538
  if (!sh.isHelm()) {
@@ -536,14 +541,16 @@ export async function _testPosList(list: string[], units: NOTATION,
536
541
  }
537
542
  }
538
543
 
539
- export async function _testDf(readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }): Promise<void> {
544
+ export async function _testDf(
545
+ readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }, seqHelper: ISeqHelper
546
+ ): Promise<void> {
540
547
  const df: DG.DataFrame = await readDf();
541
548
  const errList: string[] = [];
542
549
  for (const colName of df.columns.names()) {
543
550
  if (colName in posCols) {
544
551
  const p = posCols[colName];
545
552
  try {
546
- await _testPos(readDf, colName, p.units, p.aligned, p.alphabet,
553
+ await _testPos(readDf, colName, seqHelper, p.units, p.aligned, p.alphabet,
547
554
  p.alphabetSize, p.alphabetIsMultichar, p.separator);
548
555
  } catch (err: any) {
549
556
  const errMsg: string = err.toString();
@@ -2,13 +2,20 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {category, test} from '@datagrok-libraries/utils/src/test';
5
+ import {before, category, test} from '@datagrok-libraries/utils/src/test';
6
6
  import {ALIGNMENT, ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
+ import {ISeqHelper, getSeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
7
8
 
8
9
  import {_testNeg, _testPos, DfReaderFunc} from './utils/detectors-utils';
9
10
 
10
11
 
11
- category('detectors:weak-and-likely', () => {
12
+ category('detectors.weak-and-likely', () => {
13
+ let seqHelper: ISeqHelper;
14
+
15
+ before(async () => {
16
+ seqHelper = await getSeqHelper();
17
+ });
18
+
12
19
  const enum csvTests {
13
20
  fastaDnaWeak1 = 'fastaDnaWeak1',
14
21
  fastaDnaWeak1LikelyName = 'fastaDnaWeak1LikelyName',
@@ -92,7 +99,7 @@ Megafantastic
92
99
  await _testNeg(readCsv(csvTests.fastaDnaWeak1), 'colName');
93
100
  });
94
101
  test(csvTests.fastaDnaWeak1LikelyName, async () => {
95
- await _testPos(readCsv(csvTests.fastaDnaWeak1LikelyName), 'seq',
102
+ await _testPos(readCsv(csvTests.fastaDnaWeak1LikelyName), 'seq', seqHelper,
96
103
  NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
97
104
  });
98
105
 
@@ -100,7 +107,7 @@ Megafantastic
100
107
  await _testNeg(readCsv(csvTests.fastaRnaWeak1), 'colName');
101
108
  });
102
109
  test(csvTests.fastaRnaWeak1LikelyName, async () => {
103
- await _testPos(readCsv(csvTests.fastaRnaWeak1LikelyName), 'seq',
110
+ await _testPos(readCsv(csvTests.fastaRnaWeak1LikelyName), 'seq', seqHelper,
104
111
  NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.RNA, 4, false);
105
112
  });
106
113
 
@@ -108,7 +115,7 @@ Megafantastic
108
115
  await _testNeg(readCsv(csvTests.fastaPtWeak1), 'colName');
109
116
  });
110
117
  test(csvTests.fastaPtWeak1LikelyName, async () => {
111
- await _testPos(readCsv(csvTests.fastaPtWeak1LikelyName), 'seq',
118
+ await _testPos(readCsv(csvTests.fastaPtWeak1LikelyName), 'seq', seqHelper,
112
119
  NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
113
120
  });
114
121