@datagrok/bio 1.9.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "1.9.0",
8
+ "version": "1.11.0",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,16 +14,17 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": ">=2.4.0",
17
- "@datagrok-libraries/bio": "^3.1.1",
17
+ "@datagrok-libraries/bio": "^4.2.0",
18
+ "@datagrok-libraries/chem-meta": "1.0.0",
18
19
  "@datagrok-libraries/ml": "^4.0.0",
19
20
  "@datagrok-libraries/utils": "^1.6.2",
20
21
  "cash-dom": "latest",
21
- "datagrok-api": "^1.5.5",
22
+ "datagrok-api": "^1.6.6",
22
23
  "dayjs": "^1.11.4",
23
24
  "openchemlib": "6.0.1",
24
25
  "rxjs": "^6.5.5",
25
- "ts-loader": "^9.2.5",
26
- "typescript": "^4.4.2"
26
+ "typescript": "^4.4.2",
27
+ "wu": "latest"
27
28
  },
28
29
  "devDependencies": {
29
30
  "@types/jest": "^27.0.0",
@@ -35,6 +36,7 @@
35
36
  "jest-html-reporter": "^3.6.0",
36
37
  "puppeteer": "^13.7.0",
37
38
  "ts-jest": "^27.0.0",
39
+ "ts-loader": "^9.2.5",
38
40
  "webpack": "latest",
39
41
  "webpack-cli": "^4.10.0",
40
42
  "@types/js-yaml": "^4.0.5",
@@ -67,11 +69,7 @@
67
69
  "Developers"
68
70
  ],
69
71
  "sources": [
70
- "css/helm.css",
71
- "https://ajax.googleapis.com/ajax/libs/dojo/1.10.4/dojo/dojo.js",
72
- "helm/JSDraw/Scilligence.JSDraw2.Lite.js",
73
- "helm/JSDraw/Scilligence.JSDraw2.Resources.js",
74
- "helm/JSDraw/Pistoia.HELM-uncompressed.js"
72
+ "css/helm.css"
75
73
  ],
76
74
  "category": "Bioinformatics"
77
75
  }
@@ -13,6 +13,7 @@ import './tests/renderers-test';
13
13
  import './tests/convert-test';
14
14
  import './tests/fasta-handler-test';
15
15
  import './tests/WebLogo-positions-test';
16
+ import './tests/checkInputColumn-tests'
16
17
 
17
18
  export const _package = new DG.Package();
18
19
  export {tests};
package/src/package.ts CHANGED
@@ -23,13 +23,31 @@ import {convert} from './utils/convert';
23
23
  import {representationsWidget} from './widgets/representations';
24
24
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
25
25
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
26
- import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils'
26
+ import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
27
+ import {
28
+ generateManySequences,
29
+ generateLongSequence,
30
+ performanceTest
31
+ } from './tests/test-sequnces-generators';
27
32
 
33
+ import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
34
+ import * as C from './utils/constants';
28
35
 
29
36
  //tags: init
30
37
  export async function initBio() {
31
38
  }
32
39
 
40
+ //name: testManySequencesPerformance
41
+ export function testManySequencesPerformance(): void {
42
+ performanceTest(generateManySequences, 'Many sequences');
43
+ }
44
+
45
+ //name: testLongSequencesPerformance
46
+ export function testLongSequencesPerformance(): void {
47
+ performanceTest(generateLongSequence, 'Long sequences');
48
+ }
49
+
50
+
33
51
  //name: fastaSequenceCellRenderer
34
52
  //tags: cellRenderer
35
53
  //meta.cellType: Sequence
@@ -48,29 +66,50 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
48
66
  return new MacromoleculeSequenceCellRenderer();
49
67
  }
50
68
 
51
- function checkInputColumn(col: DG.Column, name: string,
52
- allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
53
- const notation: string = col.getTag(DG.TAGS.UNITS);
54
- const alphabet: string = col.getTag('alphabet')
69
+ function checkInputColumnUi(
70
+ col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
71
+ ): boolean {
72
+ const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
73
+ if (!res)
74
+ grok.shell.warning(msg);
75
+ return res;
76
+ }
77
+
78
+ export function checkInputColumn(
79
+ col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
80
+ ): [boolean, string] {
81
+ let res: boolean = true;
82
+ let msg: string = '';
83
+
84
+ const uh = new UnitsHandler(col);
55
85
  if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
56
86
  grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
57
- return false;
58
- } else if (
59
- (allowedAlphabets.length > 0 &&
60
- !allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))) ||
61
- (allowedNotations.length > 0 &&
62
- !allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase())))
63
- ) {
64
- const notationAdd = allowedNotations.length == 0 ? 'any notation' :
65
- (`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
66
- const alphabetAdd = allowedNotations.length == 0 ? 'any alphabet' :
67
- (`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
68
-
69
- grok.shell.warning(name + ' analysis is allowed for Macromolecules with ' + notationAdd + ' and ' + alphabetAdd);
70
- return false;
87
+ res = false;
88
+ } else {
89
+ const notation: string = uh.notation;
90
+ if (allowedNotations.length > 0 &&
91
+ !allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase()))
92
+ ) {
93
+ const notationAdd = allowedNotations.length == 0 ? 'any notation' :
94
+ (`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
95
+ msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
96
+ res = false;
97
+ } else if (!uh.isHelm()) {
98
+ // alphabet is not specified for 'helm' notation
99
+ const alphabet: string = uh.alphabet;
100
+ if (
101
+ allowedAlphabets.length > 0 &&
102
+ !allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))
103
+ ) {
104
+ const alphabetAdd = allowedAlphabets.length == 0 ? 'any alphabet' :
105
+ (`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
106
+ msg = `${name} + ' analysis is allowed for Macromolecules with alphabet ${alphabetAdd}.`;
107
+ res = false;
108
+ }
109
+ }
71
110
  }
72
111
 
73
- return true;
112
+ return [res, msg];
74
113
  }
75
114
 
76
115
  //name: sequenceAlignment
@@ -113,7 +152,7 @@ export function vdRegionViewer() {
113
152
  //input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
114
153
  export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
115
154
  similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
116
- if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
155
+ if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
117
156
  return;
118
157
  const encodedCol = encodeMonomers(macroMolecule);
119
158
  if (!encodedCol)
@@ -127,7 +166,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
127
166
  'aligned': macroMolecule.tags['aligned'],
128
167
  'separator': macroMolecule.tags['separator'],
129
168
  'alphabet': macroMolecule.tags['alphabet'],
130
- }
169
+ };
131
170
  const sp = await getActivityCliffs(
132
171
  df,
133
172
  macroMolecule,
@@ -144,7 +183,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
144
183
  sequenceGetSimilarities,
145
184
  drawSequences,
146
185
  (options as any)[methodName]);
147
- return sp;
186
+ return sp;
148
187
  }
149
188
 
150
189
  //top-menu: Bio | Sequence Space...
@@ -155,18 +194,18 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
155
194
  //input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
156
195
  //input: bool plotEmbeddings = true
157
196
  export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
158
- similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer|undefined> {
159
- if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
197
+ similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
198
+ if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
160
199
  return;
161
200
  const encodedCol = encodeMonomers(macroMolecule);
162
201
  if (!encodedCol)
163
202
  return;
164
203
  const embedColsNames = getEmbeddingColsNames(table);
165
- const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
204
+ const withoutEmptyValues = DG.DataFrame.fromColumns([encodedCol]).clone();
166
205
  const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, encodedCol);
167
206
 
168
207
  const chemSpaceParams = {
169
- seqCol: withoutEmptyValues.col(macroMolecule.name)!,
208
+ seqCol: withoutEmptyValues.col(encodedCol.name)!,
170
209
  methodName: methodName,
171
210
  similarityMetric: similarityMetric,
172
211
  embedAxesNames: embedColsNames
@@ -174,11 +213,11 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
174
213
  const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
175
214
  const embeddings = sequenceSpaceRes.coordinates;
176
215
  for (const col of embeddings) {
177
- const listValues = col.toList();
178
- emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
179
- table.columns.add(DG.Column.fromList('double', col.name, listValues));
216
+ const listValues = col.toList();
217
+ emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
218
+ table.columns.add(DG.Column.fromList('double', col.name, listValues));
180
219
  }
181
- let sp;
220
+ let sp;
182
221
  if (plotEmbeddings) {
183
222
  for (const v of grok.shell.views) {
184
223
  if (v.name === table.name)
@@ -198,7 +237,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
198
237
  grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
199
238
  return;
200
239
  }
201
- if (!checkInputColumn(macroMolecule, 'To Atomic Level'))
240
+ if (!checkInputColumnUi(macroMolecule, 'To Atomic Level'))
202
241
  return;
203
242
 
204
243
  const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
@@ -217,10 +256,12 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
217
256
  //top-menu: Bio | MSA...
218
257
  //name: MSA
219
258
  //input: dataframe table
220
- //input: column sequence { semType: Macromolecule }
259
+ //input: column sequence { semType: Macromolecule, units: ['fasta'], alphabet: ['DNA', 'RNA', 'PT'] }
221
260
  //output: column result
222
261
  export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
223
- if (!checkInputColumn(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
262
+ const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
263
+
264
+ if (!checkInputColumnUi(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
224
265
  return null;
225
266
 
226
267
  const unUsedName = table.columns.getUnusedName(`msa(${col.name})`);
@@ -261,7 +302,7 @@ export async function compositionAnalysis(): Promise<void> {
261
302
  });
262
303
 
263
304
  const handler = async (col: DG.Column) => {
264
- if (!checkInputColumn(col, 'Composition'))
305
+ if (!checkInputColumnUi(col, 'Composition'))
265
306
  return;
266
307
 
267
308
  const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: col.name});
@@ -376,11 +417,11 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
376
417
  //console.warn(`file: ${fileInfo.path}, column: ${col.name}, ` +
377
418
  // `semType: ${semType}, units: ${col.getTag(DG.TAGS.UNITS)}`);
378
419
  // console.warn('file: "' + fileInfo.path + '", semType: "' + semType + '", ' +
379
- // 'units: "' + col.getTag('units') + '"');
420
+ // 'units: "' + col.getTag(DG.TAGS.UNITS) + '"');
380
421
 
381
422
  res.push({
382
423
  file: fileInfo.path, result: 'detected', column: col.name,
383
- message: `units: ${col.getTag('units')}`
424
+ message: `units: ${col.getTag(DG.TAGS.UNITS)}`
384
425
  });
385
426
  }
386
427
  }
@@ -403,3 +444,19 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
403
444
  return resDf;
404
445
  }
405
446
 
447
+ //name: Bio | Split to monomers
448
+ //tags: panel, bio
449
+ //input: column col {semType: Macromolecule}
450
+ export function splitToMonomers(col: DG.Column<string>): void {
451
+ if (!col.getTag(UnitsHandler.TAGS.aligned).includes(C.MSA))
452
+ return grok.shell.error('Splitting is applicable only for aligned sequences');
453
+
454
+ const tempDf = splitAlignedSequences(col);
455
+ const originalDf = col.dataFrame;
456
+ for (const tempCol of tempDf.columns) {
457
+ const newCol = originalDf.columns.add(tempCol);
458
+ newCol.semType = C.SEM_TYPES.MONOMER;
459
+ // newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
460
+ newCol.setTag(C.TAGS.ALPHABET, col.getTag(C.TAGS.ALPHABET));
461
+ }
462
+ }
@@ -5,6 +5,7 @@ import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
  import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
7
  import {Column} from 'datagrok-api/dg';
8
+ import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
9
 
9
10
  category('WebLogo-positions', () => {
10
11
  let tvList: DG.TableView[];
@@ -12,11 +13,11 @@ category('WebLogo-positions', () => {
12
13
  let currentView: DG.View;
13
14
 
14
15
  const csvDf1 = `seq
15
- ATC-G-TTGC--
16
- ATC-G-TTGC--
17
- -TC-G-TTGC--
18
- -TC-GCTTGC--
19
- -TC-GCTTGC--`;
16
+ ATC-G-TTGC--
17
+ ATC-G-TTGC--
18
+ -TC-G-TTGC--
19
+ -TC-GCTTGC--
20
+ -TC-GCTTGC--`;
20
21
 
21
22
 
22
23
  before(async () => {
@@ -34,8 +35,10 @@ category('WebLogo-positions', () => {
34
35
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
35
36
  const tv: DG.TableView = grok.shell.addTableView(df);
36
37
 
37
- df.getCol('seq').semType = 'Macromolecule';
38
- df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
38
+ const seqCol: DG.Column = df.getCol('seq');
39
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
40
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
41
+ seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
39
42
 
40
43
  const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
41
44
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
@@ -81,10 +84,11 @@ category('WebLogo-positions', () => {
81
84
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf2);
82
85
  const tv: DG.TableView = grok.shell.addTableView(df);
83
86
 
84
- df.getCol('seq').semType = 'Macromolecule';
85
- df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
87
+ const seqCol: DG.Column = df.getCol('seq');
88
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
89
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
90
+ seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
86
91
 
87
- let seq: Column = df.getCol('seq');
88
92
  df.filter.init((i) => {
89
93
  return i > 2;
90
94
  });
@@ -125,8 +129,10 @@ category('WebLogo-positions', () => {
125
129
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
126
130
  const tv: DG.TableView = grok.shell.addTableView(df);
127
131
 
128
- df.getCol('seq').semType = 'Macromolecule';
129
- df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
132
+ const seqCol: DG.Column = df.getCol('seq');
133
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
134
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
135
+ seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
130
136
 
131
137
  const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
132
138
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
@@ -156,7 +162,6 @@ category('WebLogo-positions', () => {
156
162
  expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
157
163
  }
158
164
  }
159
-
160
165
  });
161
166
 
162
167
  });
@@ -0,0 +1,69 @@
1
+ import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ import {checkInputColumn} from '../package';
8
+ import {UNITS} from 'datagrok-api/dg';
9
+ import {ALPHABET, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
+
11
+
12
+ category('checkInputColumn', () => {
13
+
14
+ const csv = `seq
15
+ seq1,
16
+ seq2,
17
+ seq3,
18
+ seq4`;
19
+
20
+ category('MSA', () => {
21
+
22
+ test('testMsaPos', async () => {
23
+ const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
24
+ const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
25
+
26
+ let k = 11;
27
+
28
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
29
+ const col: DG.Column = df.getCol('seq');
30
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
31
+ col.setTag(DG.TAGS.UNITS, 'fasta');
32
+ col.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
33
+
34
+ const [res, msg]: [boolean, string] = checkInputColumn(
35
+ col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
36
+
37
+ expect(res, true);
38
+ });
39
+
40
+ test('testMsaNegHelm', async () => {
41
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
42
+ const col: DG.Column = df.getCol('seq');
43
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
44
+ col.setTag(DG.TAGS.UNITS, 'helm');
45
+ col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
46
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
47
+
48
+ const [res, msg]: [boolean, string] = checkInputColumn(
49
+ col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
50
+
51
+ expect(res, false);
52
+ });
53
+
54
+ test('testMsaNegUN', async () => {
55
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
56
+ const col: DG.Column = df.getCol('seq');
57
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
58
+ col.setTag(DG.TAGS.UNITS, 'fasta');
59
+ col.setTag(UnitsHandler.TAGS.alphabet, 'UN');
60
+ col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
61
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
62
+
63
+ const [res, msg]: [boolean, string] = checkInputColumn(
64
+ col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
65
+
66
+ expect(res, false);
67
+ });
68
+ });
69
+ });
@@ -5,6 +5,7 @@ import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
 
7
7
  import {importFasta} from '../package';
8
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
9
 
9
10
  type DfReaderFunc = () => Promise<DG.DataFrame>;
10
11
 
@@ -198,44 +199,44 @@ MWRSWY-CKHP
198
199
  test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
199
200
 
200
201
  test('Dna1', async () => {
201
- await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA');
202
+ await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA', 4, false);
202
203
  });
203
204
  test('Rna1', async () => {
204
- await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA');
205
+ await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA', 4, false);
205
206
  });
206
207
  test('AA1', async () => {
207
- await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT');
208
+ await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT', 20, false);
208
209
  });
209
210
  test('MsaDna1', async () => {
210
- await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA');
211
+ await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA', 4, false);
211
212
  });
212
213
 
213
214
  test('MsaAA1', async () => {
214
- await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT');
215
+ await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT', 20, false);
215
216
  });
216
217
 
217
218
  test('SepDna', async () => {
218
- await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', '*');
219
+ await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', 4, false, '*');
219
220
  });
220
221
  test('SepRna', async () => {
221
- await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', '*');
222
+ await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', 4, false, '*');
222
223
  });
223
224
  test('SepPt', async () => {
224
- await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', '-');
225
+ await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', 20, false, '-');
225
226
  });
226
227
  test('SepUn1', async () => {
227
- await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', '-');
228
+ await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', 8, true, '-');
228
229
  });
229
230
  test('SepUn2', async () => {
230
- await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', '/');
231
+ await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', 9, true, '/');
231
232
  });
232
233
 
233
234
  test('SepMsaN1', async () => {
234
- await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', '-');
235
+ await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', 4, false, '-');
235
236
  });
236
237
 
237
238
  test('SamplesFastaCsvPt', async () => {
238
- await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT');
239
+ await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
239
240
  });
240
241
  test('SamplesFastaCsvNegativeEntry', async () => {
241
242
  await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
@@ -248,7 +249,7 @@ MWRSWY-CKHP
248
249
  });
249
250
 
250
251
  test('SamplesFastaFastaPt', async () => {
251
- await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT');
252
+ await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
252
253
  });
253
254
 
254
255
  // peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
@@ -266,7 +267,7 @@ MWRSWY-CKHP
266
267
  });
267
268
 
268
269
  test('samplesMsaComplexUn', async () => {
269
- await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', '/');
270
+ await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', 161, true, '/');
270
271
  });
271
272
  test('samplesMsaComplexNegativeActivity', async () => {
272
273
  await _testNeg(readSamples(Samples.msaComplex), 'Activity');
@@ -281,7 +282,7 @@ MWRSWY-CKHP
281
282
  });
282
283
 
283
284
  test('samplesHelmCsvHELM', async () => {
284
- await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, null);
285
+ await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, 160, true, null);
285
286
  });
286
287
 
287
288
  test('samplesHelmCsvNegativeActivity', async () => {
@@ -297,7 +298,7 @@ MWRSWY-CKHP
297
298
  await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
298
299
  });
299
300
  test('samplesTestHelmPositiveHelmString', async () => {
300
- await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, null);
301
+ await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, 9, true, null);
301
302
  });
302
303
  test('samplesTestHelmNegativeValid', async () => {
303
304
  await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
@@ -329,7 +330,7 @@ MWRSWY-CKHP
329
330
  });
330
331
 
331
332
  test('samplesFastaPtPosSequence', async () => {
332
- await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT');
333
+ await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
333
334
  });
334
335
 
335
336
  test('samplesTestCerealNegativeCerealName', async () => {
@@ -371,7 +372,8 @@ MWRSWY-CKHP
371
372
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
372
373
  const df: DG.DataFrame = await readDf();
373
374
  const col: DG.Column = df.col(colName)!;
374
- const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
375
+ const semType: string = await grok.functions
376
+ .call('Bio:detectMacromolecule', {col: col}) as unknown as string;
375
377
  if (semType)
376
378
  col.semType = semType;
377
379
 
@@ -384,18 +386,31 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
384
386
  }
385
387
  }
386
388
 
387
- export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, aligned: string | null, alphabet: string | null, separator: string | null = null) {
389
+ export async function _testPos(
390
+ readDf: DfReaderFunc, colName: string, units: string,
391
+ aligned: string | null, alphabet: string | null, alphabetSize: number, alphabetIsMultichar: boolean,
392
+ separator: string | null = null
393
+ ) {
388
394
  const df: DG.DataFrame = await readDf();
389
395
  const col: DG.Column = df.col(colName)!;
390
- const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
396
+ const semType: string = await grok.functions
397
+ .call('Bio:detectMacromolecule', {col: col}) as unknown as string;
391
398
  if (semType)
392
399
  col.semType = semType;
393
400
 
394
401
  expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
395
402
  expect(col.getTag(DG.TAGS.UNITS), units);
396
- expect(col.getTag('aligned'), aligned);
397
- expect(col.getTag('alphabet'), alphabet);
403
+ expect(col.getTag(UnitsHandler.TAGS.aligned), aligned);
404
+ expect(col.getTag(UnitsHandler.TAGS.alphabet), alphabet);
398
405
  if (separator)
399
- expect(col.getTag('separator'), separator);
406
+ expect(col.getTag(UnitsHandler.TAGS.separator), separator);
407
+
408
+ const uh = new UnitsHandler(col);
409
+ expect(uh.getAlphabetSize(), alphabetSize);
410
+ expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
411
+ if (!uh.isHelm()) {
412
+ expect(uh.aligned, aligned);
413
+ expect(uh.alphabet, alphabet);
414
+ }
400
415
  }
401
416
 
@@ -49,7 +49,8 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
49
49
  const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
50
50
 
51
51
  const srcCol: DG.Column = srcDf.getCol('seq')!;
52
- const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
52
+ const semType: string = await grok.functions
53
+ .call('Bio:detectMacromolecule', {col: srcCol}) as unknown as string;
53
54
  if (semType)
54
55
  srcCol.semType = semType;
55
56
 
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
5
5
  import {importFasta, multipleSequenceAlignmentAny} from '../package';
6
6
  import {readDataframe} from './utils';
7
7
  import {convertDo} from '../utils/convert';
8
- import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
9
 
10
10
  category('renderers', () => {
11
11
  let tvList: DG.TableView[];
@@ -42,21 +42,21 @@ category('renderers', () => {
42
42
 
43
43
  console.log('Bio: tests/renderers/afterMsa, src before test ' +
44
44
  `semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
45
- `cell.renderer="${srcSeqCol!.getTag('cell.renderer')}"`);
45
+ `cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
46
46
  expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
47
- expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
48
- expect(srcSeqCol!.getTag('aligned'), 'SEQ');
49
- expect(srcSeqCol!.getTag('alphabet'), 'PT');
50
- expect(srcSeqCol!.getTag('cell.renderer'), 'sequence');
47
+ expect(srcSeqCol!.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
48
+ expect(srcSeqCol!.getTag(UnitsHandler.TAGS.aligned), 'SEQ');
49
+ expect(srcSeqCol!.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
50
+ expect(srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
51
51
 
52
52
  const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
53
53
  tv.grid.invalidate();
54
-
54
+
55
55
  expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
56
- expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
57
- expect(msaSeqCol!.getTag('aligned'), 'SEQ.MSA');
58
- expect(msaSeqCol!.getTag('alphabet'), 'PT');
59
- expect(msaSeqCol!.getTag('cell.renderer'), 'sequence');
56
+ expect(msaSeqCol!.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
57
+ expect(msaSeqCol!.getTag(UnitsHandler.TAGS.aligned), 'SEQ.MSA');
58
+ expect(msaSeqCol!.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
59
+ expect(msaSeqCol!.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
60
60
 
61
61
  dfList.push(df);
62
62
  tvList.push(tv);
@@ -70,7 +70,7 @@ category('renderers', () => {
70
70
 
71
71
  const srcCol: DG.Column = df.col('sequence')!;
72
72
  const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
73
- expect(tgtCol.getTag('cell.renderer'), 'sequence');
73
+ expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
74
74
 
75
75
  tvList.push(tv);
76
76
  dfList.push(df);