@datagrok/bio 1.7.22 → 1.7.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,8 +1,11 @@
1
1
  {
2
2
  "name": "@datagrok/bio",
3
- "beta": false,
4
3
  "friendlyName": "Bio",
5
- "version": "1.7.22",
4
+ "author": {
5
+ "name": "Leonid Stolbov",
6
+ "email": "lstolbov@datagrok.ai"
7
+ },
8
+ "version": "1.7.25",
6
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
7
10
  "repository": {
8
11
  "type": "git",
@@ -11,9 +14,9 @@
11
14
  },
12
15
  "dependencies": {
13
16
  "@biowasm/aioli": ">=2.4.0",
14
- "@datagrok-libraries/bio": "^3.0.0",
15
- "@datagrok-libraries/ml": "^3.0.0",
16
- "@datagrok-libraries/utils": "^1.4.0",
17
+ "@datagrok-libraries/bio": "^3.0.4",
18
+ "@datagrok-libraries/ml": "^3.0.3",
19
+ "@datagrok-libraries/utils": "^1.5.4",
17
20
  "cash-dom": "latest",
18
21
  "datagrok-api": "^1.5.0",
19
22
  "dayjs": "^1.11.4",
@@ -33,7 +36,11 @@
33
36
  "puppeteer": "^13.7.0",
34
37
  "ts-jest": "^27.0.0",
35
38
  "webpack": "latest",
36
- "webpack-cli": "^4.10.0"
39
+ "webpack-cli": "^4.10.0",
40
+ "@types/js-yaml": "^4.0.5",
41
+ "js-yaml": "^4.1.0",
42
+ "@types/node-fetch": "^2.6.2",
43
+ "node-fetch": "^2.6.7"
37
44
  },
38
45
  "scripts": {
39
46
  "link-api": "npm link datagrok-api",
File without changes
@@ -1,6 +1,6 @@
1
1
  #!/bin/bash
2
2
 
3
- ./setup-unlink-clean
3
+ ./setup-unlink-clean.sh
4
4
 
5
5
  GREEN='\e[0;32m'
6
6
  NO_COLOR='\e[0m'
@@ -5,7 +5,7 @@
5
5
  import * as utils from './test-node';
6
6
  import puppeteer from 'puppeteer';
7
7
 
8
- const P_START_TIMEOUT: number = 500000;
8
+ const P_START_TIMEOUT: number = 3600000;
9
9
  let browser: puppeteer.Browser;
10
10
  let page: puppeteer.Page;
11
11
 
@@ -51,10 +51,10 @@ it('TEST', async () => {
51
51
  let failReport = '';
52
52
  for (let i = 0; i < df.rowCount; i++) {
53
53
  if (cStatus.get(i)) {
54
- passReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
54
+ passReport += `Test result : Success : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
55
55
  } else {
56
56
  failed = true;
57
- failReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
57
+ failReport += `Test result : Failed : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
58
58
  }
59
59
  }
60
60
  resolve({failReport, passReport, failed});
@@ -65,4 +65,4 @@ it('TEST', async () => {
65
65
  console.log(r.passReport);
66
66
  // @ts-ignore
67
67
  expect(r.failed).checkOutput(false, r.failReport);
68
- }, 500000);
68
+ }, 3600000);
@@ -68,6 +68,7 @@ export async function getBrowserPage(puppeteer: any): Promise<{ browser: any, pa
68
68
  });
69
69
 
70
70
  const page = await browser.newPage();
71
+ await page.setDefaultNavigationTimeout(0);
71
72
  await page.goto(`${url}/oauth/`);
72
73
  await page.setCookie({name: 'auth', value: token});
73
74
  await page.evaluate((token: any) => {
@@ -75,8 +76,8 @@ export async function getBrowserPage(puppeteer: any): Promise<{ browser: any, pa
75
76
  }, token);
76
77
  await page.goto(url);
77
78
  try {
78
- await page.waitForSelector('.grok-preloader');
79
- await page.waitForFunction(() => document.querySelector('.grok-preloader') == null, {timeout: 500000});
79
+ await page.waitForSelector('.grok-preloader', { timeout: 1800000 });
80
+ await page.waitForFunction(() => document.querySelector('.grok-preloader') == null, {timeout: 3600000});
80
81
  } catch (error) {
81
82
  throw error;
82
83
  }
@@ -11,6 +11,7 @@ import './tests/activity-cliffs-tests';
11
11
  import './tests/splitters-test';
12
12
  import './tests/renderers-test';
13
13
  import './tests/convert-test';
14
+ import './tests/fasta-handler-test';
14
15
  import './tests/WebLogo-positions-test';
15
16
 
16
17
  export const _package = new DG.Package();
@@ -21,7 +22,6 @@ export {tests};
21
22
  //input: string category {optional: true}
22
23
  //input: string test {optional: true}
23
24
  //output: dataframe result
24
- //top-menu: Tools | Dev | JS API Tests
25
25
  export async function test(category: string, test: string): Promise<DG.DataFrame> {
26
26
  const data = await runTests({category, test});
27
27
  return DG.DataFrame.fromObjects(data)!;
package/src/package.ts CHANGED
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  export const _package = new DG.Package();
7
7
 
8
- import {AlignedSequenceDifferenceCellRenderer, AminoAcidsCellRenderer} from './utils/cell-renderer';
8
+ import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/cell-renderer';
9
9
  import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
10
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
11
  import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
@@ -20,37 +20,27 @@ import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CO
20
20
  import {getMacroMol} from './utils/atomic-works';
21
21
  import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
22
22
  import {convert} from './utils/convert';
23
- import {lru} from './utils/cell-renderer';
24
23
  import {representationsWidget} from './widgets/representations';
25
24
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
26
25
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
26
+ import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils'
27
27
 
28
28
 
29
- //tags: init
30
- export async function initBio(): Promise<void> {
31
- // apparently HELMWebEditor requires dojo to be initialized first
32
- const funcList: DG.Func[] = DG.Func.find({package: 'Helm', name: 'initHelm'});
33
- console.debug(`Bio: initBio() funcList.length = ${funcList.length}`);
34
- if (funcList.length === 1)
35
- await grok.functions.call('Helm:initHelp');
36
-
37
- return new Promise((resolve, reject) => {
38
- // @ts-ignore
39
- dojo.ready(function() { resolve(null); });
40
- });
41
- }
42
-
43
- //name: Lru
44
- //output: object lruCache
45
- export function Lru() {
46
- return lru;
29
+ //name: fastaSequenceCellRenderer
30
+ //tags: cellRenderer
31
+ //meta.cellType: Sequence
32
+ //meta.columnTags: units=fasta
33
+ //output: grid_cell_renderer result
34
+ export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
35
+ return new MacromoleculeSequenceCellRenderer();
47
36
  }
48
37
 
49
- //name: macromoleculeSequenceCellRenderer
38
+ //name: separatorSequenceCellRenderer
50
39
  //tags: cellRenderer
51
- //meta.cellType: Macromolecule
40
+ //meta.cellType: Sequence
41
+ //meta.columnTags: units=separator
52
42
  //output: grid_cell_renderer result
53
- export function macromoleculeSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
43
+ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
54
44
  return new MacromoleculeSequenceCellRenderer();
55
45
  }
56
46
 
@@ -161,16 +151,22 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
161
151
  if (!encodedCol)
162
152
  return;
163
153
  const embedColsNames = getEmbeddingColsNames(table);
154
+ const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
155
+ const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, encodedCol);
156
+
164
157
  const chemSpaceParams = {
165
- seqCol: encodedCol,
158
+ seqCol: withoutEmptyValues.col(macroMolecule.name)!,
166
159
  methodName: methodName,
167
160
  similarityMetric: similarityMetric,
168
161
  embedAxesNames: embedColsNames
169
162
  };
170
163
  const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
171
164
  const embeddings = sequenceSpaceRes.coordinates;
172
- for (const col of embeddings)
173
- table.columns.add(col);
165
+ for (const col of embeddings) {
166
+ const listValues = col.toList();
167
+ emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
168
+ table.columns.add(DG.Column.fromFloat32Array(col.name, listValues));
169
+ }
174
170
  if (plotEmbeddings) {
175
171
  for (const v of grok.shell.views) {
176
172
  if (v.name === table.name)
@@ -241,6 +237,14 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
241
237
  return msaCol;
242
238
  }
243
239
 
240
+ //name: Bio | MSA
241
+ //tags: bio, panel
242
+ //input: column sequence { semType: Macromolecule }
243
+ //output: column result
244
+ export async function panelMSA(col: DG.Column): Promise<DG.Column | null> {
245
+ return multipleSequenceAlignmentAny(col.dataFrame, col);
246
+ }
247
+
244
248
  //name: Composition Analysis
245
249
  //top-menu: Bio | Composition Analysis
246
250
  //output: viewer result
@@ -334,20 +338,20 @@ export function convertPanel(col: DG.Column): void {
334
338
  convert(col);
335
339
  }
336
340
 
337
- //name: aminoAcidsCellRenderer
341
+ //name: monomerCellRenderer
338
342
  //tags: cellRenderer
339
- //meta.cellType: aminoAcids
343
+ //meta.cellType: Monomer
340
344
  //output: grid_cell_renderer result
341
- export function aminoAcidsCellRenderer(): AminoAcidsCellRenderer {
342
- return new AminoAcidsCellRenderer();
345
+ export function monomerCellRenderer(): MonomerCellRenderer {
346
+ return new MonomerCellRenderer();
343
347
  }
344
348
 
345
- //name: alignedSequenceDifferenceCellRenderer
349
+ //name: MacromoleculeDifferenceCellRenderer
346
350
  //tags: cellRenderer
347
- //meta.cellType: alignedSequenceDifference
351
+ //meta.cellType: MacromoleculeDifference
348
352
  //output: grid_cell_renderer result
349
- export function alignedSequenceDifferenceCellRenderer(): AlignedSequenceDifferenceCellRenderer {
350
- return new AlignedSequenceDifferenceCellRenderer();
353
+ export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCellRenderer {
354
+ return new MacromoleculeDifferenceCellRenderer();
351
355
  }
352
356
 
353
357
  //name: testDetectMacromolecule
@@ -4,10 +4,12 @@ import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
  import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
+ import {Column} from 'datagrok-api/dg';
7
8
 
8
9
  category('WebLogo-positions', () => {
9
10
  let tvList: DG.TableView[];
10
11
  let dfList: DG.DataFrame[];
12
+ let currentView: DG.View;
11
13
 
12
14
  const csvDf1 = `seq
13
15
  ATC-G-TTGC--
@@ -17,22 +19,24 @@ category('WebLogo-positions', () => {
17
19
  -TC-GCTTGC--`;
18
20
 
19
21
 
20
- const resShrinkEmptyTailDf1: PositionInfo[] = [];
21
-
22
22
  before(async () => {
23
23
  tvList = [];
24
24
  dfList = [];
25
+ currentView = grok.shell.tv;
25
26
  });
26
27
 
27
28
  after(async () => {
28
- dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
29
+ dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df);});
29
30
  tvList.forEach((tv: DG.TableView) => tv.close());
31
+ currentView = grok.shell.tv;
30
32
  });
31
-
32
33
  test('allPositions', async () => {
33
34
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
34
35
  const tv: DG.TableView = grok.shell.addTableView(df);
35
36
 
37
+ df.getCol('seq').semType = 'Macromolecule';
38
+ df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
39
+
36
40
  const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
37
41
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
38
42
 
@@ -55,12 +59,102 @@ category('WebLogo-positions', () => {
55
59
  new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
56
60
  new PositionInfo('12', {'-': new PositionMonomerInfo(5)})
57
61
  ];
58
- // check all positions are equal resAllDf1
62
+
63
+ expect(positions.length, resAllDf1.length);
64
+
65
+ for (let i = 0; i < positions.length; i++) {
66
+ expect(positions[i].name, resAllDf1[i].name);
67
+ for (const key in positions[i].freq) {
68
+ expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
69
+ }
70
+ }
71
+
72
+ });
73
+ test('positions with shrinkEmptyTail option true (filterd)', async () => {
74
+ let csvDf2 = `seq
75
+ -TC-G-TTGC--
76
+ -TC-GCTTGC--
77
+ -T--C-GT-
78
+ -T--C-GT-
79
+ -T--C-GT-
80
+ -T--CCGT-`;
81
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf2);
82
+ const tv: DG.TableView = grok.shell.addTableView(df);
83
+
84
+ df.getCol('seq').semType = 'Macromolecule';
85
+ df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
86
+
87
+ let seq: Column = df.getCol('seq');
88
+ df.filter.init((i) => {
89
+ return i > 2;
90
+ });
91
+ df.filter.fireChanged();
92
+ const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'shrinkEmptyTail': true}) as unknown as WebLogo;
93
+
94
+ tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
95
+
96
+ tvList.push(tv);
97
+ dfList.push(df);
98
+
99
+ const positions: PositionInfo[] = wlViewer['positions'];
100
+
101
+ const resAllDf1: PositionInfo[] = [
102
+ new PositionInfo('1', {'-': new PositionMonomerInfo(3)}),
103
+ new PositionInfo('2', {'T': new PositionMonomerInfo(3)}),
104
+ new PositionInfo('3', {'-': new PositionMonomerInfo(3)}),
105
+ new PositionInfo('4', {'-': new PositionMonomerInfo(3)}),
106
+ new PositionInfo('5', {'C': new PositionMonomerInfo(3)}),
107
+ new PositionInfo('6', {'-': new PositionMonomerInfo(2), 'C': new PositionMonomerInfo(1)}),
108
+ new PositionInfo('7', {'G': new PositionMonomerInfo(3)}),
109
+ new PositionInfo('8', {'T': new PositionMonomerInfo(3)}),
110
+ new PositionInfo('9', {'-': new PositionMonomerInfo(3)}),
111
+ ];
112
+
113
+ expect(positions.length, resAllDf1.length);
114
+
115
+ for (let i = 0; i < positions.length; i++) {
116
+ expect(positions[i].name, resAllDf1[i].name);
117
+ for (const key in positions[i].freq) {
118
+ expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
119
+ }
120
+ }
121
+
122
+ });
123
+
124
+ test('positions with skipEmptyPositions option', async () => {
125
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
126
+ const tv: DG.TableView = grok.shell.addTableView(df);
127
+
128
+ df.getCol('seq').semType = 'Macromolecule';
129
+ df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
130
+
131
+ const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
132
+ tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
133
+
134
+ tvList.push(tv);
135
+ dfList.push(df);
136
+
137
+ const positions: PositionInfo[] = wlViewer['positions'];
138
+
139
+ const resAllDf1: PositionInfo[] = [
140
+ new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
141
+ new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
142
+ new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
143
+ new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
144
+ new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
145
+ new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
146
+ new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
147
+ new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
148
+ new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
149
+ ];
150
+
151
+ expect(positions.length, resAllDf1.length);
152
+
59
153
  for (let i = 0; i < positions.length; i++) {
60
154
  expect(positions[i].name, resAllDf1[i].name);
61
- for (const key in positions[i].freq) {
62
- expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
63
- }
155
+ for (const key in positions[i].freq) {
156
+ expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
157
+ }
64
158
  }
65
159
 
66
160
  });
@@ -146,3 +146,4 @@ export async function _testPickupPaletteAA2(dfAA2: DG.DataFrame) {
146
146
 
147
147
  expect(cp instanceof AminoacidsPalettes, true);
148
148
  }
149
+
@@ -190,6 +190,7 @@ MWRSWY-CKHP
190
190
  };
191
191
  };
192
192
 
193
+
193
194
  test('NegativeEmpty', async () => { await _testNeg(readCsv('csvDfEmpty', csvDfEmpty), 'col1'); });
194
195
  test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
195
196
  test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
@@ -197,44 +198,44 @@ MWRSWY-CKHP
197
198
  test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
198
199
 
199
200
  test('Dna1', async () => {
200
- await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta:SEQ:DNA');
201
+ await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA');
201
202
  });
202
203
  test('Rna1', async () => {
203
- await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta:SEQ:RNA');
204
+ await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA');
204
205
  });
205
206
  test('AA1', async () => {
206
- await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta:SEQ:PT');
207
+ await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT');
207
208
  });
208
209
  test('MsaDna1', async () => {
209
- await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta:SEQ.MSA:DNA');
210
+ await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA');
210
211
  });
211
212
 
212
213
  test('MsaAA1', async () => {
213
- await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta:SEQ.MSA:PT');
214
+ await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT');
214
215
  });
215
216
 
216
217
  test('SepDna', async () => {
217
- await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator:SEQ:DNA', '*');
218
+ await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', '*');
218
219
  });
219
220
  test('SepRna', async () => {
220
- await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator:SEQ:RNA', '*');
221
+ await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', '*');
221
222
  });
222
223
  test('SepPt', async () => {
223
- await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator:SEQ:PT', '-');
224
+ await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', '-');
224
225
  });
225
226
  test('SepUn1', async () => {
226
- await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator:SEQ:UN', '-');
227
+ await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', '-');
227
228
  });
228
229
  test('SepUn2', async () => {
229
- await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator:SEQ:UN', '/');
230
+ await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', '/');
230
231
  });
231
232
 
232
233
  test('SepMsaN1', async () => {
233
- await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator:SEQ.MSA:DNA', '-');
234
+ await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', '-');
234
235
  });
235
236
 
236
237
  test('SamplesFastaCsvPt', async () => {
237
- await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta:SEQ:PT');
238
+ await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT');
238
239
  });
239
240
  test('SamplesFastaCsvNegativeEntry', async () => {
240
241
  await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
@@ -247,7 +248,7 @@ MWRSWY-CKHP
247
248
  });
248
249
 
249
250
  test('SamplesFastaFastaPt', async () => {
250
- await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta:SEQ:PT');
251
+ await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT');
251
252
  });
252
253
 
253
254
  // peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
@@ -265,7 +266,7 @@ MWRSWY-CKHP
265
266
  });
266
267
 
267
268
  test('samplesMsaComplexUn', async () => {
268
- await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator:SEQ.MSA:UN', '/');
269
+ await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', '/');
269
270
  });
270
271
  test('samplesMsaComplexNegativeActivity', async () => {
271
272
  await _testNeg(readSamples(Samples.msaComplex), 'Activity');
@@ -280,7 +281,7 @@ MWRSWY-CKHP
280
281
  });
281
282
 
282
283
  test('samplesHelmCsvHELM', async () => {
283
- await _testPos(readSamples(Samples.helmCsv), 'HELM', 'HELM', null);
284
+ await _testPos(readSamples(Samples.helmCsv), 'HELM', 'HELM', null, null, null);
284
285
  });
285
286
 
286
287
  test('samplesHelmCsvNegativeActivity', async () => {
@@ -296,7 +297,7 @@ MWRSWY-CKHP
296
297
  await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
297
298
  });
298
299
  test('samplesTestHelmPositiveHelmString', async () => {
299
- await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'HELM');
300
+ await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'HELM', null, null, null);
300
301
  });
301
302
  test('samplesTestHelmNegativeValid', async () => {
302
303
  await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
@@ -328,7 +329,7 @@ MWRSWY-CKHP
328
329
  });
329
330
 
330
331
  test('samplesFastaPtPosSequence', async () => {
331
- await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta:SEQ:PT');
332
+ await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT');
332
333
  });
333
334
 
334
335
  test('samplesTestCerealNegativeCerealName', async () => {
@@ -383,7 +384,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
383
384
  }
384
385
  }
385
386
 
386
- export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
387
+ export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, aligned: string | null, alphabet: string | null, separator: string | null = null) {
387
388
  const df: DG.DataFrame = await readDf();
388
389
  const col: DG.Column = df.col(colName)!;
389
390
  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
@@ -392,6 +393,8 @@ export async function _testPos(readDf: DfReaderFunc, colName: string, units: str
392
393
 
393
394
  expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
394
395
  expect(col.getTag(DG.TAGS.UNITS), units);
396
+ expect(col.getTag('aligned'), aligned);
397
+ expect(col.getTag('alphabet'), alphabet);
395
398
  if (separator)
396
399
  expect(col.getTag('separator'), separator);
397
400
  }
@@ -0,0 +1,141 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {category, expectArray, test} from '@datagrok-libraries/utils/src/test';
7
+ import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
8
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
+
10
+
11
+ category('fastaFileHandler', () => {
12
+ const fastaNormalFormatting = `>description:1
13
+ MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
14
+
15
+ >description:2
16
+ MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
17
+
18
+ >description:3
19
+ MMELVLKTIIGPIVVGVVLRIVDKWLNKDK
20
+
21
+ >description:4
22
+ MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN
23
+ `;
24
+
25
+ const fastaExtraSpaces = `>description:1
26
+ MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
27
+
28
+ >description:2
29
+ MI EVF LFGIVLGLI PITLAGLFVTAY LQYRRGDQLDL
30
+
31
+ >description:3
32
+ M MELVLKTI IGPI VVGVVLR IVDKWLNKDK
33
+
34
+ >description:4
35
+ MDR TDEVSNHTHDKP TLTWFEEIFEEYHSPFHN
36
+ `;
37
+
38
+ const fastaExtraNewlines = `>description:1
39
+
40
+ MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
41
+
42
+ >description:2
43
+ MIEVF
44
+ LFGIVLGLI
45
+ PITLAGLFVTA
46
+ YLQYRRGDQLDL
47
+
48
+ >description:3
49
+ M
50
+ ME
51
+
52
+ LVLKTIIG
53
+
54
+ PIVVGVVLRI
55
+ VDKWLNKDK
56
+
57
+
58
+ >description:4
59
+
60
+ MDRT
61
+
62
+ DEVSNHTHDKP
63
+
64
+ TLTWFEEIFEE
65
+
66
+
67
+
68
+ YHSPFHN
69
+ `;
70
+ // a "broken" fasta file
71
+ // const fastaBroken = `
72
+
73
+ // >description:1
74
+ // MDYKETLLM
75
+ // PKTDFPMRGGLPN
76
+ // KEPQIQEKW
77
+
78
+
79
+
80
+ // >description:2
81
+ // MIEVFL FGIVLGLIPI TLAGLFVTAYLQYRRGDQLDL
82
+
83
+ // >description:3
84
+
85
+ // M
86
+ // MELVLKTIIGP
87
+ // IVVGVVLR
88
+ // IVDKWLNKD
89
+
90
+ // K
91
+
92
+ // >description:4
93
+ // MDRTDEV
94
+
95
+ // SNHTHDKP
96
+ // TLTWFEEI
97
+ // FEE
98
+
99
+ // YHSPFHN
100
+
101
+
102
+ // `;
103
+
104
+ const descriptionsArray = [
105
+ 'description:1', 'description:2', 'description:3', 'description:4',
106
+ ];
107
+ const descriptionCol = DG.Column.fromStrings('description', descriptionsArray);
108
+
109
+ const sequencesArray = [
110
+ 'MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW',
111
+ 'MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL',
112
+ 'MMELVLKTIIGPIVVGVVLRIVDKWLNKDK',
113
+ 'MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN',
114
+ ];
115
+ const sequencesCol = DG.Column.fromStrings('sequence', sequencesArray);
116
+ sequencesCol.semType = DG.SEMTYPE.MACROMOLECULE;
117
+ UnitsHandler.setUnitsToFastaColumn(sequencesCol);
118
+
119
+ const fastaDf = DG.DataFrame.fromColumns([descriptionCol, sequencesCol]);
120
+
121
+ function _testColumnsParser(inputFasta: string) {
122
+ const ffh = new FastaFileHandler(inputFasta);
123
+ const parsedDescriptionsArray = ffh.descriptionsArray;
124
+ const parsedSequencesArray = ffh.sequencesArray;
125
+ expectArray(
126
+ [parsedDescriptionsArray, parsedSequencesArray],
127
+ [descriptionsArray, sequencesArray]
128
+ );
129
+ }
130
+
131
+ // test parser
132
+ test('testNormalFormatting', async () => {
133
+ _testColumnsParser(fastaNormalFormatting);
134
+ });
135
+ test('testExtraSpaces', async () => {
136
+ _testColumnsParser(fastaExtraSpaces);
137
+ });
138
+ test('testExtraNewlines', async () => {
139
+ _testColumnsParser(fastaExtraNewlines);
140
+ });
141
+ });