@datagrok/bio 1.5.9 → 1.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@datagrok/bio",
3
3
  "beta": false,
4
4
  "friendlyName": "Bio",
5
- "version": "1.5.9",
5
+ "version": "1.5.10",
6
6
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
7
7
  "repository": {
8
8
  "type": "git",
@@ -11,7 +11,7 @@
11
11
  },
12
12
  "dependencies": {
13
13
  "@biowasm/aioli": ">=2.4.0",
14
- "@datagrok-libraries/bio": "^2.4.1",
14
+ "@datagrok-libraries/bio": "^2.4.2",
15
15
  "@datagrok-libraries/utils": "^1.0.0",
16
16
  "@datagrok-libraries/ml": "^2.0.9",
17
17
  "cash-dom": "latest",
package/src/package.ts CHANGED
@@ -135,6 +135,11 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
135
135
  //input: dataframe df [Input data table]
136
136
  //input: column sequence {semType: Macromolecule}
137
137
  export async function toAtomicLevel(df: DG.DataFrame, sequence: DG.Column): Promise<void> {
138
+ if (DG.Func.find({package: 'Chem', name: 'getRdKitModule'})) {
139
+ grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
140
+ return;
141
+ }
142
+
138
143
  const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
139
144
  const monomersLibObject: any[] = JSON.parse(monomersLibFile);
140
145
  const atomicCodes = getMolfilesFromSeq(sequence, monomersLibObject);
@@ -152,7 +157,23 @@ export async function toAtomicLevel(df: DG.DataFrame, sequence: DG.Column): Prom
152
157
  //input: dataframe table
153
158
  //input: column sequence { semType: Macromolecule }
154
159
  //output: column result
155
- export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column> {
160
+ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
161
+ if (col.semType != DG.SEMTYPE.MACROMOLECULE) {
162
+ grok.shell.warning(`MSA analysis is allowed for semantic type "${DG.SEMTYPE.MACROMOLECULE}" data only.`);
163
+ return null;
164
+ }
165
+ const units: string = col.getTag(DG.TAGS.UNITS);
166
+ const allowedAlphabets = ['DNA', 'RNA', 'PT'];
167
+ const allowedNotations = ['fasta'];
168
+ if (!allowedAlphabets.some((a) => units.toUpperCase().endsWith(a.toUpperCase())) ||
169
+ !allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
170
+ grok.shell.warning('MSA analysis is allowed for ' +
171
+ `notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} ` +
172
+ 'and ' +
173
+ `alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
174
+ return null;
175
+ }
176
+
156
177
  const msaCol = await runKalign(col, false);
157
178
  table.columns.add(msaCol);
158
179
 
@@ -171,19 +192,21 @@ export async function compositionAnalysis(): Promise<void> {
171
192
  // Higher priority for columns with MSA data to show with WebLogo.
172
193
  const tv = grok.shell.tv;
173
194
  const df = tv.dataFrame;
174
- const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
175
- let col: DG.Column | undefined = semTypeColList.find((col) => {
176
- const units = col.getTag(DG.TAGS.UNITS);
177
- return units ? units.indexOf('MSA') !== -1 : false;
178
- });
179
- if (!col)
180
- col = semTypeColList[0];
181
195
 
196
+ const col: DG.Column | null = WebLogo.pickUpSeqCol2(df);
182
197
  if (!col) {
183
198
  grok.shell.error('Current table does not contain sequences');
184
199
  return;
185
200
  }
186
201
 
202
+ const allowedNotations: string[] = ['fasta', 'separator'];
203
+ const units = col.getTag(DG.TAGS.UNITS);
204
+ if (!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
205
+ grok.shell.warning('Composition analysis is allowed for ' +
206
+ `notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')}.`);
207
+ return;
208
+ }
209
+
187
210
  tv.addViewer('WebLogo', {sequenceColumnName: col.name});
188
211
  }
189
212
 
@@ -106,10 +106,11 @@ MWRSWY-CKHP
106
106
  fastaFasta = 'fastaFasta',
107
107
  msaComplex = 'msaComplex',
108
108
  helmCsv = 'helmCsv',
109
+ testDemogCsv = 'testDemogCsv',
110
+ testHelmCsv = 'testHelmCsv',
109
111
  testIdCsv = 'testIdCsv',
110
112
  testSmilesCsv = 'testSmilesCsv',
111
- testHelmCsv = 'testHelmCsv',
112
- testDemogCsv = 'testDemogCsv',
113
+ testSmiles2Csv = 'testSmiles2Csv',
113
114
  }
114
115
 
115
116
  const samples: { [key: string]: string } = {
@@ -119,9 +120,10 @@ MWRSWY-CKHP
119
120
  'msaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
120
121
  'helmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
121
122
  'testDemogCsv': 'System:AppData/Bio/samples/testDemog.csv',
122
- 'testIdCsv': 'System:AppData/Bio/samples/id.csv',
123
123
  'testHelmCsv': 'System:AppData/Bio/samples/testHelm.csv',
124
+ 'testIdCsv': 'System:AppData/Bio/samples/id.csv',
124
125
  'testSmilesCsv': 'System:AppData/Bio/samples/testSmiles.csv',
126
+ 'testSmiles2Csv': 'System:AppData/Bio/samples/testSmiles2.csv',
125
127
  };
126
128
 
127
129
  const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
@@ -288,6 +290,10 @@ MWRSWY-CKHP
288
290
  for (const col of df.columns.toList())
289
291
  await _testNeg(dfFunc, col.name);
290
292
  });
293
+
294
+ test('samplesTestSmiles2NegativeSmiles', async () => {
295
+ await _testNeg(readSamples(Samples.testSmiles2Csv), 'SMILES');
296
+ });
291
297
  });
292
298
 
293
299
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
@@ -6,31 +6,53 @@ import * as DG from 'datagrok-api/dg';
6
6
  import {importFasta, multipleSequenceAlignmentAny} from '../package';
7
7
 
8
8
  category('renderers', () => {
9
+ let tvList: DG.TableView[];
10
+
11
+ before(async () => {
12
+ tvList = [];
13
+ });
14
+
15
+ after(async () => {
16
+ tvList.forEach((tv: DG.TableView) => tv.close());
17
+ });
18
+
9
19
  test('afterMsa', async () => {
10
20
  await _testAfterMsa();
11
21
  });
12
- });
13
22
 
14
- export async function _testAfterMsa() {
15
- const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
16
- const df: DG.DataFrame = importFasta(fastaTxt)[0];
23
+ async function _testAfterMsa() {
24
+ const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
25
+ const df: DG.DataFrame = importFasta(fastaTxt)[0];
26
+
27
+ const srcSeqCol: DG.Column | null = df.col('sequence');
28
+ expect(srcSeqCol !== null, true);
29
+ console.log('Bio: tests/renderers/afterMsa, src data loaded');
17
30
 
18
- const seqCol: DG.Column | null = df.col('sequence');
19
- expect(seqCol !== null, true);
31
+ const tv: DG.TableView = grok.shell.addTableView(df);
32
+ console.log('Bio: tests/renderers/afterMsa, table view');
20
33
 
21
- const tv: DG.TableView = grok.shell.addTableView(df);
22
- await grok.data.detectSemanticTypes(df);
34
+ await grok.data.detectSemanticTypes(df);
35
+ console.log('Bio: tests/renderers/afterMsa, detectSemanticTypes');
23
36
 
24
- expect(seqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
25
- expect(seqCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
26
- expect(seqCol!.getTag('cell.renderer'), 'Macromolecule');
37
+ console.log('Bio: tests/renderers/afterMsa, src before test semType' +
38
+ `semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
39
+ `cell.renderer="${srcSeqCol!.getTag('cell.renderer')}"`);
40
+ expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
41
+ expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
42
+ expect(srcSeqCol!.getTag('cell.renderer'), 'Macromolecule');
43
+ console.log('Bio: tests/renderers/afterMsa, src semType tested');
27
44
 
28
- const seqMsaCol: DG.Column = await multipleSequenceAlignmentAny(df, seqCol!);
29
- tv.grid.invalidate();
45
+ const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
46
+ console.log('Bio: tests/renderers/afterMsa, msaSeqCol created');
30
47
 
31
- expect(seqMsaCol!.semType, DG.SEMTYPE.MACROMOLECULE);
32
- expect(seqMsaCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
33
- expect(seqMsaCol!.getTag('cell.renderer'), 'Macromolecule');
48
+ tv.grid.invalidate();
49
+ console.log('Bio: tests/renderers/afterMsa, tv.grid invalidated');
34
50
 
35
- // tv.close();
36
- }
51
+ expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
52
+ expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
53
+ expect(msaSeqCol!.getTag('cell.renderer'), 'Macromolecule');
54
+ console.log('Bio: tests/renderers/afterMsa, msa semType tested');
55
+
56
+ tvList.push(tv);
57
+ }
58
+ });
@@ -193,7 +193,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
193
193
  const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator') );// splitter,
194
194
 
195
195
  const subParts:string[] = splitterFunc(cell.value);
196
- console.log(subParts);
196
+ // console.log(subParts);
197
197
 
198
198
  const textSize = g.measureText(subParts.join(''));
199
199
  let x1 = Math.max(x, x + (w - textSize.width) / 2);
@@ -50,10 +50,12 @@ export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<D
50
50
 
51
51
  await CLI.fs.writeFile('input.fa', fasta);
52
52
  const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
53
- const buf = await CLI.cat('result.fasta');
54
-
55
53
  console.warn(output);
56
54
 
55
+ const buf = await CLI.cat('result.fasta');
56
+ if (!buf)
57
+ throw new Error(`kalign output no result`);
58
+
57
59
  const aligned = _fastaToStrings(buf).slice(0, sequences.length);
58
60
  const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
59
61
 
@@ -1,4 +1,4 @@
1
- <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit b47d4664.</title><style type="text/css">html,
1
+ <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 936bf89b.</title><style type="text/css">html,
2
2
  body {
3
3
  font-family: Arial, Helvetica, sans-serif;
4
4
  font-size: 1rem;
@@ -229,14 +229,21 @@ header {
229
229
  font-size: 1rem;
230
230
  padding: 0 0.5rem;
231
231
  }
232
- </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit b47d4664.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-11 12:49:26</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">107.855s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">98.311s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: expect(received).toBe(expected) // Object.is equality
233
-
234
- Expected: false
235
- Received: true
236
- at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:49:20
237
- at Generator.next (&lt;anonymous&gt;)
238
- at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:31:58)
239
- at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre></div></div></div><div class="suite-consolelog"><div class="suite-consolelog-header">Console Log</div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:63:11)
232
+ </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 936bf89b.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-11 18:26:55</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">109.382s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">100.002s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: thrown: "Exceeded timeout of 100000 ms for a test.
233
+ Use jest.setTimeout(newTimeout) to increase the timeout value, if this is a long-running test."
234
+ at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:22:1)
235
+ at Runtime._execModule (/home/runner/work/public/public/packages/Bio/node_modules/jest-runtime/build/index.js:1646:24)
236
+ at Runtime._loadModule (/home/runner/work/public/public/packages/Bio/node_modules/jest-runtime/build/index.js:1185:12)
237
+ at Runtime.requireModule (/home/runner/work/public/public/packages/Bio/node_modules/jest-runtime/build/index.js:1009:12)
238
+ at jestAdapter (/home/runner/work/public/public/packages/Bio/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:13)
239
+ at runTestInternal (/home/runner/work/public/public/packages/Bio/node_modules/jest-runner/build/runTest.js:389:16)
240
+ at runTest (/home/runner/work/public/public/packages/Bio/node_modules/jest-runner/build/runTest.js:475:34)
241
+ at TestRunner.runTests (/home/runner/work/public/public/packages/Bio/node_modules/jest-runner/build/index.js:101:12)
242
+ at TestScheduler.scheduleTests (/home/runner/work/public/public/packages/Bio/node_modules/@jest/core/build/TestScheduler.js:333:13)
243
+ at runJest (/home/runner/work/public/public/packages/Bio/node_modules/@jest/core/build/runJest.js:404:19)
244
+ at _run10000 (/home/runner/work/public/public/packages/Bio/node_modules/@jest/core/build/cli/index.js:320:7)
245
+ at runCLI (/home/runner/work/public/public/packages/Bio/node_modules/@jest/core/build/cli/index.js:173:3)
246
+ at Object.run (/home/runner/work/public/public/packages/Bio/node_modules/jest-cli/build/cli/index.js:155:37)</pre></div></div></div><div class="suite-consolelog"><div class="suite-consolelog-header">Console Log</div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:63:11)
240
247
  at Generator.next (&lt;anonymous&gt;)
241
248
  at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:28:58)
242
249
  at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message">Using web root: http://localhost:8080</pre></div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:24:11
@@ -246,15 +253,4 @@ Received: true
246
253
  at Object.&lt;anonymous&gt;.__awaiter (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:30:12)
247
254
  at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:22:23)
248
255
  at Promise.then.completed (/home/runner/work/public/public/packages/Bio/node_modules/jest-circus/build/utils.js:391:28)
249
- at new Promise (&lt;anonymous&gt;)</pre><pre class="suite-consolelog-item-message">Testing Bio package</pre></div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:47:11
250
- at Generator.next (&lt;anonymous&gt;)
251
- at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:31:58)
252
- at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message">detectors.samplesIdCsvNegativeID: Operation caused an exception (FileSystemException: Cannot open file, path = '/home/grok/data/prod/packages/data/Bio/samples/id.csv' (OS Error: No such file or directory, errno = 2))
253
- MSA.is_correct: TypeError: Cannot read properties of undefined (reading 'split')
254
- sequenceSpace.sequenceSpaceOpens: TypeError: Cannot read properties of undefined (reading 'col')
255
- sequenceSpace.init: Operation caused an exception (FileSystemException: Cannot open file, path = '/home/grok/data/prod/packages/data/Bio/sample_FASTA.csv' (OS Error: No such file or directory, errno = 2))
256
- activityCliffs.activityCliffsOpen: TypeError: Cannot read properties of undefined (reading 'columns')
257
- activityCliffs.init: TypeError: Cannot read properties of undefined (reading 'close')
258
- activityCliffs.init: Operation caused an exception (FileSystemException: Cannot open file, path = '/home/grok/data/prod/packages/data/Bio/sample_MSA.csv' (OS Error: No such file or directory, errno = 2))
259
- renderers.afterMsa: Error: Expected "Macromolecule", got "null"
260
- </pre></div></div></div></div></body></html>
256
+ at new Promise (&lt;anonymous&gt;)</pre><pre class="suite-consolelog-item-message">Testing Bio package</pre></div></div></div></div></body></html>