@datagrok/bio 1.5.9 → 1.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +17 -5
- package/dist/package-test.js +126 -62
- package/dist/package.js +85 -39
- package/files/samples/testSmiles2.csv +12248 -0
- package/package.json +2 -2
- package/src/package.ts +31 -8
- package/src/tests/detectors-test.ts +9 -3
- package/src/tests/renderer-test.ts +40 -18
- package/src/utils/cell-renderer.ts +1 -1
- package/src/utils/multiple-sequence-alignment.ts +4 -2
- package/{test-Bio-34f75e5127b8-b47d4664.html → test-Bio-34f75e5127b8-936bf89b.html} +17 -21
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.5.
|
|
5
|
+
"version": "1.5.10",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^2.4.
|
|
14
|
+
"@datagrok-libraries/bio": "^2.4.2",
|
|
15
15
|
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
16
|
"@datagrok-libraries/ml": "^2.0.9",
|
|
17
17
|
"cash-dom": "latest",
|
package/src/package.ts
CHANGED
|
@@ -135,6 +135,11 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
135
135
|
//input: dataframe df [Input data table]
|
|
136
136
|
//input: column sequence {semType: Macromolecule}
|
|
137
137
|
export async function toAtomicLevel(df: DG.DataFrame, sequence: DG.Column): Promise<void> {
|
|
138
|
+
if (DG.Func.find({package: 'Chem', name: 'getRdKitModule'})) {
|
|
139
|
+
grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
|
|
140
|
+
return;
|
|
141
|
+
}
|
|
142
|
+
|
|
138
143
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
139
144
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
140
145
|
const atomicCodes = getMolfilesFromSeq(sequence, monomersLibObject);
|
|
@@ -152,7 +157,23 @@ export async function toAtomicLevel(df: DG.DataFrame, sequence: DG.Column): Prom
|
|
|
152
157
|
//input: dataframe table
|
|
153
158
|
//input: column sequence { semType: Macromolecule }
|
|
154
159
|
//output: column result
|
|
155
|
-
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column> {
|
|
160
|
+
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
|
|
161
|
+
if (col.semType != DG.SEMTYPE.MACROMOLECULE) {
|
|
162
|
+
grok.shell.warning(`MSA analysis is allowed for semantic type "${DG.SEMTYPE.MACROMOLECULE}" data only.`);
|
|
163
|
+
return null;
|
|
164
|
+
}
|
|
165
|
+
const units: string = col.getTag(DG.TAGS.UNITS);
|
|
166
|
+
const allowedAlphabets = ['DNA', 'RNA', 'PT'];
|
|
167
|
+
const allowedNotations = ['fasta'];
|
|
168
|
+
if (!allowedAlphabets.some((a) => units.toUpperCase().endsWith(a.toUpperCase())) ||
|
|
169
|
+
!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
|
|
170
|
+
grok.shell.warning('MSA analysis is allowed for ' +
|
|
171
|
+
`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} ` +
|
|
172
|
+
'and ' +
|
|
173
|
+
`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
174
|
+
return null;
|
|
175
|
+
}
|
|
176
|
+
|
|
156
177
|
const msaCol = await runKalign(col, false);
|
|
157
178
|
table.columns.add(msaCol);
|
|
158
179
|
|
|
@@ -171,19 +192,21 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
171
192
|
// Higher priority for columns with MSA data to show with WebLogo.
|
|
172
193
|
const tv = grok.shell.tv;
|
|
173
194
|
const df = tv.dataFrame;
|
|
174
|
-
const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
175
|
-
let col: DG.Column | undefined = semTypeColList.find((col) => {
|
|
176
|
-
const units = col.getTag(DG.TAGS.UNITS);
|
|
177
|
-
return units ? units.indexOf('MSA') !== -1 : false;
|
|
178
|
-
});
|
|
179
|
-
if (!col)
|
|
180
|
-
col = semTypeColList[0];
|
|
181
195
|
|
|
196
|
+
const col: DG.Column | null = WebLogo.pickUpSeqCol2(df);
|
|
182
197
|
if (!col) {
|
|
183
198
|
grok.shell.error('Current table does not contain sequences');
|
|
184
199
|
return;
|
|
185
200
|
}
|
|
186
201
|
|
|
202
|
+
const allowedNotations: string[] = ['fasta', 'separator'];
|
|
203
|
+
const units = col.getTag(DG.TAGS.UNITS);
|
|
204
|
+
if (!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
|
|
205
|
+
grok.shell.warning('Composition analysis is allowed for ' +
|
|
206
|
+
`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')}.`);
|
|
207
|
+
return;
|
|
208
|
+
}
|
|
209
|
+
|
|
187
210
|
tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
188
211
|
}
|
|
189
212
|
|
|
@@ -106,10 +106,11 @@ MWRSWY-CKHP
|
|
|
106
106
|
fastaFasta = 'fastaFasta',
|
|
107
107
|
msaComplex = 'msaComplex',
|
|
108
108
|
helmCsv = 'helmCsv',
|
|
109
|
+
testDemogCsv = 'testDemogCsv',
|
|
110
|
+
testHelmCsv = 'testHelmCsv',
|
|
109
111
|
testIdCsv = 'testIdCsv',
|
|
110
112
|
testSmilesCsv = 'testSmilesCsv',
|
|
111
|
-
|
|
112
|
-
testDemogCsv = 'testDemogCsv',
|
|
113
|
+
testSmiles2Csv = 'testSmiles2Csv',
|
|
113
114
|
}
|
|
114
115
|
|
|
115
116
|
const samples: { [key: string]: string } = {
|
|
@@ -119,9 +120,10 @@ MWRSWY-CKHP
|
|
|
119
120
|
'msaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
120
121
|
'helmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
|
|
121
122
|
'testDemogCsv': 'System:AppData/Bio/samples/testDemog.csv',
|
|
122
|
-
'testIdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
123
123
|
'testHelmCsv': 'System:AppData/Bio/samples/testHelm.csv',
|
|
124
|
+
'testIdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
124
125
|
'testSmilesCsv': 'System:AppData/Bio/samples/testSmiles.csv',
|
|
126
|
+
'testSmiles2Csv': 'System:AppData/Bio/samples/testSmiles2.csv',
|
|
125
127
|
};
|
|
126
128
|
|
|
127
129
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -288,6 +290,10 @@ MWRSWY-CKHP
|
|
|
288
290
|
for (const col of df.columns.toList())
|
|
289
291
|
await _testNeg(dfFunc, col.name);
|
|
290
292
|
});
|
|
293
|
+
|
|
294
|
+
test('samplesTestSmiles2NegativeSmiles', async () => {
|
|
295
|
+
await _testNeg(readSamples(Samples.testSmiles2Csv), 'SMILES');
|
|
296
|
+
});
|
|
291
297
|
});
|
|
292
298
|
|
|
293
299
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
@@ -6,31 +6,53 @@ import * as DG from 'datagrok-api/dg';
|
|
|
6
6
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
7
7
|
|
|
8
8
|
category('renderers', () => {
|
|
9
|
+
let tvList: DG.TableView[];
|
|
10
|
+
|
|
11
|
+
before(async () => {
|
|
12
|
+
tvList = [];
|
|
13
|
+
});
|
|
14
|
+
|
|
15
|
+
after(async () => {
|
|
16
|
+
tvList.forEach((tv: DG.TableView) => tv.close());
|
|
17
|
+
});
|
|
18
|
+
|
|
9
19
|
test('afterMsa', async () => {
|
|
10
20
|
await _testAfterMsa();
|
|
11
21
|
});
|
|
12
|
-
});
|
|
13
22
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
23
|
+
async function _testAfterMsa() {
|
|
24
|
+
const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
|
|
25
|
+
const df: DG.DataFrame = importFasta(fastaTxt)[0];
|
|
26
|
+
|
|
27
|
+
const srcSeqCol: DG.Column | null = df.col('sequence');
|
|
28
|
+
expect(srcSeqCol !== null, true);
|
|
29
|
+
console.log('Bio: tests/renderers/afterMsa, src data loaded');
|
|
17
30
|
|
|
18
|
-
|
|
19
|
-
|
|
31
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
32
|
+
console.log('Bio: tests/renderers/afterMsa, table view');
|
|
20
33
|
|
|
21
|
-
|
|
22
|
-
|
|
34
|
+
await grok.data.detectSemanticTypes(df);
|
|
35
|
+
console.log('Bio: tests/renderers/afterMsa, detectSemanticTypes');
|
|
23
36
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
37
|
+
console.log('Bio: tests/renderers/afterMsa, src before test semType' +
|
|
38
|
+
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
|
|
39
|
+
`cell.renderer="${srcSeqCol!.getTag('cell.renderer')}"`);
|
|
40
|
+
expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
41
|
+
expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
42
|
+
expect(srcSeqCol!.getTag('cell.renderer'), 'Macromolecule');
|
|
43
|
+
console.log('Bio: tests/renderers/afterMsa, src semType tested');
|
|
27
44
|
|
|
28
|
-
|
|
29
|
-
|
|
45
|
+
const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
|
|
46
|
+
console.log('Bio: tests/renderers/afterMsa, msaSeqCol created');
|
|
30
47
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
expect(seqMsaCol!.getTag('cell.renderer'), 'Macromolecule');
|
|
48
|
+
tv.grid.invalidate();
|
|
49
|
+
console.log('Bio: tests/renderers/afterMsa, tv.grid invalidated');
|
|
34
50
|
|
|
35
|
-
|
|
36
|
-
|
|
51
|
+
expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
52
|
+
expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
|
|
53
|
+
expect(msaSeqCol!.getTag('cell.renderer'), 'Macromolecule');
|
|
54
|
+
console.log('Bio: tests/renderers/afterMsa, msa semType tested');
|
|
55
|
+
|
|
56
|
+
tvList.push(tv);
|
|
57
|
+
}
|
|
58
|
+
});
|
|
@@ -193,7 +193,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
193
193
|
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator') );// splitter,
|
|
194
194
|
|
|
195
195
|
const subParts:string[] = splitterFunc(cell.value);
|
|
196
|
-
console.log(subParts);
|
|
196
|
+
// console.log(subParts);
|
|
197
197
|
|
|
198
198
|
const textSize = g.measureText(subParts.join(''));
|
|
199
199
|
let x1 = Math.max(x, x + (w - textSize.width) / 2);
|
|
@@ -50,10 +50,12 @@ export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<D
|
|
|
50
50
|
|
|
51
51
|
await CLI.fs.writeFile('input.fa', fasta);
|
|
52
52
|
const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
|
|
53
|
-
const buf = await CLI.cat('result.fasta');
|
|
54
|
-
|
|
55
53
|
console.warn(output);
|
|
56
54
|
|
|
55
|
+
const buf = await CLI.cat('result.fasta');
|
|
56
|
+
if (!buf)
|
|
57
|
+
throw new Error(`kalign output no result`);
|
|
58
|
+
|
|
57
59
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
58
60
|
const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
|
|
59
61
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit b47d4664.</title><style type="text/css">html,
|
|
1
|
+
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 936bf89b.</title><style type="text/css">html,
|
|
2
2
|
body {
|
|
3
3
|
font-family: Arial, Helvetica, sans-serif;
|
|
4
4
|
font-size: 1rem;
|
|
@@ -229,14 +229,21 @@ header {
|
|
|
229
229
|
font-size: 1rem;
|
|
230
230
|
padding: 0 0.5rem;
|
|
231
231
|
}
|
|
232
|
-
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
at /home/runner/work/public/public/packages/Bio/
|
|
237
|
-
at
|
|
238
|
-
at
|
|
239
|
-
at
|
|
232
|
+
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 936bf89b.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-11 18:26:55</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">109.382s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">100.002s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: thrown: "Exceeded timeout of 100000 ms for a test.
|
|
233
|
+
Use jest.setTimeout(newTimeout) to increase the timeout value, if this is a long-running test."
|
|
234
|
+
at Object.<anonymous> (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:22:1)
|
|
235
|
+
at Runtime._execModule (/home/runner/work/public/public/packages/Bio/node_modules/jest-runtime/build/index.js:1646:24)
|
|
236
|
+
at Runtime._loadModule (/home/runner/work/public/public/packages/Bio/node_modules/jest-runtime/build/index.js:1185:12)
|
|
237
|
+
at Runtime.requireModule (/home/runner/work/public/public/packages/Bio/node_modules/jest-runtime/build/index.js:1009:12)
|
|
238
|
+
at jestAdapter (/home/runner/work/public/public/packages/Bio/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:13)
|
|
239
|
+
at runTestInternal (/home/runner/work/public/public/packages/Bio/node_modules/jest-runner/build/runTest.js:389:16)
|
|
240
|
+
at runTest (/home/runner/work/public/public/packages/Bio/node_modules/jest-runner/build/runTest.js:475:34)
|
|
241
|
+
at TestRunner.runTests (/home/runner/work/public/public/packages/Bio/node_modules/jest-runner/build/index.js:101:12)
|
|
242
|
+
at TestScheduler.scheduleTests (/home/runner/work/public/public/packages/Bio/node_modules/@jest/core/build/TestScheduler.js:333:13)
|
|
243
|
+
at runJest (/home/runner/work/public/public/packages/Bio/node_modules/@jest/core/build/runJest.js:404:19)
|
|
244
|
+
at _run10000 (/home/runner/work/public/public/packages/Bio/node_modules/@jest/core/build/cli/index.js:320:7)
|
|
245
|
+
at runCLI (/home/runner/work/public/public/packages/Bio/node_modules/@jest/core/build/cli/index.js:173:3)
|
|
246
|
+
at Object.run (/home/runner/work/public/public/packages/Bio/node_modules/jest-cli/build/cli/index.js:155:37)</pre></div></div></div><div class="suite-consolelog"><div class="suite-consolelog-header">Console Log</div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at Object.<anonymous> (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:63:11)
|
|
240
247
|
at Generator.next (<anonymous>)
|
|
241
248
|
at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:28:58)
|
|
242
249
|
at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message">Using web root: http://localhost:8080</pre></div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:24:11
|
|
@@ -246,15 +253,4 @@ Received: true
|
|
|
246
253
|
at Object.<anonymous>.__awaiter (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:30:12)
|
|
247
254
|
at Object.<anonymous> (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:22:23)
|
|
248
255
|
at Promise.then.completed (/home/runner/work/public/public/packages/Bio/node_modules/jest-circus/build/utils.js:391:28)
|
|
249
|
-
at new Promise (<anonymous>)</pre><pre class="suite-consolelog-item-message">Testing Bio package</pre></div
|
|
250
|
-
at Generator.next (<anonymous>)
|
|
251
|
-
at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:31:58)
|
|
252
|
-
at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message">detectors.samplesIdCsvNegativeID: Operation caused an exception (FileSystemException: Cannot open file, path = '/home/grok/data/prod/packages/data/Bio/samples/id.csv' (OS Error: No such file or directory, errno = 2))
|
|
253
|
-
MSA.is_correct: TypeError: Cannot read properties of undefined (reading 'split')
|
|
254
|
-
sequenceSpace.sequenceSpaceOpens: TypeError: Cannot read properties of undefined (reading 'col')
|
|
255
|
-
sequenceSpace.init: Operation caused an exception (FileSystemException: Cannot open file, path = '/home/grok/data/prod/packages/data/Bio/sample_FASTA.csv' (OS Error: No such file or directory, errno = 2))
|
|
256
|
-
activityCliffs.activityCliffsOpen: TypeError: Cannot read properties of undefined (reading 'columns')
|
|
257
|
-
activityCliffs.init: TypeError: Cannot read properties of undefined (reading 'close')
|
|
258
|
-
activityCliffs.init: Operation caused an exception (FileSystemException: Cannot open file, path = '/home/grok/data/prod/packages/data/Bio/sample_MSA.csv' (OS Error: No such file or directory, errno = 2))
|
|
259
|
-
renderers.afterMsa: Error: Expected "Macromolecule", got "null"
|
|
260
|
-
</pre></div></div></div></div></body></html>
|
|
256
|
+
at new Promise (<anonymous>)</pre><pre class="suite-consolelog-item-message">Testing Bio package</pre></div></div></div></div></body></html>
|