@datagrok/bio 1.11.4 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -9
- package/dist/package-test.js +128 -31
- package/dist/package.js +120 -87
- package/package.json +2 -6
- package/src/calculations/fingerprints.ts +76 -0
- package/src/package.ts +18 -12
- package/src/tests/renderers-test.ts +9 -0
- package/src/utils/cell-renderer.ts +8 -12
- package/{test-Bio-a3ba57cf36f3-42effe59.html → test-Bio-c5a05867bf79-5c91e1fb.html} +2 -2
- package/helm/JSDraw/Pistoia.HELM-uncompressed.js +0 -9694
- package/helm/JSDraw/Pistoia.HELM.js +0 -27
- package/helm/JSDraw/ReadMe.txt +0 -8
- package/helm/JSDraw/Scilligence.JSDraw2.Lite-uncompressed.js +0 -31126
- package/helm/JSDraw/Scilligence.JSDraw2.Lite.js +0 -12
- package/helm/JSDraw/Scilligence.JSDraw2.Resources.js +0 -762
- package/helm/JSDraw/dojo.js +0 -250
- package/helm/JSDraw/test.html +0 -21
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "
|
|
8
|
+
"version": "2.0.1",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -70,11 +70,7 @@
|
|
|
70
70
|
"Developers"
|
|
71
71
|
],
|
|
72
72
|
"sources": [
|
|
73
|
-
"css/helm.css"
|
|
74
|
-
"https://ajax.googleapis.com/ajax/libs/dojo/1.10.4/dojo/dojo.js",
|
|
75
|
-
"helm/JSDraw/Scilligence.JSDraw2.Lite.js",
|
|
76
|
-
"helm/JSDraw/Scilligence.JSDraw2.Resources.js",
|
|
77
|
-
"helm/JSDraw/Pistoia.HELM-uncompressed.js"
|
|
73
|
+
"css/helm.css"
|
|
78
74
|
],
|
|
79
75
|
"category": "Bioinformatics"
|
|
80
76
|
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
const V2000_ATOM_NAME_POS = 31;
|
|
6
|
+
|
|
7
|
+
export async function getFingerprints(mols: Array<string>, monomers: Array<string>): Promise<Uint8Array[]> {
|
|
8
|
+
const mod = await grok.functions.call('Chem:getRdKitModule');
|
|
9
|
+
const fps: Uint8Array[] = [];
|
|
10
|
+
|
|
11
|
+
let dict = new Map();
|
|
12
|
+
for(let i = 0; i < monomers.length; i++)
|
|
13
|
+
dict.set(monomers[i], `R${Math.pow(10,(i + 1))}`);
|
|
14
|
+
|
|
15
|
+
mols = changeToV3000(mols, dict);
|
|
16
|
+
|
|
17
|
+
for(let i = 0; i< mols.length; i++) {
|
|
18
|
+
const mol = mod.get_mol(mols[i]);
|
|
19
|
+
const fp = mol.get_pattern_fp_as_uint8array();
|
|
20
|
+
fps.push(fp);
|
|
21
|
+
mol?.delete();
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
return fps;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
function changeToV3000(mols: Array<string>, dict: Map<string, string>): Array<string> {
|
|
28
|
+
for (let i = 0; i < mols.length; i++) {
|
|
29
|
+
let curPos = 0;
|
|
30
|
+
let endPos = 0;
|
|
31
|
+
let molV3000 = `
|
|
32
|
+
Datagrok macromolecule handler
|
|
33
|
+
|
|
34
|
+
0 0 0 0 0 0 999 V3000
|
|
35
|
+
M V30 BEGIN CTAB
|
|
36
|
+
`;
|
|
37
|
+
|
|
38
|
+
const mol = mols[i];
|
|
39
|
+
curPos = mol.indexOf('\n', curPos) + 1;
|
|
40
|
+
curPos = mol.indexOf('\n', curPos) + 1;
|
|
41
|
+
curPos = mol.indexOf('\n', curPos) + 1;
|
|
42
|
+
|
|
43
|
+
const atomMonomerCounts = parseInt(mol.substring(curPos, curPos + 3));
|
|
44
|
+
const bondMonomerCounts = parseInt(mol.substring(curPos + 3, curPos + 6));
|
|
45
|
+
|
|
46
|
+
molV3000 += `M V30 COUNTS ${atomMonomerCounts} ${bondMonomerCounts} 0 0 0\n`;
|
|
47
|
+
molV3000 += 'M V30 BEGIN ATOM\n';
|
|
48
|
+
|
|
49
|
+
for (let atomRowI = 0; atomRowI < atomMonomerCounts; atomRowI++) {
|
|
50
|
+
curPos = mol.indexOf('\n', curPos) + 1 + V2000_ATOM_NAME_POS;
|
|
51
|
+
endPos = mol.indexOf(' ', curPos);
|
|
52
|
+
const monomerName: string = mol.substring(curPos, endPos);
|
|
53
|
+
molV3000 += `M V30 ${atomRowI + 1} ${dict.get(monomerName)} 0.000 0.000 0 0\n`;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
molV3000 += 'M V30 END ATOM\n';
|
|
57
|
+
molV3000 += 'M V30 BEGIN BOND\n';
|
|
58
|
+
|
|
59
|
+
for (let bondRowI = 0; bondRowI < bondMonomerCounts; bondRowI++) {
|
|
60
|
+
curPos = mol.indexOf('\n', curPos) + 1;
|
|
61
|
+
const firstMonomer = parseInt(mol.substring(curPos, curPos + 3).trim());
|
|
62
|
+
const secondMonomer = parseInt(mol.substring(curPos + 3, curPos + 6).trim());
|
|
63
|
+
const order = parseInt(mol.substring(curPos + 6, curPos + 9).trim());
|
|
64
|
+
|
|
65
|
+
molV3000 += `M V30 ${bondRowI + 1} ${order} ${firstMonomer} ${secondMonomer}\n`;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
molV3000 += 'M V30 END BOND\n';
|
|
69
|
+
molV3000 += 'M V30 END CTAB\n';
|
|
70
|
+
molV3000 += 'M END';
|
|
71
|
+
console.log(molV3000);
|
|
72
|
+
mols[i] = molV3000;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
return mols;
|
|
76
|
+
}
|
package/src/package.ts
CHANGED
|
@@ -32,22 +32,12 @@ import {
|
|
|
32
32
|
|
|
33
33
|
import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
|
|
34
34
|
import * as C from './utils/constants';
|
|
35
|
+
import {getFingerprints} from './calculations/fingerprints'
|
|
35
36
|
|
|
36
37
|
//tags: init
|
|
37
38
|
export async function initBio() {
|
|
38
39
|
}
|
|
39
40
|
|
|
40
|
-
//name: testManySequencesPerformance
|
|
41
|
-
export function testManySequencesPerformance(): void {
|
|
42
|
-
performanceTest(generateManySequences, 'Many sequences');
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
//name: testLongSequencesPerformance
|
|
46
|
-
export function testLongSequencesPerformance(): void {
|
|
47
|
-
performanceTest(generateLongSequence, 'Long sequences');
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
|
|
51
41
|
//name: fastaSequenceCellRenderer
|
|
52
42
|
//tags: cellRenderer
|
|
53
43
|
//meta.cellType: Sequence
|
|
@@ -457,7 +447,23 @@ export function splitToMonomers(col: DG.Column<string>): void {
|
|
|
457
447
|
for (const tempCol of tempDf.columns) {
|
|
458
448
|
const newCol = originalDf.columns.add(tempCol);
|
|
459
449
|
newCol.semType = C.SEM_TYPES.MONOMER;
|
|
460
|
-
|
|
450
|
+
newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
|
|
461
451
|
newCol.setTag(C.TAGS.ALPHABET, col.getTag(C.TAGS.ALPHABET));
|
|
462
452
|
}
|
|
453
|
+
grok.shell.tv.grid.invalidate();
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
//name: Bio: getHelmMonomers
|
|
457
|
+
//input: column col {semType: Macromolecule}
|
|
458
|
+
export function getHelmMonomers(seqCol: DG.Column<string>): string[] {
|
|
459
|
+
const stats = WebLogo.getStats(seqCol, 1, WebLogo.splitterAsHelm);
|
|
460
|
+
return Object.keys(stats.freq);
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
export async function macromoleculesFingerprints(mcol: DG.Column): Promise<Uint8Array[]> {
|
|
464
|
+
grok.functions.call('Chem:getRdKitModule');
|
|
465
|
+
const monomers = getHelmMonomers(mcol);
|
|
466
|
+
const mols = await grok.functions.call('HELM:getMolFiles', {mcol : mcol});
|
|
467
|
+
|
|
468
|
+
return getFingerprints(mols.toList(), monomers);
|
|
463
469
|
}
|
|
@@ -6,6 +6,7 @@ import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
|
6
6
|
import {convertDo} from '../utils/convert';
|
|
7
7
|
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
8
|
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
9
|
+
import {generateLongSequence, generateManySequences, performanceTest} from './test-sequnces-generators';
|
|
9
10
|
|
|
10
11
|
category('renderers', () => {
|
|
11
12
|
let tvList: DG.TableView[];
|
|
@@ -22,6 +23,14 @@ category('renderers', () => {
|
|
|
22
23
|
tvList.forEach((tv: DG.TableView) => tv.close());
|
|
23
24
|
});
|
|
24
25
|
|
|
26
|
+
test('long sequence performance ', async () => {
|
|
27
|
+
performanceTest(generateLongSequence, 'Long sequences');
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
test('many sequence performance', async () => {
|
|
31
|
+
performanceTest(generateManySequences, 'Many sequences');
|
|
32
|
+
});
|
|
33
|
+
|
|
25
34
|
test('afterMsa', async () => {
|
|
26
35
|
await _testAfterMsa();
|
|
27
36
|
});
|
|
@@ -196,7 +196,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
196
196
|
}
|
|
197
197
|
|
|
198
198
|
export class MonomerCellRenderer extends DG.GridCellRenderer {
|
|
199
|
-
get name(): string {return
|
|
199
|
+
get name(): string {return C.SEM_TYPES.MONOMER;}
|
|
200
200
|
|
|
201
201
|
get cellType(): string {return C.SEM_TYPES.MONOMER;}
|
|
202
202
|
|
|
@@ -217,21 +217,17 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
|
|
|
217
217
|
*/
|
|
218
218
|
render(
|
|
219
219
|
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
220
|
-
|
|
221
|
-
y -= 2;
|
|
222
|
-
g.save();
|
|
223
|
-
g.beginPath();
|
|
224
|
-
g.rect(x, y, w, h);
|
|
225
|
-
g.clip();
|
|
220
|
+
_cellStyle: DG.GridCellStyle): void {
|
|
226
221
|
g.font = `12px monospace`;
|
|
227
|
-
g.textBaseline = '
|
|
222
|
+
g.textBaseline = 'middle';
|
|
223
|
+
g.textAlign = 'center';
|
|
228
224
|
|
|
229
|
-
const palette = getPaletteByType(gridCell.
|
|
230
|
-
const s: string = gridCell.cell.value
|
|
225
|
+
const palette = getPaletteByType(gridCell.cell.column.getTag(C.TAGS.ALPHABET));
|
|
226
|
+
const s: string = gridCell.cell.value || '-';
|
|
231
227
|
const color = palette.get(s);
|
|
232
228
|
|
|
233
|
-
|
|
234
|
-
g.
|
|
229
|
+
g.fillStyle = color;
|
|
230
|
+
g.fillText(s, x + (w / 2), y - (h / 2), w);
|
|
235
231
|
}
|
|
236
232
|
}
|
|
237
233
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=
|
|
1
|
+
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=c5a05867bf79. Commit 5c91e1fb.</title><style type="text/css">html,
|
|
2
2
|
body {
|
|
3
3
|
font-family: Arial, Helvetica, sans-serif;
|
|
4
4
|
font-size: 1rem;
|
|
@@ -229,7 +229,7 @@ header {
|
|
|
229
229
|
font-size: 1rem;
|
|
230
230
|
padding: 0 0.5rem;
|
|
231
231
|
}
|
|
232
|
-
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=
|
|
232
|
+
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=c5a05867bf79. Commit 5c91e1fb.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-09-14 12:47:46</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">7.679s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">0.001s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: Unknown server alias. Please add it to /home/runner/.grok/config.yaml
|
|
233
233
|
at getDevKey (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:48:13)
|
|
234
234
|
at Object.<anonymous> (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:57:15)
|
|
235
235
|
at Generator.next (<anonymous>)
|