@datagrok/bio 1.11.4 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "1.11.4",
8
+ "version": "2.0.1",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -70,11 +70,7 @@
70
70
  "Developers"
71
71
  ],
72
72
  "sources": [
73
- "css/helm.css",
74
- "https://ajax.googleapis.com/ajax/libs/dojo/1.10.4/dojo/dojo.js",
75
- "helm/JSDraw/Scilligence.JSDraw2.Lite.js",
76
- "helm/JSDraw/Scilligence.JSDraw2.Resources.js",
77
- "helm/JSDraw/Pistoia.HELM-uncompressed.js"
73
+ "css/helm.css"
78
74
  ],
79
75
  "category": "Bioinformatics"
80
76
  }
@@ -0,0 +1,76 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ const V2000_ATOM_NAME_POS = 31;
6
+
7
/**
 * Computes RDKit pattern fingerprints for monomer-level molfiles.
 *
 * Every monomer name is mapped to its own pseudo-atom label (`R10`, `R100`, `R1000`, ...), the
 * molfiles are rewritten by `changeToV3000` with those labels as atom types, and each rewritten
 * molfile is fingerprinted with the module returned by the `Chem:getRdKitModule` function.
 *
 * @param mols Molfiles to fingerprint (presumably V2000 molfiles whose atom symbols are monomer
 *   names - confirm against the producer). The array itself is left untouched.
 * @param monomers Monomer names that may occur in `mols`; the position of a name defines its label.
 * @returns One fingerprint per entry of `mols`, in the same order.
 * @throws Error when the RDKit module returns no molecule for a converted molfile.
 */
export async function getFingerprints(mols: Array<string>, monomers: Array<string>): Promise<Uint8Array[]> {
  const mod = await grok.functions.call('Chem:getRdKitModule');

  const dict = new Map<string, string>();
  for (let i = 0; i < monomers.length; i++) {
    // The label is assembled as text: formatting Math.pow(10, i + 1) switches to exponent notation
    // at 1e21 ("R1e+21") and to "RInfinity" past 1e308, which breaks labels for long monomer lists.
    dict.set(monomers[i], `R1${'0'.repeat(i + 1)}`);
  }

  // Work on a copy so the caller's array keeps its original molfiles even if the converter
  // writes its results in place.
  const molsV3000 = changeToV3000([...mols], dict);

  const fps: Uint8Array[] = [];
  for (let i = 0; i < molsV3000.length; i++) {
    const mol = mod.get_mol(molsV3000[i]);
    if (mol == null)
      throw new Error(`Unable to build a molecule from the converted molfile at index ${i}`);

    try {
      fps.push(mol.get_pattern_fp_as_uint8array());
    } finally {
      // Release the molecule even when fingerprinting throws.
      mol.delete();
    }
  }

  return fps;
}
26
+
27
/**
 * Rewrites V2000 molfiles as minimal V3000 molfiles whose atom types are the labels from `dict`.
 *
 * For every molfile the three header lines are skipped, the atom and bond counts are read from the
 * first two 3-character fields of the counts line, each atom name (read at column
 * `V2000_ATOM_NAME_POS` of its line) is replaced by its label, and each bond is copied as
 * `index order firstAtom secondAtom`. Atom positions are written as zeros.
 *
 * @param mols V2000 molfiles; the array is not modified.
 * @param dict Maps an atom (monomer) name found in the molfiles to the label written instead of it.
 * @returns A new array with one V3000 molfile per input molfile, in the same order.
 * @throws Error when a molfile is truncated, its counts are not numeric, or an atom name has no
 *   entry in `dict`.
 */
function changeToV3000(mols: Array<string>, dict: Map<string, string>): Array<string> {
  // CTfile lines are column-sensitive: V3000 lines start with "M  V30 " and the version tag of the
  // counts line sits after eleven 3-character fields, so the spacing is spelled out explicitly.
  const header = [
    '',
    'Datagrok macromolecule handler',
    '',
    `${'  0'.repeat(6)}${' '.repeat(12)}999 V3000`,
    'M  V30 BEGIN CTAB',
  ];
  // Sticky pattern: reads the run of non-whitespace characters starting exactly at `lastIndex`.
  const nameRe = /\S*/y;

  return mols.map((mol, molI) => {
    const nextLineStart = (from: number): number => {
      const eol = mol.indexOf('\n', from);
      if (eol === -1)
        throw new Error(`Molfile at index ${molI} is truncated`);
      return eol + 1;
    };

    // Skip the three header lines; curPos then points at the counts line.
    let curPos = 0;
    for (let headerLineI = 0; headerLineI < 3; headerLineI++)
      curPos = nextLineStart(curPos);

    const atomCount = Number.parseInt(mol.substring(curPos, curPos + 3), 10);
    const bondCount = Number.parseInt(mol.substring(curPos + 3, curPos + 6), 10);
    if (Number.isNaN(atomCount) || Number.isNaN(bondCount))
      throw new Error(`Molfile at index ${molI} has a malformed counts line`);

    const lines = [...header, `M  V30 COUNTS ${atomCount} ${bondCount} 0 0 0`, 'M  V30 BEGIN ATOM'];

    for (let atomI = 0; atomI < atomCount; atomI++) {
      // After this step curPos stays inside the current atom line, so the next search for a line
      // break lands on the end of that same line.
      curPos = nextLineStart(curPos) + V2000_ATOM_NAME_POS;
      nameRe.lastIndex = curPos;
      const monomerName = nameRe.exec(mol)?.[0] ?? '';
      const label = dict.get(monomerName);
      if (label === undefined)
        throw new Error(`Molfile at index ${molI}: no label for monomer '${monomerName}' (atom ${atomI + 1})`);
      lines.push(`M  V30 ${atomI + 1} ${label} 0.000 0.000 0 0`);
    }

    lines.push('M  V30 END ATOM', 'M  V30 BEGIN BOND');

    for (let bondI = 0; bondI < bondCount; bondI++) {
      curPos = nextLineStart(curPos);
      const firstAtom = Number.parseInt(mol.substring(curPos, curPos + 3), 10);
      const secondAtom = Number.parseInt(mol.substring(curPos + 3, curPos + 6), 10);
      const order = Number.parseInt(mol.substring(curPos + 6, curPos + 9), 10);
      lines.push(`M  V30 ${bondI + 1} ${order} ${firstAtom} ${secondAtom}`);
    }

    lines.push('M  V30 END BOND', 'M  V30 END CTAB', 'M  END');
    return lines.join('\n');
  });
}
package/src/package.ts CHANGED
@@ -32,22 +32,12 @@ import {
32
32
 
33
33
  import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
34
34
  import * as C from './utils/constants';
35
+ import {getFingerprints} from './calculations/fingerprints'
35
36
 
36
37
  //tags: init
37
38
  export async function initBio() {
38
39
  }
39
40
 
40
- //name: testManySequencesPerformance
41
- export function testManySequencesPerformance(): void {
42
- performanceTest(generateManySequences, 'Many sequences');
43
- }
44
-
45
- //name: testLongSequencesPerformance
46
- export function testLongSequencesPerformance(): void {
47
- performanceTest(generateLongSequence, 'Long sequences');
48
- }
49
-
50
-
51
41
  //name: fastaSequenceCellRenderer
52
42
  //tags: cellRenderer
53
43
  //meta.cellType: Sequence
@@ -457,7 +447,23 @@ export function splitToMonomers(col: DG.Column<string>): void {
457
447
  for (const tempCol of tempDf.columns) {
458
448
  const newCol = originalDf.columns.add(tempCol);
459
449
  newCol.semType = C.SEM_TYPES.MONOMER;
460
- // newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
450
+ newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
461
451
  newCol.setTag(C.TAGS.ALPHABET, col.getTag(C.TAGS.ALPHABET));
462
452
  }
453
+ grok.shell.tv.grid.invalidate();
454
+ }
455
+
456
+ //name: Bio: getHelmMonomers
457
+ //input: column col {semType: Macromolecule}
458
export function getHelmMonomers(seqCol: DG.Column<string>): string[] {
  // The monomer names are the keys of the `freq` member of the stats that WebLogo
  // builds for the column with its HELM splitter.
  const {freq} = WebLogo.getStats(seqCol, 1, WebLogo.splitterAsHelm);
  return Object.keys(freq);
}
462
+
463
/**
 * Computes a pattern fingerprint for every macromolecule of a column.
 *
 * The monomer list comes from `getHelmMonomers`, the molfiles from the `HELM:getMolFiles`
 * function, and the fingerprints from `getFingerprints`.
 *
 * @param mcol Column with macromolecules (presumably in HELM notation - confirm against callers).
 * @returns The fingerprints returned by `getFingerprints` for the molfiles of `mcol`.
 */
export async function macromoleculesFingerprints(mcol: DG.Column): Promise<Uint8Array[]> {
  const monomers = getHelmMonomers(mcol);

  // The RDKit module request used to be fired without being awaited, so a failure surfaced as an
  // unhandled rejection. Awaiting it together with the molfile request keeps both running
  // concurrently while reporting either failure to the caller.
  const [, mols] = await Promise.all([
    grok.functions.call('Chem:getRdKitModule'),
    grok.functions.call('HELM:getMolFiles', {mcol: mcol}),
  ]);

  return getFingerprints(mols.toList(), monomers);
}
@@ -6,6 +6,7 @@ import {importFasta, multipleSequenceAlignmentAny} from '../package';
6
6
  import {convertDo} from '../utils/convert';
7
7
  import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
8
  import {SEM_TYPES, TAGS} from '../utils/constants';
9
+ import {generateLongSequence, generateManySequences, performanceTest} from './test-sequnces-generators';
9
10
 
10
11
  category('renderers', () => {
11
12
  let tvList: DG.TableView[];
@@ -22,6 +23,14 @@ category('renderers', () => {
22
23
  tvList.forEach((tv: DG.TableView) => tv.close());
23
24
  });
24
25
 
26
+ test('long sequence performance ', async () => {
27
+ performanceTest(generateLongSequence, 'Long sequences');
28
+ });
29
+
30
+ test('many sequence performance', async () => {
31
+ performanceTest(generateManySequences, 'Many sequences');
32
+ });
33
+
25
34
  test('afterMsa', async () => {
26
35
  await _testAfterMsa();
27
36
  });
@@ -196,7 +196,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
196
196
  }
197
197
 
198
198
  export class MonomerCellRenderer extends DG.GridCellRenderer {
199
- get name(): string {return 'MonomerCR';}
199
+ get name(): string {return C.SEM_TYPES.MONOMER;}
200
200
 
201
201
  get cellType(): string {return C.SEM_TYPES.MONOMER;}
202
202
 
@@ -217,21 +217,17 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
217
217
  */
218
218
  render(
219
219
  g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
220
- cellStyle: DG.GridCellStyle): void {
221
- y -= 2;
222
- g.save();
223
- g.beginPath();
224
- g.rect(x, y, w, h);
225
- g.clip();
220
+ _cellStyle: DG.GridCellStyle): void {
226
221
  g.font = `12px monospace`;
227
- g.textBaseline = 'top';
222
+ g.textBaseline = 'middle';
223
+ g.textAlign = 'center';
228
224
 
229
- const palette = getPaletteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]);
230
- const s: string = gridCell.cell.value ? gridCell.cell.value : '-';
225
+ const palette = getPaletteByType(gridCell.cell.column.getTag(C.TAGS.ALPHABET));
226
+ const s: string = gridCell.cell.value || '-';
231
227
  const color = palette.get(s);
232
228
 
233
- printLeftOrCentered(x, y, w, h, g, s, color, 0, false);
234
- g.restore();
229
+ g.fillStyle = color;
230
+ g.fillText(s, x + (w / 2), y - (h / 2), w);
235
231
  }
236
232
  }
237
233
 
@@ -1,4 +1,4 @@
1
- <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=a3ba57cf36f3. Commit 42effe59.</title><style type="text/css">html,
1
+ <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=c5a05867bf79. Commit 5c91e1fb.</title><style type="text/css">html,
2
2
  body {
3
3
  font-family: Arial, Helvetica, sans-serif;
4
4
  font-size: 1rem;
@@ -229,7 +229,7 @@ header {
229
229
  font-size: 1rem;
230
230
  padding: 0 0.5rem;
231
231
  }
232
- </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=a3ba57cf36f3. Commit 42effe59.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-09-09 08:55:59</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">8.833s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">0.001s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: Unknown server alias. Please add it to /home/runner/.grok/config.yaml
232
+ </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=c5a05867bf79. Commit 5c91e1fb.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-09-14 12:47:46</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">7.679s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">0.001s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: Unknown server alias. Please add it to /home/runner/.grok/config.yaml
233
233
  at getDevKey (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:48:13)
234
234
  at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:57:15)
235
235
  at Generator.next (&lt;anonymous&gt;)