@datagrok/bio 1.11.2 → 2.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "1.11.2",
8
+ "version": "2.0.0",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -16,7 +16,7 @@
16
16
  "@biowasm/aioli": ">=2.4.0",
17
17
  "@datagrok-libraries/bio": "^4.2.1",
18
18
  "@datagrok-libraries/chem-meta": "1.0.0",
19
- "@datagrok-libraries/ml": "^6.0.0",
19
+ "@datagrok-libraries/ml": "^6.1.0",
20
20
  "@datagrok-libraries/utils": "^1.6.2",
21
21
  "cash-dom": "latest",
22
22
  "datagrok-api": "^1.6.7",
@@ -70,11 +70,7 @@
70
70
  "Developers"
71
71
  ],
72
72
  "sources": [
73
- "css/helm.css",
74
- "https://ajax.googleapis.com/ajax/libs/dojo/1.10.4/dojo/dojo.js",
75
- "helm/JSDraw/Scilligence.JSDraw2.Lite.js",
76
- "helm/JSDraw/Scilligence.JSDraw2.Resources.js",
77
- "helm/JSDraw/Pistoia.HELM-uncompressed.js"
73
+ "css/helm.css"
78
74
  ],
79
75
  "category": "Bioinformatics"
80
76
  }
package/src/package.ts CHANGED
@@ -37,17 +37,6 @@ import * as C from './utils/constants';
37
37
  export async function initBio() {
38
38
  }
39
39
 
40
- //name: testManySequencesPerformance
41
- export function testManySequencesPerformance(): void {
42
- performanceTest(generateManySequences, 'Many sequences');
43
- }
44
-
45
- //name: testLongSequencesPerformance
46
- export function testLongSequencesPerformance(): void {
47
- performanceTest(generateLongSequence, 'Long sequences');
48
- }
49
-
50
-
51
40
  //name: fastaSequenceCellRenderer
52
41
  //tags: cellRenderer
53
42
  //meta.cellType: Sequence
@@ -148,7 +137,7 @@ export function vdRegionViewer() {
148
137
  //input: dataframe table [Input data table]
149
138
  //input: column macroMolecule {semType: Macromolecule}
150
139
  //input: column activities
151
- //input: double similarity = 80 [Similarity cutoff]
140
+ //input: double similarity = 90 [Similarity cutoff]
152
141
  //input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
153
142
  export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
154
143
  similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
@@ -457,7 +446,16 @@ export function splitToMonomers(col: DG.Column<string>): void {
457
446
  for (const tempCol of tempDf.columns) {
458
447
  const newCol = originalDf.columns.add(tempCol);
459
448
  newCol.semType = C.SEM_TYPES.MONOMER;
460
- // newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
449
+ newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
461
450
  newCol.setTag(C.TAGS.ALPHABET, col.getTag(C.TAGS.ALPHABET));
462
451
  }
452
+ (grok.shell.v as DG.TableView)?.grid.invalidate();
463
453
  }
454
+
455
+ //name: Bio: getHelmMonomers
456
+ //input: column col {semType: Macromolecule}
457
+ //output: string[] result
458
+ export function getHelmMonomers(seqCol: DG.Column<string>): string[] {
459
+ const stats = WebLogo.getStats(seqCol, 5, WebLogo.splitterAsHelm);
460
+ return Object.keys(stats.freq);
461
+ }
@@ -6,6 +6,7 @@ import {importFasta, multipleSequenceAlignmentAny} from '../package';
6
6
  import {convertDo} from '../utils/convert';
7
7
  import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
8
  import {SEM_TYPES, TAGS} from '../utils/constants';
9
+ import {generateLongSequence, generateManySequences, performanceTest} from './test-sequnces-generators';
9
10
 
10
11
  category('renderers', () => {
11
12
  let tvList: DG.TableView[];
@@ -22,6 +23,14 @@ category('renderers', () => {
22
23
  tvList.forEach((tv: DG.TableView) => tv.close());
23
24
  });
24
25
 
26
+ test('long sequence performance ', async () => {
27
+ performanceTest(generateLongSequence, 'Long sequences');
28
+ });
29
+
30
+ test('many sequence performance', async () => {
31
+ performanceTest(generateManySequences, 'Many sequences');
32
+ });
33
+
25
34
  test('afterMsa', async () => {
26
35
  await _testAfterMsa();
27
36
  });
@@ -196,7 +196,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
196
196
  }
197
197
 
198
198
  export class MonomerCellRenderer extends DG.GridCellRenderer {
199
- get name(): string {return 'MonomerCR';}
199
+ get name(): string {return C.SEM_TYPES.MONOMER;}
200
200
 
201
201
  get cellType(): string {return C.SEM_TYPES.MONOMER;}
202
202
 
@@ -217,21 +217,17 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
217
217
  */
218
218
  render(
219
219
  g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
220
- cellStyle: DG.GridCellStyle): void {
221
- y -= 2;
222
- g.save();
223
- g.beginPath();
224
- g.rect(x, y, w, h);
225
- g.clip();
220
+ _cellStyle: DG.GridCellStyle): void {
226
221
  g.font = `12px monospace`;
227
- g.textBaseline = 'top';
222
+ g.textBaseline = 'middle';
223
+ g.textAlign = 'center';
228
224
 
229
- const palette = getPaletteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]);
225
+ const palette = getPaletteByType(gridCell.cell.column.getTag(C.TAGS.ALPHABET));
230
226
  const s: string = gridCell.cell.value ? gridCell.cell.value : '-';
231
227
  const color = palette.get(s);
232
228
 
233
- printLeftOrCentered(x, y, w, h, g, s, color, 0, false);
234
- g.restore();
229
+ g.fillStyle = color;
230
+ g.fillText(s, x + (w / 2), y - (h / 2));
235
231
  }
236
232
  }
237
233
 
@@ -4,47 +4,32 @@ import * as ui from 'datagrok-api/ui';
4
4
  import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
5
5
  import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
6
6
  import * as grok from 'datagrok-api/grok';
7
+ import { SplitterFunc, WebLogo } from '@datagrok-libraries/bio/src/viewers/web-logo';
8
+ import { UnitsHandler } from '@datagrok-libraries/bio/src/utils/units-handler';
7
9
 
8
10
  export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
9
11
  const stringArray = col.toList();
10
12
  const distances = new Array(stringArray.length).fill(0);
11
- for (let i = 0; i < stringArray.length; ++i)
12
- distances[i] = stringArray[i] ? AvailableMetrics['String']['Levenshtein'](stringArray[i], seq) : null;
13
+ for (let i = 0; i < stringArray.length; ++i) {
14
+ const distance = stringArray[i] ? AvailableMetrics['String']['Levenshtein'](stringArray[i], seq) : null;
15
+ distances[i] = distance ? distance/Math.max((stringArray[i] as string).length, seq.length) : null;
16
+ }
13
17
  return distances;
14
18
  }
15
19
 
16
20
  export async function getSimilaritiesMarix(dim: number, seqCol: DG.Column, df: DG.DataFrame, colName: string, simArr: DG.Column[])
17
21
  : Promise<DG.Column[]> {
18
22
 
19
- function arrayMin(arr: number[]) {
20
- return arr.reduce(function (p, v) {
21
- return (p < v ? p : v);
22
- });
23
- }
24
-
25
- function arrayMax(arr: number[]) {
26
- return arr.reduce(function (p, v) {
27
- return (p > v ? p : v);
28
- });
29
- }
30
23
  const distances = new Array(simArr.length).fill(null);
31
- let min = Infinity;
32
- let max = -Infinity;
33
24
  for (let i = 0; i != dim - 1; ++i) {
34
- const seq = seqCol.get(i);
25
+ const seq: string = seqCol.get(i);
35
26
  df.rows.removeAt(0, 1, false);
36
27
  distances[i] = (await getDistances(df.col(colName)!, seq))!;
37
- const newMin = arrayMin(distances[i]);
38
- const newMax = arrayMax(distances[i]);
39
- if (newMin < min)
40
- min = newMin;
41
- if (newMax > max)
42
- max = newMax;
43
28
  }
44
29
 
45
30
  for (let i = 0; i < distances.length; i++) {
46
31
  for (let j = 0; j < distances[i].length; j++) {
47
- distances[i][j] = getSimilarityFromDistance((distances[i][j] - min)/(max - min));
32
+ distances[i][j] = getSimilarityFromDistance(distances[i][j]);
48
33
  }
49
34
  simArr[i] = DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances[i]);
50
35
  }
@@ -1,4 +1,4 @@
1
- <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=f1ac5a5eade4. Commit a0f7e8c0.</title><style type="text/css">html,
1
+ <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=c5a05867bf79. Commit 1b9affa5.</title><style type="text/css">html,
2
2
  body {
3
3
  font-family: Arial, Helvetica, sans-serif;
4
4
  font-size: 1rem;
@@ -229,7 +229,7 @@ header {
229
229
  font-size: 1rem;
230
230
  padding: 0 0.5rem;
231
231
  }
232
- </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=f1ac5a5eade4. Commit a0f7e8c0.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-09-08 10:03:50</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">10.76s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">0s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: Unknown server alias. Please add it to /home/runner/.grok/config.yaml
232
+ </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=c5a05867bf79. Commit 1b9affa5.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-09-13 11:28:35</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">8.755s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">0s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: Unknown server alias. Please add it to /home/runner/.grok/config.yaml
233
233
  at getDevKey (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:48:13)
234
234
  at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:57:15)
235
235
  at Generator.next (&lt;anonymous&gt;)