@datagrok/bio 1.7.11 → 1.7.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@datagrok/bio",
3
3
  "beta": false,
4
4
  "friendlyName": "Bio",
5
- "version": "1.7.11",
5
+ "version": "1.7.12",
6
6
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
7
7
  "repository": {
8
8
  "type": "git",
@@ -13,7 +13,7 @@
13
13
  "@biowasm/aioli": ">=2.4.0",
14
14
  "@datagrok-libraries/bio": "^2.8.5",
15
15
  "@datagrok-libraries/utils": "^1.0.0",
16
- "@datagrok-libraries/ml": "^2.0.10",
16
+ "@datagrok-libraries/ml": "^3.0.0",
17
17
  "cash-dom": "latest",
18
18
  "datagrok-api": "^1.4.12",
19
19
  "dayjs": "latest",
package/src/const.ts CHANGED
@@ -23,3 +23,8 @@ export const CAP_GROUP_NAME = 'capGroupName';
23
23
  export const RGROUP_LABEL = 'label';
24
24
  export const MONOMER_SYMBOL = 'symbol';
25
25
  export const SDF_MONOMER_NAME = 'MonomerName';
26
+
27
+ // range of hex numbers used in PepSea library to encode monomers
28
+ export const MONOMER_ENCODE_MIN = 0x100;
29
+ export const MONOMER_ENCODE_MAX = 0x40A;
30
+
package/src/package.ts CHANGED
@@ -16,7 +16,7 @@ import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
16
16
  import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
17
17
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
18
18
  import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
19
- import {createJsonMonomerLibFromSdf, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
19
+ import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
20
20
  import {getMacroMol} from './utils/atomic-works';
21
21
  import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
22
22
  import {convert} from './utils/convert';
@@ -112,7 +112,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
112
112
  similarity: number, methodName: string): Promise<void> {
113
113
  if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
114
114
  return;
115
-
115
+ const encodedCol = encodeMonomers(macroMolecule);
116
+ if (!encodedCol)
117
+ return;
116
118
  const axesNames = getEmbeddingColsNames(df);
117
119
  const options = {
118
120
  'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
@@ -121,6 +123,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
121
123
  await getActivityCliffs(
122
124
  df,
123
125
  macroMolecule,
126
+ encodedCol,
124
127
  axesNames,
125
128
  'Activity cliffs',
126
129
  activities,
@@ -146,10 +149,12 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
146
149
  similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
147
150
  if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
148
151
  return;
149
-
152
+ const encodedCol = encodeMonomers(macroMolecule);
153
+ if (!encodedCol)
154
+ return;
150
155
  const embedColsNames = getEmbeddingColsNames(table);
151
156
  const chemSpaceParams = {
152
- seqCol: macroMolecule,
157
+ seqCol: encodedCol,
153
158
  methodName: methodName,
154
159
  similarityMetric: similarityMetric,
155
160
  embedAxesNames: embedColsNames
@@ -7,6 +7,7 @@ import {readDataframe} from './utils';
7
7
  import {getEmbeddingColsNames, sequenceSpace} from '../utils/sequence-space';
8
8
  import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
9
9
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
10
+ import { encodeMonomers } from '../utils/utils';
10
11
 
11
12
 
12
13
  category('activityCliffs', async () => {
@@ -31,9 +32,11 @@ category('activityCliffs', async () => {
31
32
  const options = {
32
33
  'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
33
34
  };
35
+ const encodedCol = encodeMonomers(actCliffsDf.col('MSA')!) as DG.Column;
34
36
  const scatterPlot = await getActivityCliffs(
35
37
  actCliffsDf,
36
38
  actCliffsDf.col('MSA')!,
39
+ encodedCol,
37
40
  axesNames,
38
41
  'Activity cliffs',
39
42
  actCliffsDf.col('Activity')!,
@@ -2,7 +2,7 @@ import * as DG from 'datagrok-api/dg';
2
2
  import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
3
3
  import * as grok from 'datagrok-api/grok';
4
4
  import {
5
- CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_SYMBOL,
5
+ CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, MONOMER_SYMBOL,
6
6
  RGROUP_ALTER_ID, RGROUP_FIELD, RGROUP_LABEL, SDF_MONOMER_NAME
7
7
  } from '../const';
8
8
 
@@ -11,6 +11,33 @@ export const HELM_CORE_LIB_MONOMER_SYMBOL = 'symbol';
11
11
  export const HELM_CORE_LIB_MOLFILE = 'molfile';
12
12
  export const HELM_CORE_FIELDS = ['symbol', 'molfile', 'rgroups', 'name'];
13
13
 
14
+
15
/**
 * Builds a string column in which every monomer of the input sequence column
 * is replaced by a single character.
 *
 * Each distinct monomer symbol is assigned the next free code point, starting
 * at `MONOMER_ENCODE_MIN`, in order of first appearance; the same monomer
 * always maps to the same character within one call.
 *
 * @param col Sequence column; its units tag and `separator` tag are used to
 *   pick the splitter that breaks each value into monomers.
 * @returns A new column named `encodedMolecules` with one encoded string per
 *   row, or `null` (after reporting an error to the shell) when the column
 *   contains more distinct monomers than there are code points up to
 *   `MONOMER_ENCODE_MAX`.
 */
export function encodeMonomers(col: DG.Column): DG.Column | null {
  let encodeSymbol = MONOMER_ENCODE_MIN;
  // A Map (not a plain object) so that monomer symbols that coincide with
  // Object.prototype members, e.g. 'constructor', are not mistaken for
  // already-encoded entries.
  const monomerSymbolDict = new Map<string, number>();
  const units = col.tags[DG.TAGS.UNITS];
  const sep = col.getTag('separator');
  const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, sep);
  const encodedStringArray: string[] = [];
  for (let i = 0; i < col.length; ++i) {
    let encodedMonomerStr = '';
    const monomers = splitterFunc(col.get(i));
    // A plain loop instead of forEach: `return null` must leave the whole
    // function, not just a per-monomer callback.
    for (const m of monomers) {
      let code = monomerSymbolDict.get(m);
      if (code === undefined) {
        if (encodeSymbol > MONOMER_ENCODE_MAX) {
          grok.shell.error(`Not enougth symbols to encode monomers`);
          return null;
        }
        code = encodeSymbol;
        monomerSymbolDict.set(m, code);
        encodeSymbol++;
      }
      encodedMonomerStr += String.fromCodePoint(code);
    }
    encodedStringArray.push(encodedMonomerStr);
  }
  return DG.Column.fromStrings('encodedMolecules', encodedStringArray);
}
40
+
14
41
  export function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): any[][] | null {
15
42
  const units = col.tags[DG.TAGS.UNITS];
16
43
  const sep = col.getTag('separator');
@@ -79,4 +106,4 @@ export function createJsonMonomerLibFromSdf(table: DG.DataFrame): any {
79
106
  resultLib.push(monomer);
80
107
  }
81
108
  return resultLib;
82
- }
109
+ }
@@ -1,4 +1,4 @@
1
- <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit f542cbde.</title><style type="text/css">html,
1
+ <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 6fa72ec4.</title><style type="text/css">html,
2
2
  body {
3
3
  font-family: Arial, Helvetica, sans-serif;
4
4
  font-size: 1rem;
@@ -229,7 +229,8 @@ header {
229
229
  font-size: 1rem;
230
230
  padding: 0 0.5rem;
231
231
  }
232
- </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit f542cbde.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-20 17:49:40</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">209.758s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">199.086s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: Test result : Bio.MSA.is_correct : TypeError: Cannot read properties of undefined (reading 'split')
232
+ </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 6fa72ec4.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-22 13:03:45</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">213.603s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">199.146s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: Test result : Bio.MSA.is_correct : TypeError: Cannot read properties of undefined (reading 'split')
233
+ Test result : Bio.activityCliffs.activityCliffsOpen : Error: Expected "105 cliffs", got "2362 cliffs"
233
234
 
234
235
  at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:67:20
235
236
  at Generator.next (&lt;anonymous&gt;)
@@ -310,7 +311,6 @@ Test result : Bio.detectors.samplesTestUnichemSourcesNegativeSrcUrl : OK
310
311
  Test result : Bio.detectors.samplesTestUnichemSourcesNegativeBaseIdUrl : OK
311
312
  Test result : Bio.MSA.test_table.is_not_empty : OK
312
313
  Test result : Bio.sequenceSpace.sequenceSpaceOpens : OK
313
- Test result : Bio.activityCliffs.activityCliffsOpen : OK
314
314
  Test result : Bio.splitters.helm1 : OK
315
315
  Test result : Bio.splitters.helm2 : OK
316
316
  Test result : Bio.splitters.helm3-multichar : OK