@datagrok/bio 1.7.11 → 1.7.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +47 -7
- package/dist/package.js +43 -6
- package/package.json +2 -2
- package/src/const.ts +5 -0
- package/src/package.ts +9 -4
- package/src/tests/activity-cliffs-tests.ts +3 -0
- package/src/utils/utils.ts +29 -2
- package/{test-Bio-34f75e5127b8-f542cbde.html → test-Bio-34f75e5127b8-6fa72ec4.html} +3 -3
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.7.
|
|
5
|
+
"version": "1.7.12",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
14
|
"@datagrok-libraries/bio": "^2.8.5",
|
|
15
15
|
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
|
-
"@datagrok-libraries/ml": "^
|
|
16
|
+
"@datagrok-libraries/ml": "^3.0.0",
|
|
17
17
|
"cash-dom": "latest",
|
|
18
18
|
"datagrok-api": "^1.4.12",
|
|
19
19
|
"dayjs": "latest",
|
package/src/const.ts
CHANGED
|
@@ -23,3 +23,8 @@ export const CAP_GROUP_NAME = 'capGroupName';
|
|
|
23
23
|
export const RGROUP_LABEL = 'label';
|
|
24
24
|
export const MONOMER_SYMBOL = 'symbol';
|
|
25
25
|
export const SDF_MONOMER_NAME = 'MonomerName';
|
|
26
|
+
|
|
27
|
+
// range of hex numbers used in PepSea library to encode monomers
|
|
28
|
+
export const MONOMER_ENCODE_MIN = 0x100;
|
|
29
|
+
export const MONOMER_ENCODE_MAX = 0x40A;
|
|
30
|
+
|
package/src/package.ts
CHANGED
|
@@ -16,7 +16,7 @@ import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
|
16
16
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
17
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
18
|
import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
|
|
19
|
-
import {createJsonMonomerLibFromSdf, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
19
|
+
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
22
|
import {convert} from './utils/convert';
|
|
@@ -112,7 +112,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
112
112
|
similarity: number, methodName: string): Promise<void> {
|
|
113
113
|
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
114
114
|
return;
|
|
115
|
-
|
|
115
|
+
const encodedCol = encodeMonomers(macroMolecule);
|
|
116
|
+
if (!encodedCol)
|
|
117
|
+
return;
|
|
116
118
|
const axesNames = getEmbeddingColsNames(df);
|
|
117
119
|
const options = {
|
|
118
120
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
@@ -121,6 +123,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
121
123
|
await getActivityCliffs(
|
|
122
124
|
df,
|
|
123
125
|
macroMolecule,
|
|
126
|
+
encodedCol,
|
|
124
127
|
axesNames,
|
|
125
128
|
'Activity cliffs',
|
|
126
129
|
activities,
|
|
@@ -146,10 +149,12 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
146
149
|
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
|
|
147
150
|
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
148
151
|
return;
|
|
149
|
-
|
|
152
|
+
const encodedCol = encodeMonomers(macroMolecule);
|
|
153
|
+
if (!encodedCol)
|
|
154
|
+
return;
|
|
150
155
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
151
156
|
const chemSpaceParams = {
|
|
152
|
-
seqCol:
|
|
157
|
+
seqCol: encodedCol,
|
|
153
158
|
methodName: methodName,
|
|
154
159
|
similarityMetric: similarityMetric,
|
|
155
160
|
embedAxesNames: embedColsNames
|
|
@@ -7,6 +7,7 @@ import {readDataframe} from './utils';
|
|
|
7
7
|
import {getEmbeddingColsNames, sequenceSpace} from '../utils/sequence-space';
|
|
8
8
|
import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
|
|
9
9
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
10
|
+
import { encodeMonomers } from '../utils/utils';
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
category('activityCliffs', async () => {
|
|
@@ -31,9 +32,11 @@ category('activityCliffs', async () => {
|
|
|
31
32
|
const options = {
|
|
32
33
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
33
34
|
};
|
|
35
|
+
const encodedCol = encodeMonomers(actCliffsDf.col('MSA')!) as DG.Column;
|
|
34
36
|
const scatterPlot = await getActivityCliffs(
|
|
35
37
|
actCliffsDf,
|
|
36
38
|
actCliffsDf.col('MSA')!,
|
|
39
|
+
encodedCol,
|
|
37
40
|
axesNames,
|
|
38
41
|
'Activity cliffs',
|
|
39
42
|
actCliffsDf.col('Activity')!,
|
package/src/utils/utils.ts
CHANGED
|
@@ -2,7 +2,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
4
|
import {
|
|
5
|
-
CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_SYMBOL,
|
|
5
|
+
CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, MONOMER_SYMBOL,
|
|
6
6
|
RGROUP_ALTER_ID, RGROUP_FIELD, RGROUP_LABEL, SDF_MONOMER_NAME
|
|
7
7
|
} from '../const';
|
|
8
8
|
|
|
@@ -11,6 +11,33 @@ export const HELM_CORE_LIB_MONOMER_SYMBOL = 'symbol';
|
|
|
11
11
|
export const HELM_CORE_LIB_MOLFILE = 'molfile';
|
|
12
12
|
export const HELM_CORE_FIELDS = ['symbol', 'molfile', 'rgroups', 'name'];
|
|
13
13
|
|
|
14
|
+
|
|
15
|
+
/**
 * Encodes every sequence of a macromolecule column as a string in which each
 * monomer is replaced by a single character.
 *
 * Each distinct monomer symbol is assigned the next free code point, starting at
 * `MONOMER_ENCODE_MIN`; the same monomer always maps to the same character within
 * one call. Sequences are split into monomers with the splitter obtained from
 * `WebLogo.getSplitter` for the column's units tag and `separator` tag.
 *
 * @param col Column holding the sequences to encode.
 * @returns A new string column named `encodedMolecules` with one encoded string
 *   per row of `col`, or `null` if the column contains more distinct monomers than
 *   fit into the range `MONOMER_ENCODE_MIN`..`MONOMER_ENCODE_MAX` (an error is
 *   shown through `grok.shell.error` in that case).
 */
export function encodeMonomers(col: DG.Column): DG.Column | null {
  let encodeSymbol = MONOMER_ENCODE_MIN;
  // A Map (rather than a plain object) keeps monomer symbols such as
  // 'constructor' or 'toString' from colliding with Object.prototype members.
  const monomerSymbolDict = new Map<string, number>();
  const units = col.tags[DG.TAGS.UNITS];
  const sep = col.getTag('separator');
  const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, sep);
  const encodedStringArray: string[] = [];
  for (let i = 0; i < col.length; ++i) {
    let encodedMonomerStr = '';
    const monomers = splitterFunc(col.get(i));
    // A plain loop is used instead of forEach so that `return null` below leaves
    // encodeMonomers itself; inside a forEach callback it would only end the
    // current callback invocation and the failure would never reach the caller.
    for (const m of monomers) {
      let code = monomerSymbolDict.get(m);
      if (code === undefined) {
        if (encodeSymbol > MONOMER_ENCODE_MAX) {
          grok.shell.error(`Not enougth symbols to encode monomers`);
          return null;
        }
        code = encodeSymbol;
        monomerSymbolDict.set(m, code);
        encodeSymbol++;
      }
      encodedMonomerStr += String.fromCodePoint(code);
    }
    encodedStringArray.push(encodedMonomerStr);
  }
  return DG.Column.fromStrings('encodedMolecules', encodedStringArray);
}
|
|
40
|
+
|
|
14
41
|
export function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): any[][] | null {
|
|
15
42
|
const units = col.tags[DG.TAGS.UNITS];
|
|
16
43
|
const sep = col.getTag('separator');
|
|
@@ -79,4 +106,4 @@ export function createJsonMonomerLibFromSdf(table: DG.DataFrame): any {
|
|
|
79
106
|
resultLib.push(monomer);
|
|
80
107
|
}
|
|
81
108
|
return resultLib;
|
|
82
|
-
}
|
|
109
|
+
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit
|
|
1
|
+
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 6fa72ec4.</title><style type="text/css">html,
|
|
2
2
|
body {
|
|
3
3
|
font-family: Arial, Helvetica, sans-serif;
|
|
4
4
|
font-size: 1rem;
|
|
@@ -229,7 +229,8 @@ header {
|
|
|
229
229
|
font-size: 1rem;
|
|
230
230
|
padding: 0 0.5rem;
|
|
231
231
|
}
|
|
232
|
-
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit
|
|
232
|
+
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 6fa72ec4.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-22 13:03:45</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">213.603s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">199.146s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: Test result : Bio.MSA.is_correct : TypeError: Cannot read properties of undefined (reading 'split')
|
|
233
|
+
Test result : Bio.activityCliffs.activityCliffsOpen : Error: Expected "105 cliffs", got "2362 cliffs"
|
|
233
234
|
|
|
234
235
|
at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:67:20
|
|
235
236
|
at Generator.next (<anonymous>)
|
|
@@ -310,7 +311,6 @@ Test result : Bio.detectors.samplesTestUnichemSourcesNegativeSrcUrl : OK
|
|
|
310
311
|
Test result : Bio.detectors.samplesTestUnichemSourcesNegativeBaseIdUrl : OK
|
|
311
312
|
Test result : Bio.MSA.test_table.is_not_empty : OK
|
|
312
313
|
Test result : Bio.sequenceSpace.sequenceSpaceOpens : OK
|
|
313
|
-
Test result : Bio.activityCliffs.activityCliffsOpen : OK
|
|
314
314
|
Test result : Bio.splitters.helm1 : OK
|
|
315
315
|
Test result : Bio.splitters.helm2 : OK
|
|
316
316
|
Test result : Bio.splitters.helm3-multichar : OK
|