@datagrok/bio 2.0.32 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +2563 -2698
- package/dist/package.js +2501 -2663
- package/files/libraries/HELMCoreLibrary.json +18218 -0
- package/package.json +3 -3
- package/src/package.ts +152 -2
- package/src/tests/activity-cliffs-tests.ts +19 -4
- package/src/tests/similarity-diversity-tests.ts +35 -19
- package/src/widgets/representations.ts +1 -2
- package/test-Bio-3afbd4014fa1-de511bfe.html +389 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.32",
|
|
8
|
+
"version": "2.1.0",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.9.12",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.2.2",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.14.1",
|
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"webpack-cli": "^4.6.0"
|
|
48
48
|
},
|
|
49
49
|
"grokDependencies": {
|
|
50
|
-
"@datagrok/chem": "1.3.
|
|
50
|
+
"@datagrok/chem": "1.3.19",
|
|
51
51
|
"@datagrok/helm": "latest"
|
|
52
52
|
},
|
|
53
53
|
"scripts": {
|
package/src/package.ts
CHANGED
|
@@ -19,9 +19,9 @@ import {getMacroMol} from './utils/atomic-works';
|
|
|
19
19
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
20
20
|
import {convert} from './utils/convert';
|
|
21
21
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
22
|
-
import {TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
22
|
+
import {MonomerFreqs, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
23
23
|
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule'
|
|
24
|
-
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/
|
|
24
|
+
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
25
25
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
26
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
27
27
|
import {
|
|
@@ -39,9 +39,159 @@ import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
|
39
39
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
40
40
|
import { getMonomericMols } from './calculations/monomerLevelMols';
|
|
41
41
|
import { delay } from '@datagrok-libraries/utils/src/test';
|
|
42
|
+
import {Observable, Subject} from 'rxjs';
|
|
43
|
+
|
|
44
|
+
const STORAGE_NAME = 'Libraries';
|
|
45
|
+
const LIB_PATH = 'libraries/';
|
|
46
|
+
const expectedMonomerData = ['symbol', 'name', 'molfile', 'rgroups', 'polymerType', 'monomerType'];
|
|
47
|
+
|
|
48
|
+
let monomerLib: bio.IMonomerLib | null = null;
|
|
49
|
+
export let hydrophobPalette: SeqPaletteCustom | null = null;
|
|
50
|
+
|
|
51
|
+
export class SeqPaletteCustom implements bio.SeqPalette {
|
|
52
|
+
private readonly _palette: { [m: string]: string };
|
|
53
|
+
constructor(palette: { [m: string]: string }) {
|
|
54
|
+
this._palette = palette;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
public get(m: string): string {
|
|
58
|
+
return this._palette[m];
|
|
59
|
+
}
|
|
60
|
+
}
|
|
42
61
|
|
|
43
62
|
//tags: init
|
|
44
63
|
export async function initBio() {
|
|
64
|
+
await loadLibraries();
|
|
65
|
+
let monomers: string[] = [];
|
|
66
|
+
let logPs: number[] = [];
|
|
67
|
+
const module = await grok.functions.call('Chem:getRdKitModule');
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
const series = monomerLib!.getMonomersByType('PEPTIDE')!;
|
|
71
|
+
Object.keys(series).forEach(symbol => {
|
|
72
|
+
monomers.push(symbol);
|
|
73
|
+
const block = series[symbol].replaceAll('#R', 'O ');
|
|
74
|
+
const mol = module.get_mol(block);
|
|
75
|
+
const logP = JSON.parse(mol.get_descriptors()).CrippenClogP;
|
|
76
|
+
logPs.push(logP);
|
|
77
|
+
mol?.delete();
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
const sum = logPs.reduce((a, b) => a + b, 0);
|
|
81
|
+
const avg = (sum / logPs.length) || 0;
|
|
82
|
+
|
|
83
|
+
let palette: {[monomer: string]: string} = {};
|
|
84
|
+
for (let i = 0; i < monomers.length; i++) {
|
|
85
|
+
palette[monomers[i]] = logPs[i] < avg ? '#4682B4' : '#DC143C';
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
hydrophobPalette = new SeqPaletteCustom(palette);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
async function loadLibraries() {
|
|
92
|
+
let uploadedLibraries: string[] = Object.values(await grok.dapi.userDataStorage.get(STORAGE_NAME, true));
|
|
93
|
+
for (let i = 0; i < 1; ++i)
|
|
94
|
+
await monomerManager(uploadedLibraries[i]);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
//name: monomerManager
|
|
98
|
+
//input: string value
|
|
99
|
+
export async function monomerManager(value: string) {
|
|
100
|
+
let data: any[] = [];
|
|
101
|
+
let file;
|
|
102
|
+
let dfSdf;
|
|
103
|
+
if (value.endsWith('.sdf')) {
|
|
104
|
+
const funcList: DG.Func[] = DG.Func.find({package: 'Chem', name: 'importSdf'});
|
|
105
|
+
if (funcList.length === 1) {
|
|
106
|
+
file = await _package.files.readAsBytes(`${LIB_PATH}${value}`);
|
|
107
|
+
dfSdf = await grok.functions.call('Chem:importSdf', {bytes: file});
|
|
108
|
+
data = createJsonMonomerLibFromSdf(dfSdf[0]);
|
|
109
|
+
} else {
|
|
110
|
+
grok.shell.warning('Chem package is not installed');
|
|
111
|
+
}
|
|
112
|
+
} else {
|
|
113
|
+
const file = await _package.files.readAsText(`${LIB_PATH}${value}`);
|
|
114
|
+
data = JSON.parse(file);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
let monomers: { [type: string]: { [name: string]: bio.Monomer } } = {};
|
|
118
|
+
const types: string[] = [];
|
|
119
|
+
//group monomers by their type
|
|
120
|
+
data.forEach(monomer => {
|
|
121
|
+
let monomerAdd: bio.Monomer = {
|
|
122
|
+
'symbol': monomer['symbol'],
|
|
123
|
+
'name': monomer['name'],
|
|
124
|
+
'naturalAnalog': monomer['naturalAnalog'],
|
|
125
|
+
'molfile': monomer['molfile'],
|
|
126
|
+
'rgroups': monomer['rgroups'],
|
|
127
|
+
'polymerType': monomer['polymerType'],
|
|
128
|
+
'monomerType': monomer['monomerType'],
|
|
129
|
+
'data': {}
|
|
130
|
+
};
|
|
131
|
+
|
|
132
|
+
Object.keys(monomer).forEach(prop => {
|
|
133
|
+
if (!expectedMonomerData.includes(prop))
|
|
134
|
+
monomerAdd.data[prop] = monomer[prop];
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
if (!types.includes(monomer['polymerType'])) {
|
|
138
|
+
monomers[monomer['polymerType']] = {};
|
|
139
|
+
types.push(monomer['polymerType']);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
monomers[monomer['polymerType']][monomer['symbol']] = monomerAdd;
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
if (monomerLib == null)
|
|
146
|
+
monomerLib = new bio.MonomerLib();
|
|
147
|
+
|
|
148
|
+
monomerLib!.update(monomers);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
//name: getBioLib
|
|
152
|
+
//output: object monomerLib
|
|
153
|
+
export function getBioLib(): bio.IMonomerLib | null {
|
|
154
|
+
return monomerLib;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
//name: manageFiles
|
|
158
|
+
export async function manageFiles() {
|
|
159
|
+
const a = ui.dialog({title: 'Manage files'})
|
|
160
|
+
//@ts-ignore
|
|
161
|
+
.add(ui.fileBrowser({path: 'System:AppData/Bio/libraries'}).root)
|
|
162
|
+
.addButton('OK', () => a.close())
|
|
163
|
+
.show();
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
//name: Manage Libraries
|
|
167
|
+
//tags: panel, widgets
|
|
168
|
+
//input: column seqColumn {semType: Macromolecule}
|
|
169
|
+
//output: widget result
|
|
170
|
+
export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
|
|
171
|
+
//@ts-ignore
|
|
172
|
+
let filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
|
|
173
|
+
let divInputs: HTMLDivElement = ui.div();
|
|
174
|
+
let librariesList: string[] = (await _package.files.list(`${LIB_PATH}`, false, '')).map(it => it.fileName);
|
|
175
|
+
let uploadedLibraries: string[] = Object.values(await grok.dapi.userDataStorage.get(STORAGE_NAME, true));
|
|
176
|
+
for (let i = 0; i < uploadedLibraries.length; ++i) {
|
|
177
|
+
let libraryName: string = uploadedLibraries[i];
|
|
178
|
+
divInputs.append(ui.boolInput(libraryName, true, async() => {
|
|
179
|
+
grok.dapi.userDataStorage.remove(STORAGE_NAME, libraryName, true);
|
|
180
|
+
await loadLibraries();
|
|
181
|
+
}).root);
|
|
182
|
+
}
|
|
183
|
+
let unusedLibraries: string[] = librariesList.filter(x => !uploadedLibraries.includes(x));
|
|
184
|
+
for (let i = 0; i < unusedLibraries.length; ++i) {
|
|
185
|
+
let libraryName: string = unusedLibraries[i];
|
|
186
|
+
divInputs.append(ui.boolInput(libraryName, false, () => {
|
|
187
|
+
monomerManager(libraryName);
|
|
188
|
+
grok.dapi.userDataStorage.postValue(STORAGE_NAME, libraryName, libraryName, true);
|
|
189
|
+
}).root);
|
|
190
|
+
}
|
|
191
|
+
return new DG.Widget(ui.splitV([
|
|
192
|
+
divInputs,
|
|
193
|
+
ui.divV([filesButton])
|
|
194
|
+
]));
|
|
45
195
|
}
|
|
46
196
|
|
|
47
197
|
//name: fastaSequenceCellRenderer
|
package/src/tests/activity-cliffs-tests.ts
CHANGED
|
@@ -13,19 +13,34 @@ category('activityCliffs', async () => {
|
|
|
13
13
|
let actCliffsTableViewWithEmptyRows: DG.TableView;
|
|
14
14
|
let actCliffsDfWithEmptyRows: DG.DataFrame;
|
|
15
15
|
|
|
16
|
+
let viewList: DG.ViewBase[] = [];
|
|
17
|
+
let dfList: DG.DataFrame[] = [];
|
|
18
|
+
|
|
19
|
+
before(async () => {
|
|
20
|
+
viewList = [];
|
|
21
|
+
dfList = [];
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
after(async () => {
|
|
25
|
+
for (const view of viewList) view.close();
|
|
26
|
+
for (const df of dfList) grok.shell.closeTable(df);
|
|
27
|
+
});
|
|
28
|
+
|
|
16
29
|
test('activityCliffsOpens', async () => {
|
|
17
30
|
actCliffsDf = await readDataframe('tests/sample_MSA_data.csv');
|
|
31
|
+
dfList.push(actCliffsDf);
|
|
18
32
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
33
|
+
viewList.push(actCliffsTableView);
|
|
34
|
+
|
|
19
35
|
await _testActivityCliffsOpen(actCliffsDf, 57, 'UMAP', 'MSA');
|
|
20
|
-
grok.shell.closeTable(actCliffsDf);
|
|
21
|
-
actCliffsTableView.close();
|
|
22
36
|
});
|
|
23
37
|
|
|
24
38
|
test('activityCliffsWithEmptyRows', async () => {
|
|
25
39
|
actCliffsDfWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
|
|
40
|
+
dfList.push(actCliffsDfWithEmptyRows);
|
|
26
41
|
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
42
|
+
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
43
|
+
|
|
27
44
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 57, 'UMAP', 'MSA');
|
|
28
|
-
grok.shell.closeTable(actCliffsDfWithEmptyRows);
|
|
29
|
-
actCliffsTableViewWithEmptyRows.close();
|
|
30
45
|
});
|
|
31
46
|
});
|
package/src/tests/similarity-diversity-tests.ts
CHANGED
|
@@ -4,7 +4,23 @@ import {createTableView, readDataframe} from './utils';
|
|
|
4
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
5
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
6
6
|
|
|
7
|
+
let viewList: DG.ViewBase[];
|
|
8
|
+
let dfList: DG.DataFrame[];
|
|
9
|
+
|
|
10
|
+
|
|
7
11
|
category('similarity/diversity', async () => {
|
|
12
|
+
|
|
13
|
+
before(async () => {
|
|
14
|
+
viewList = [];
|
|
15
|
+
dfList = [];
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
after(async () => {
|
|
19
|
+
for (const view of viewList) view.close();
|
|
20
|
+
for (const df of dfList) grok.shell.closeTable(df);
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
|
|
8
24
|
test('similaritySearchViewer', async () => {
|
|
9
25
|
await _testSimilaritySearchViewer();
|
|
10
26
|
});
|
|
@@ -17,27 +33,27 @@ async function _testSimilaritySearchViewer() {
|
|
|
17
33
|
const molecules = await createTableView('tests/sample_MSA_data.csv');
|
|
18
34
|
const viewer = molecules.addViewer('SequenceSimilaritySearchViewer');
|
|
19
35
|
await delay(100);
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
expect(
|
|
26
|
-
expect(
|
|
27
|
-
expect(
|
|
36
|
+
const similaritySearchViewer = getSearchViewer(viewer, 'SequenceSimilaritySearchViewer');
|
|
37
|
+
viewList.push(similaritySearchViewer);
|
|
38
|
+
viewList.push(molecules);
|
|
39
|
+
if (!similaritySearchViewer.molCol)
|
|
40
|
+
await waitForCompute(similaritySearchViewer);
|
|
41
|
+
expect(similaritySearchViewer.fingerprint, 'Morgan');
|
|
42
|
+
expect(similaritySearchViewer.distanceMetric, 'Tanimoto');
|
|
43
|
+
expect(similaritySearchViewer.scores!.get(0), DG.FLOAT_NULL);
|
|
44
|
+
expect(similaritySearchViewer.idxs!.get(0), 0);
|
|
45
|
+
expect(similaritySearchViewer.molCol!.get(0),
|
|
28
46
|
'D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
|
|
29
|
-
expect(
|
|
30
|
-
expect(
|
|
31
|
-
expect(
|
|
47
|
+
expect(similaritySearchViewer.scores!.get(1), 0.4722222089767456);
|
|
48
|
+
expect(similaritySearchViewer.idxs!.get(1), 11);
|
|
49
|
+
expect(similaritySearchViewer.molCol!.get(1),
|
|
32
50
|
'meI/hHis//Aca/meM/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
|
|
51
|
+
const waiter = waitForCompute(similaritySearchViewer); /* subscribe for computeCompleted event before start compute */
|
|
33
52
|
molecules.dataFrame.currentRowIdx = 1;
|
|
34
|
-
await
|
|
35
|
-
|
|
36
|
-
expect(
|
|
37
|
-
expect(similaritySearchviewer.molCol!.get(0),
|
|
53
|
+
await waiter;
|
|
54
|
+
expect(similaritySearchViewer.targetMoleculeIdx, 1);
|
|
55
|
+
expect(similaritySearchViewer.molCol!.get(0),
|
|
38
56
|
'meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Phe_ab-dehydro/N/D-Orn/D-aThr//Phe_4Me');
|
|
39
|
-
similaritySearchviewer.close();
|
|
40
|
-
molecules.close();
|
|
41
57
|
}
|
|
42
58
|
|
|
43
59
|
|
|
@@ -46,14 +62,14 @@ async function _testDiversitySearchViewer() {
|
|
|
46
62
|
const viewer = molecules.addViewer('SequenceDiversitySearchViewer');
|
|
47
63
|
await delay(10);
|
|
48
64
|
const diversitySearchviewer = getSearchViewer(viewer, 'SequenceDiversitySearchViewer');
|
|
65
|
+
viewList.push(diversitySearchviewer);
|
|
66
|
+
viewList.push(molecules);
|
|
49
67
|
if (!diversitySearchviewer.renderMolIds)
|
|
50
68
|
await waitForCompute(diversitySearchviewer);
|
|
51
69
|
expect(diversitySearchviewer.fingerprint, 'Morgan');
|
|
52
70
|
expect(diversitySearchviewer.distanceMetric, 'Tanimoto');
|
|
53
71
|
expect(diversitySearchviewer.initialized, true);
|
|
54
72
|
expect(diversitySearchviewer.renderMolIds.length > 0, true);
|
|
55
|
-
diversitySearchviewer.close();
|
|
56
|
-
molecules.close();
|
|
57
73
|
}
|
|
58
74
|
|
|
59
75
|
function getSearchViewer(viewer: DG.Viewer, name: string) {
|
package/src/widgets/representations.ts
CHANGED
|
@@ -3,7 +3,6 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
import {getMolfilesFromSingleSeq} from '@datagrok-libraries/bio/src/utils/monomer-utils';
|
|
5
5
|
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
6
|
-
import {getMacroMol} from '@datagrok-libraries/bio/src/utils/atomic-works';
|
|
7
6
|
|
|
8
7
|
/**
|
|
9
8
|
* @export
|
|
@@ -79,7 +78,7 @@ export async function representationsWidget(macroMolecule: DG.Cell, monomersLibO
|
|
|
79
78
|
try {
|
|
80
79
|
try {
|
|
81
80
|
const atomicCodes = getMolfilesFromSingleSeq(macroMolecule, monomersLibObject);
|
|
82
|
-
const result = await getMacroMol(atomicCodes!);
|
|
81
|
+
const result = ''//await getMacroMol(atomicCodes!);
|
|
83
82
|
const molBlock2D = result[0];
|
|
84
83
|
molBlock3D = (await grok.functions.call('Bio:Embed', {molBlock2D})) as unknown as string;
|
|
85
84
|
} catch (e) {
|