@datagrok/bio 2.0.31 → 2.0.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +379 -139
- package/dist/package.js +326 -113
- package/files/libraries/HELMCoreLibrary.json +18218 -0
- package/package.json +3 -3
- package/src/analysis/sequence-activity-cliffs.ts +18 -0
- package/src/analysis/sequence-space.ts +18 -3
- package/src/calculations/monomerLevelMols.ts +1 -1
- package/src/package.ts +221 -20
- package/src/tests/activity-cliffs-tests.ts +21 -6
- package/src/tests/activity-cliffs-utils.ts +1 -1
- package/src/tests/similarity-diversity-tests.ts +35 -19
- package/test-Bio-3afbd4014fa1-15e7930e.html +389 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.31",
|
|
8
|
+
"version": "2.0.33",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
17
|
"@datagrok-libraries/bio": "^5.8.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
19
|
-
"@datagrok-libraries/ml": "^6.2.
|
|
19
|
+
"@datagrok-libraries/ml": "^6.2.2",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.14.1",
|
|
21
21
|
"cash-dom": "^8.0.0",
|
|
22
22
|
"datagrok-api": "^1.8.1",
|
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"webpack-cli": "^4.6.0"
|
|
48
48
|
},
|
|
49
49
|
"grokDependencies": {
|
|
50
|
-
"@datagrok/chem": "1.3.
|
|
50
|
+
"@datagrok/chem": "1.3.19",
|
|
51
51
|
"@datagrok/helm": "latest"
|
|
52
52
|
},
|
|
53
53
|
"scripts": {
|
|
@@ -10,6 +10,7 @@ import {TAGS} from '../utils/constants';
|
|
|
10
10
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
11
11
|
import * as C from '../utils/constants';
|
|
12
12
|
import { GridColumn } from 'datagrok-api/dg';
|
|
13
|
+
import { invalidateMols, MONOMERIC_COL_TAGS } from '../substructure-search/substructure-search';
|
|
13
14
|
|
|
14
15
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
15
16
|
const stringArray = col.toList();
|
|
@@ -39,6 +40,23 @@ export async function getSimilaritiesMarix(dim: number, seqCol: DG.Column, df: D
|
|
|
39
40
|
return simArr;
|
|
40
41
|
}
|
|
41
42
|
|
|
43
|
+
/**
 * Builds the similarities matrix for a macromolecule column by delegating to the
 * `Chem:getChemSimilaritiesMatrix` function, which is fed the monomer-level molecules
 * cached in `seqCol.temp`.
 *
 * @param dim passed through to the Chem function unchanged
 * @param seqCol macromolecule column; its cached molecules are refreshed via `invalidateMols`
 *   when the column version differs from the last invalidated version
 * @param df not used by this function (a fresh data frame is built for the Chem call instead)
 * @param colName name of the string column created in the data frame sent to Chem
 * @param simArr passed through to the Chem function unchanged
 * @returns whatever `Chem:getChemSimilaritiesMatrix` returns
 */
export async function getChemSimilaritiesMarix(dim: number, seqCol: DG.Column,
  df: DG.DataFrame, colName: string, simArr: DG.Column[])
  : Promise<DG.Column[]> {
  const cacheIsStale = seqCol.version !== seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION];
  if (cacheIsStale)
    await invalidateMols(seqCol, false);

  // One row per sequence, holding the cached monomer-level molecule of that row.
  const molsDf = DG.DataFrame.create(seqCol.length);
  const molStrings = molsDf.columns.addNewString(colName);
  molStrings.init((rowIdx) => seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS].get(rowIdx));

  const similarities = await grok.functions.call('Chem:getChemSimilaritiesMatrix', {
    dim: dim,
    col: seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
    df: molsDf,
    colName: colName,
    simArr: simArr,
  });
  return similarities;
}
|
|
59
|
+
|
|
42
60
|
export function createTooltipElement(params: ITooltipAndPanelParams): HTMLDivElement {
|
|
43
61
|
const tooltipElement = ui.divH([]);
|
|
44
62
|
const columnNames = ui.divV([
|
|
@@ -5,6 +5,8 @@ import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-m
|
|
|
5
5
|
import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
6
|
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
7
7
|
import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
8
|
+
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
9
|
+
import * as grok from 'datagrok-api/grok';
|
|
8
10
|
|
|
9
11
|
export interface ISequenceSpaceResult {
|
|
10
12
|
distance: Matrix;
|
|
@@ -12,9 +14,8 @@ export interface ISequenceSpaceResult {
|
|
|
12
14
|
}
|
|
13
15
|
|
|
14
16
|
export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
/* let preparedData: any;
|
|
17
|
+
// code deprecated since seqCol is encoded
|
|
18
|
+
/* let preparedData: any;
|
|
18
19
|
if (!(spaceParams.seqCol!.tags[DG.TAGS.UNITS] === 'HELM')) {
|
|
19
20
|
const sep = spaceParams.seqCol.getTag(UnitsHandler.TAGS.separator);
|
|
20
21
|
const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
|
|
@@ -37,6 +38,20 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
37
38
|
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
38
39
|
}
|
|
39
40
|
|
|
41
|
+
/**
 * Computes sequence-space embeddings by delegating to `Chem:getChemSpaceEmbeddings`,
 * using the monomer-level molecules cached in the sequence column's `temp` storage.
 *
 * The cache is refreshed through `invalidateMols` first when the column version does not
 * match the version recorded at the last invalidation.
 *
 * @param spaceParams sequence column, method name, similarity metric and the two
 *   embedding axis names (`embedAxesNames[0]` is sent as `xAxis`, `[1]` as `yAxis`)
 * @returns whatever `Chem:getChemSpaceEmbeddings` returns
 */
export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
  const lastInvalidated = spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION];
  if (spaceParams.seqCol.version !== lastInvalidated)
    await invalidateMols(spaceParams.seqCol, false);

  const callArgs = {
    col: spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
    methodName: spaceParams.methodName,
    similarityMetric: spaceParams.similarityMetric,
    xAxis: spaceParams.embedAxesNames[0],
    yAxis: spaceParams.embedAxesNames[1],
  };
  return await grok.functions.call('Chem:getChemSpaceEmbeddings', callArgs);
}
|
|
54
|
+
|
|
40
55
|
|
|
41
56
|
export function getEmbeddingColsNames(df: DG.DataFrame) {
|
|
42
57
|
const axes = ['Embed_X', 'Embed_Y'];
|
|
@@ -46,7 +46,7 @@ function molV3000FromNonHelmSequence(
|
|
|
46
46
|
M V30 BEGIN CTAB
|
|
47
47
|
`;
|
|
48
48
|
|
|
49
|
-
molV3000 += `M V30 COUNTS ${monomers.length} ${monomers.length - 1} 0 0 0\n`;
|
|
49
|
+
molV3000 += `M V30 COUNTS ${monomers.length} ${monomers.length ? monomers.length - 1 : 0} 0 0 0\n`;
|
|
50
50
|
molV3000 += 'M V30 BEGIN ATOM\n';
|
|
51
51
|
|
|
52
52
|
for (let atomRowI = 0; atomRowI < monomers.length; atomRowI++) {
|
package/src/package.ts
CHANGED
|
@@ -10,16 +10,16 @@ import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/
|
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
12
12
|
import {SequenceAlignment, Aligned} from './seq_align';
|
|
13
|
-
import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
|
|
13
|
+
import {getEmbeddingColsNames, sequenceSpace, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
14
14
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
15
|
-
import {createLinesGrid, createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
15
|
+
import {createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMarix, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
16
16
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq} from '@datagrok-libraries/bio/src/utils/monomer-utils';
|
|
17
17
|
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
18
18
|
import {getMacroMol} from './utils/atomic-works';
|
|
19
19
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
20
20
|
import {convert} from './utils/convert';
|
|
21
21
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
22
|
-
import {TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
22
|
+
import {MonomerFreqs, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
23
23
|
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule'
|
|
24
24
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/utils/to-atomic-level';
|
|
25
25
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
@@ -39,9 +39,209 @@ import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
|
39
39
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
40
40
|
import { getMonomericMols } from './calculations/monomerLevelMols';
|
|
41
41
|
import { delay } from '@datagrok-libraries/utils/src/test';
|
|
42
|
+
import {Observable, Subject} from 'rxjs';
|
|
43
|
+
|
|
44
|
+
// Name of the user data storage that keeps the list of enabled monomer libraries.
const STORAGE_NAME = 'Libraries';
// Folder prefix prepended to a library file name when it is read from the package files.
const LIB_PATH = 'libraries/';
// Fields copied onto a Monomer as-is; every other field of a library entry goes into Monomer.data.
const expectedMonomerData = ['symbol', 'name', 'molfile', 'rgroups', 'polymerType', 'monomerType'];

// Shared monomer library; stays null until monomerManager() creates it.
let monomerLib: IMonomerLib | null = null;
// Palette assigned at the end of initBio(); null before that.
export let hydrophobPalette: SeqPaletteCustom | null = null;
|
|
50
|
+
|
|
51
|
+
/**
 * In-memory monomer library: monomers are grouped by type and keyed by name within a type.
 * Every call to `update` emits on `onChanged`.
 *
 * Lookups only consider keys that were actually stored, so names such as `constructor` or
 * `toString` are not mistaken for monomers, and asking for an unknown type never throws.
 */
class MonomerLib implements IMonomerLib {
  private _monomers: { [type: string]: { [name: string]: Monomer } } = {};
  private _onChanged = new Subject<any>();

  /** Returns `dict[key]` when `key` is an own property of `dict`, otherwise null. */
  private static ownValue<T>(dict: { [key: string]: T }, key: string): T | null {
    return Object.prototype.hasOwnProperty.call(dict, key) ? dict[key] : null;
  }

  /** Returns the monomer stored under the given type and name, or null when either is unknown. */
  getMonomer(monomerType: string, monomerName: string): Monomer | null {
    const ofType = MonomerLib.ownValue(this._monomers, monomerType);
    return ofType === null ? null : MonomerLib.ownValue(ofType, monomerName);
  }

  /** Returns the names of all monomer types currently stored. */
  getTypes(): string[] {
    return Object.keys(this._monomers);
  }

  /**
   * Maps every monomer symbol of the given type to its molfile.
   * An unknown type yields an empty object instead of throwing.
   */
  getMonomersByType(type: string): {[symbol: string]: string} {
    const res: {[symbol: string]: string} = {};
    const ofType = MonomerLib.ownValue(this._monomers, type);
    if (ofType === null)
      return res;

    for (const monomerSymbol of Object.keys(ofType))
      res[monomerSymbol] = ofType[monomerSymbol].molfile;

    return res;
  }

  /** Stream that emits after every `update` call. */
  get onChanged(): Observable<any> {
    return this._onChanged;
  }

  /**
   * Merges the given monomers into the library and notifies subscribers.
   * A monomer whose type and name already exist is overwritten.
   */
  public update(monomers: { [type: string]: { [name: string]: Monomer } }): void {
    for (const type of Object.keys(monomers)) {
      //could possibly rewrite -> TODO: check duplicated monomer symbol

      if (MonomerLib.ownValue(this._monomers, type) === null)
        this._monomers[type] = {};

      for (const monomerName of Object.keys(monomers[type]))
        this._monomers[type][monomerName] = monomers[type][monomerName];
    }

    this._onChanged.next();
  }
}
|
|
95
|
+
|
|
96
|
+
/**
 * A single monomer library entry.
 *
 * `symbol`, `name`, `molfile`, `rgroups`, `polymerType` and `monomerType` are copied from the
 * library record as-is; any other field of the record is kept in `data`.
 */
export type Monomer = {
  symbol: string;
  name: string;
  molfile: string;
  rgroups: Array<{
    capGroupSmiles: string;
    alternateId: string;
    capGroupName: string;
    label: string;
  }>;
  polymerType: string;
  monomerType: string;
  data: Record<string, string>;
};
|
|
105
|
+
|
|
106
|
+
//expected types: HELM_AA, HELM_BASE, HELM_CHEM, HELM_LINKER, HELM_SUGAR
|
|
107
|
+
/** Contract of a monomer library: lookup by type and name, bulk update, change notification. */
export interface IMonomerLib {
  /** Names of the monomer types held by the library. */
  getTypes(): string[];

  /** Symbol-to-molfile map for one monomer type; implementations may return null. */
  getMonomersByType(type: string): { [symbol: string]: string } | null;

  /** The monomer with the given type and name, or null. */
  getMonomer(monomerType: string, monomerName: string): Monomer | null;

  /** Adds the given monomers, grouped by type and then by name, to the library. */
  update(monomers: { [type: string]: { [name: string]: Monomer } }): void;

  /** Observable for library change notifications. */
  get onChanged(): Observable<any>;
}
|
|
114
|
+
|
|
115
|
+
/** Sequence palette backed by a plain monomer-to-color dictionary. */
export class SeqPaletteCustom implements bio.SeqPalette {
  private readonly _palette: { [m: string]: string };

  /** @param palette dictionary from monomer to color; it is stored by reference, not copied */
  constructor(palette: { [m: string]: string }) {
    this._palette = palette;
  }

  /** Looks up the color of monomer `m` in the dictionary given to the constructor. */
  public get(m: string): string {
    const color = this._palette[m];
    return color;
  }
}
|
|
42
125
|
|
|
43
126
|
//tags: init
|
|
44
127
|
export async function initBio() {
  // Package initialization: loads the stored monomer libraries, then builds `hydrophobPalette`
  // by coloring every 'PEPTIDE' monomer according to whether its CrippenClogP descriptor is
  // below the average over all such monomers ('#4682B4') or not ('#DC143C').
  //
  // Monomers whose molblock cannot be turned into a molecule are left out of the palette,
  // and each molecule is released even when reading its descriptors fails.
  await loadLibraries();
  const module = await grok.functions.call('Chem:getRdKitModule');

  // No library may have been loaded at all; treat that as "no PEPTIDE monomers".
  const series: {[symbol: string]: string} = monomerLib?.getMonomersByType('PEPTIDE') ?? {};

  const monomers: string[] = [];
  const logPs: number[] = [];
  for (const symbol of Object.keys(series)) {
    // R-group placeholders are replaced by oxygen before the molblock is handed to the toolkit.
    const block = series[symbol].replaceAll('#R', 'O ');
    const mol = module.get_mol(block);
    if (!mol)
      continue;

    try {
      const logP = JSON.parse(mol.get_descriptors()).CrippenClogP;
      // Pushed together so that monomers[i] always corresponds to logPs[i].
      monomers.push(symbol);
      logPs.push(logP);
    } finally {
      mol.delete();
    }
  }

  const sum = logPs.reduce((a, b) => a + b, 0);
  // `|| 0` turns the NaN produced by an empty list (0 / 0) into 0.
  const avg = (sum / logPs.length) || 0;

  const palette: {[monomer: string]: string} = {};
  for (let i = 0; i < monomers.length; i++)
    palette[monomers[i]] = logPs[i] < avg ? '#4682B4' : '#DC143C';

  hydrophobPalette = new SeqPaletteCustom(palette);
}
|
|
154
|
+
|
|
155
|
+
/**
 * Loads every monomer library listed in the current user's `STORAGE_NAME` data storage,
 * one after another, through `monomerManager`.
 *
 * All stored libraries are loaded (not only the first one), and an empty storage results
 * in no `monomerManager` call at all.
 */
async function loadLibraries() {
  const stored = await grok.dapi.userDataStorage.get(STORAGE_NAME, true);
  const uploadedLibraries: string[] = Object.values(stored ?? {});
  for (const libraryName of uploadedLibraries)
    await monomerManager(libraryName);
}
|
|
160
|
+
|
|
161
|
+
//name: monomerManager
|
|
162
|
+
//input: string value
|
|
163
|
+
export async function monomerManager(value: string) {
  // Reads the library file `value` from the package `libraries/` folder and merges its
  // monomers into the shared `monomerLib`, creating the library on first use.
  // A name ending with '.sdf' is imported through Chem:importSdf (a warning is shown and no
  // monomers are read when that function is not found exactly once); any other file is
  // parsed as JSON.
  const libraryPath = `${LIB_PATH}${value}`;
  let rawMonomers: any[] = [];

  if (!value.endsWith('.sdf')) {
    const text = await _package.files.readAsText(libraryPath);
    rawMonomers = JSON.parse(text);
  } else {
    const importers: DG.Func[] = DG.Func.find({package: 'Chem', name: 'importSdf'});
    console.debug(`Helm: initHelm() funcList.length = ${importers.length}`);
    if (importers.length !== 1) {
      grok.shell.warning('Chem package is not installed');
    } else {
      const bytes = await _package.files.readAsBytes(libraryPath);
      const sdfTables = await grok.functions.call('Chem:importSdf', {bytes: bytes});
      rawMonomers = createJsonMonomerLibFromSdf(sdfTables[0]);
    }
  }

  if (!monomerLib)
    monomerLib = new MonomerLib();

  // Group monomers by polymer type, keyed by symbol within a type.
  const grouped: { [type: string]: { [name: string]: Monomer } } = {};
  const seenTypes = new Set<any>();
  rawMonomers.forEach((raw) => {
    const entry: Monomer = {
      'symbol': raw['symbol'],
      'name': raw['name'],
      'molfile': raw['molfile'],
      'rgroups': raw['rgroups'],
      'polymerType': raw['polymerType'],
      'monomerType': raw['monomerType'],
      'data': {},
    };

    // Everything that is not one of the expected fields is preserved under `data`.
    for (const prop of Object.keys(raw)) {
      if (!expectedMonomerData.includes(prop))
        entry.data[prop] = raw[prop];
    }

    const polymerType = raw['polymerType'];
    if (!seenTypes.has(polymerType)) {
      grouped[polymerType] = {};
      seenTypes.add(polymerType);
    }

    grouped[polymerType][raw['symbol']] = entry;
  });

  monomerLib!.update(grouped);
}
|
|
214
|
+
|
|
215
|
+
//name: Manage Libraries
|
|
216
|
+
//tags: panel, widgets
|
|
217
|
+
//input: column helmColumn {semType: Macromolecule}
|
|
218
|
+
//output: widget result
|
|
219
|
+
export async function libraryPanel(helmColumn: DG.Column): Promise<DG.Widget> {
  // Builds the "Manage Libraries" panel: one checkbox per library file found in the package
  // `libraries/` folder (checked when the library is listed in the user's storage) plus a
  // 'Manage' button. `helmColumn` is not used by the panel itself.
  //
  // Storage updates and library loading are awaited inside the checkbox handlers so that a
  // reload never runs against a storage state that is still being written.
  // NOTE(review): the handlers do not look at the new checkbox value, so toggling a box back
  // repeats the same action — confirm whether that is intended.
  //@ts-ignore
  const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
  const divInputs: HTMLDivElement = ui.div();
  const librariesList: string[] = (await _package.files.list(`${LIB_PATH}`, false, '')).map((it) => it.fileName);
  const stored = await grok.dapi.userDataStorage.get(STORAGE_NAME, true);
  const uploadedLibraries: string[] = Object.values(stored ?? {});

  for (const libraryName of uploadedLibraries) {
    divInputs.append(ui.boolInput(libraryName, true, async () => {
      // The removal must be finished before the libraries are re-read from storage.
      await grok.dapi.userDataStorage.remove(STORAGE_NAME, libraryName, true);
      await loadLibraries();
      grok.shell.tv.grid.invalidate();
    }).root);
  }

  const unusedLibraries: string[] = librariesList.filter((x) => !uploadedLibraries.includes(x));
  for (const libraryName of unusedLibraries) {
    divInputs.append(ui.boolInput(libraryName, false, async () => {
      // Persist the choice only after the library has actually been loaded.
      await monomerManager(libraryName);
      await grok.dapi.userDataStorage.postValue(STORAGE_NAME, libraryName, libraryName, true);
    }).root);
  }

  return new DG.Widget(ui.splitV([
    divInputs,
    ui.divV([filesButton]),
  ]));
}
|
|
46
246
|
|
|
47
247
|
//name: fastaSequenceCellRenderer
|
|
@@ -168,9 +368,6 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
168
368
|
similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
|
|
169
369
|
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
170
370
|
return;
|
|
171
|
-
const encodedCol = encodeMonomers(macroMolecule);
|
|
172
|
-
if (!encodedCol)
|
|
173
|
-
return;
|
|
174
371
|
const axesNames = getEmbeddingColsNames(df);
|
|
175
372
|
const options = {
|
|
176
373
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
@@ -184,17 +381,17 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
184
381
|
const sp = await getActivityCliffs(
|
|
185
382
|
df,
|
|
186
383
|
macroMolecule,
|
|
187
|
-
|
|
384
|
+
null,
|
|
188
385
|
axesNames,
|
|
189
386
|
'Activity cliffs',
|
|
190
387
|
activities,
|
|
191
388
|
similarity,
|
|
192
|
-
'
|
|
389
|
+
'Tanimoto',
|
|
193
390
|
methodName,
|
|
194
391
|
DG.SEMTYPE.MACROMOLECULE,
|
|
195
392
|
tags,
|
|
196
|
-
|
|
197
|
-
|
|
393
|
+
sequenceSpaceByFingerprints,
|
|
394
|
+
getChemSimilaritiesMarix,
|
|
198
395
|
createTooltipElement,
|
|
199
396
|
createPropPanelElement,
|
|
200
397
|
createLinesGrid,
|
|
@@ -216,26 +413,30 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
216
413
|
if (!checkInputColumnUi(macroMolecule, 'Sequence space'))
|
|
217
414
|
return;
|
|
218
415
|
|
|
219
|
-
if (macroMolecule.version !== macroMolecule.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
220
|
-
await invalidateMols(macroMolecule, false);
|
|
221
416
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
417
|
+
const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
|
|
418
|
+
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
|
|
222
419
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
col: macroMolecule.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
420
|
+
const chemSpaceParams = {
|
|
421
|
+
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
226
422
|
methodName: methodName,
|
|
227
423
|
similarityMetric: similarityMetric,
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
424
|
+
embedAxesNames: embedColsNames
|
|
425
|
+
};
|
|
426
|
+
const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
|
|
427
|
+
const embeddings = sequenceSpaceRes.coordinates;
|
|
428
|
+
for (const col of embeddings) {
|
|
429
|
+
const listValues = col.toList();
|
|
430
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
431
|
+
table.columns.add(DG.Column.fromList('double', col.name, listValues));
|
|
432
|
+
}
|
|
232
433
|
if (plotEmbeddings) {
|
|
233
434
|
return grok.shell
|
|
234
435
|
.tableView(table.name)
|
|
235
436
|
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
236
437
|
};
|
|
237
438
|
|
|
238
|
-
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
439
|
+
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
239
440
|
if (!encodedCol)
|
|
240
441
|
return;
|
|
241
442
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
@@ -13,19 +13,34 @@ category('activityCliffs', async () => {
|
|
|
13
13
|
let actCliffsTableViewWithEmptyRows: DG.TableView;
|
|
14
14
|
let actCliffsDfWithEmptyRows: DG.DataFrame;
|
|
15
15
|
|
|
16
|
+
let viewList: DG.ViewBase[] = [];
|
|
17
|
+
let dfList: DG.DataFrame[] = [];
|
|
18
|
+
|
|
19
|
+
before(async () => {
|
|
20
|
+
viewList = [];
|
|
21
|
+
dfList = [];
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
after(async () => {
|
|
25
|
+
for (const view of viewList) view.close();
|
|
26
|
+
for (const df of dfList) grok.shell.closeTable(df);
|
|
27
|
+
});
|
|
28
|
+
|
|
16
29
|
test('activityCliffsOpens', async () => {
|
|
17
30
|
actCliffsDf = await readDataframe('tests/sample_MSA_data.csv');
|
|
31
|
+
dfList.push(actCliffsDf);
|
|
18
32
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
33
|
+
viewList.push(actCliffsTableView);
|
|
34
|
+
|
|
35
|
+
await _testActivityCliffsOpen(actCliffsDf, 57, 'UMAP', 'MSA');
|
|
22
36
|
});
|
|
23
37
|
|
|
24
38
|
test('activityCliffsWithEmptyRows', async () => {
|
|
25
39
|
actCliffsDfWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
|
|
40
|
+
dfList.push(actCliffsDfWithEmptyRows);
|
|
26
41
|
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
42
|
+
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
43
|
+
|
|
44
|
+
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 57, 'UMAP', 'MSA');
|
|
30
45
|
});
|
|
31
46
|
});
|
|
@@ -4,7 +4,23 @@ import {createTableView, readDataframe} from './utils';
|
|
|
4
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
5
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
6
6
|
|
|
7
|
+
let viewList: DG.ViewBase[];
|
|
8
|
+
let dfList: DG.DataFrame[];
|
|
9
|
+
|
|
10
|
+
|
|
7
11
|
category('similarity/diversity', async () => {
|
|
12
|
+
|
|
13
|
+
before(async () => {
|
|
14
|
+
viewList = [];
|
|
15
|
+
dfList = [];
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
after(async () => {
|
|
19
|
+
for (const view of viewList) view.close();
|
|
20
|
+
for (const df of dfList) grok.shell.closeTable(df);
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
|
|
8
24
|
test('similaritySearchViewer', async () => {
|
|
9
25
|
await _testSimilaritySearchViewer();
|
|
10
26
|
});
|
|
@@ -17,27 +33,27 @@ async function _testSimilaritySearchViewer() {
|
|
|
17
33
|
const molecules = await createTableView('tests/sample_MSA_data.csv');
|
|
18
34
|
const viewer = molecules.addViewer('SequenceSimilaritySearchViewer');
|
|
19
35
|
await delay(100);
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
expect(
|
|
26
|
-
expect(
|
|
27
|
-
expect(
|
|
36
|
+
const similaritySearchViewer = getSearchViewer(viewer, 'SequenceSimilaritySearchViewer');
|
|
37
|
+
viewList.push(similaritySearchViewer);
|
|
38
|
+
viewList.push(molecules);
|
|
39
|
+
if (!similaritySearchViewer.molCol)
|
|
40
|
+
await waitForCompute(similaritySearchViewer);
|
|
41
|
+
expect(similaritySearchViewer.fingerprint, 'Morgan');
|
|
42
|
+
expect(similaritySearchViewer.distanceMetric, 'Tanimoto');
|
|
43
|
+
expect(similaritySearchViewer.scores!.get(0), DG.FLOAT_NULL);
|
|
44
|
+
expect(similaritySearchViewer.idxs!.get(0), 0);
|
|
45
|
+
expect(similaritySearchViewer.molCol!.get(0),
|
|
28
46
|
'D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
|
|
29
|
-
expect(
|
|
30
|
-
expect(
|
|
31
|
-
expect(
|
|
47
|
+
expect(similaritySearchViewer.scores!.get(1), 0.4722222089767456);
|
|
48
|
+
expect(similaritySearchViewer.idxs!.get(1), 11);
|
|
49
|
+
expect(similaritySearchViewer.molCol!.get(1),
|
|
32
50
|
'meI/hHis//Aca/meM/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
|
|
51
|
+
const waiter = waitForCompute(similaritySearchViewer); /* subscribe for computeCompleted event before start compute */
|
|
33
52
|
molecules.dataFrame.currentRowIdx = 1;
|
|
34
|
-
await
|
|
35
|
-
|
|
36
|
-
expect(
|
|
37
|
-
expect(similaritySearchviewer.molCol!.get(0),
|
|
53
|
+
await waiter;
|
|
54
|
+
expect(similaritySearchViewer.targetMoleculeIdx, 1);
|
|
55
|
+
expect(similaritySearchViewer.molCol!.get(0),
|
|
38
56
|
'meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Phe_ab-dehydro/N/D-Orn/D-aThr//Phe_4Me');
|
|
39
|
-
similaritySearchviewer.close();
|
|
40
|
-
molecules.close();
|
|
41
57
|
}
|
|
42
58
|
|
|
43
59
|
|
|
@@ -46,14 +62,14 @@ async function _testDiversitySearchViewer() {
|
|
|
46
62
|
const viewer = molecules.addViewer('SequenceDiversitySearchViewer');
|
|
47
63
|
await delay(10);
|
|
48
64
|
const diversitySearchviewer = getSearchViewer(viewer, 'SequenceDiversitySearchViewer');
|
|
65
|
+
viewList.push(diversitySearchviewer);
|
|
66
|
+
viewList.push(molecules);
|
|
49
67
|
if (!diversitySearchviewer.renderMolIds)
|
|
50
68
|
await waitForCompute(diversitySearchviewer);
|
|
51
69
|
expect(diversitySearchviewer.fingerprint, 'Morgan');
|
|
52
70
|
expect(diversitySearchviewer.distanceMetric, 'Tanimoto');
|
|
53
71
|
expect(diversitySearchviewer.initialized, true);
|
|
54
72
|
expect(diversitySearchviewer.renderMolIds.length > 0, true);
|
|
55
|
-
diversitySearchviewer.close();
|
|
56
|
-
molecules.close();
|
|
57
73
|
}
|
|
58
74
|
|
|
59
75
|
function getSearchViewer(viewer: DG.Viewer, name: string) {
|