@datagrok/bio 2.0.31 → 2.0.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +162 -116
- package/dist/package.js +159 -113
- package/package.json +2 -2
- package/src/analysis/sequence-activity-cliffs.ts +18 -0
- package/src/analysis/sequence-space.ts +18 -3
- package/src/calculations/monomerLevelMols.ts +1 -1
- package/src/package.ts +20 -19
- package/src/tests/activity-cliffs-tests.ts +2 -2
- package/src/tests/activity-cliffs-utils.ts +1 -1
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.31",
|
|
8
|
+
"version": "2.0.32",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
17
|
"@datagrok-libraries/bio": "^5.8.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
19
|
-
"@datagrok-libraries/ml": "^6.2.
|
|
19
|
+
"@datagrok-libraries/ml": "^6.2.2",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.14.1",
|
|
21
21
|
"cash-dom": "^8.0.0",
|
|
22
22
|
"datagrok-api": "^1.8.1",
|
package/src/analysis/sequence-activity-cliffs.ts
CHANGED
|
@@ -10,6 +10,7 @@ import {TAGS} from '../utils/constants';
|
|
|
10
10
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
11
11
|
import * as C from '../utils/constants';
|
|
12
12
|
import { GridColumn } from 'datagrok-api/dg';
|
|
13
|
+
import { invalidateMols, MONOMERIC_COL_TAGS } from '../substructure-search/substructure-search';
|
|
13
14
|
|
|
14
15
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
15
16
|
const stringArray = col.toList();
|
|
@@ -39,6 +40,23 @@ export async function getSimilaritiesMarix(dim: number, seqCol: DG.Column, df: D
|
|
|
39
40
|
return simArr;
|
|
40
41
|
}
|
|
41
42
|
|
|
43
|
+
export async function getChemSimilaritiesMarix(dim: number, seqCol: DG.Column,
|
|
44
|
+
df: DG.DataFrame, colName: string, simArr: DG.Column[])
|
|
45
|
+
: Promise<DG.Column[]> {
|
|
46
|
+
if (seqCol.version !== seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
47
|
+
await invalidateMols(seqCol, false);
|
|
48
|
+
const fpDf = DG.DataFrame.create(seqCol.length);
|
|
49
|
+
fpDf.columns.addNewString(colName).init((i) => seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS].get(i));
|
|
50
|
+
const res = await grok.functions.call('Chem:getChemSimilaritiesMatrix', {
|
|
51
|
+
dim: dim,
|
|
52
|
+
col: seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
53
|
+
df: fpDf,
|
|
54
|
+
colName: colName,
|
|
55
|
+
simArr: simArr
|
|
56
|
+
});
|
|
57
|
+
return res;
|
|
58
|
+
}
|
|
59
|
+
|
|
42
60
|
export function createTooltipElement(params: ITooltipAndPanelParams): HTMLDivElement {
|
|
43
61
|
const tooltipElement = ui.divH([]);
|
|
44
62
|
const columnNames = ui.divV([
|
package/src/analysis/sequence-space.ts
CHANGED
|
@@ -5,6 +5,8 @@ import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-m
|
|
|
5
5
|
import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
6
|
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
7
7
|
import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
8
|
+
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
9
|
+
import * as grok from 'datagrok-api/grok';
|
|
8
10
|
|
|
9
11
|
export interface ISequenceSpaceResult {
|
|
10
12
|
distance: Matrix;
|
|
@@ -12,9 +14,8 @@ export interface ISequenceSpaceResult {
|
|
|
12
14
|
}
|
|
13
15
|
|
|
14
16
|
export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
/* let preparedData: any;
|
|
17
|
+
// code deprecated since seqCol is encoded
|
|
18
|
+
/* let preparedData: any;
|
|
18
19
|
if (!(spaceParams.seqCol!.tags[DG.TAGS.UNITS] === 'HELM')) {
|
|
19
20
|
const sep = spaceParams.seqCol.getTag(UnitsHandler.TAGS.separator);
|
|
20
21
|
const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
|
|
@@ -37,6 +38,20 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
37
38
|
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
38
39
|
}
|
|
39
40
|
|
|
41
|
+
export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
42
|
+
if (spaceParams.seqCol.version !== spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
43
|
+
await invalidateMols(spaceParams.seqCol, false);
|
|
44
|
+
|
|
45
|
+
const result = await grok.functions.call('Chem:getChemSpaceEmbeddings', {
|
|
46
|
+
col: spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
47
|
+
methodName: spaceParams.methodName,
|
|
48
|
+
similarityMetric: spaceParams.similarityMetric,
|
|
49
|
+
xAxis: spaceParams.embedAxesNames[0],
|
|
50
|
+
yAxis: spaceParams.embedAxesNames[1]
|
|
51
|
+
});
|
|
52
|
+
return result;
|
|
53
|
+
}
|
|
54
|
+
|
|
40
55
|
|
|
41
56
|
export function getEmbeddingColsNames(df: DG.DataFrame) {
|
|
42
57
|
const axes = ['Embed_X', 'Embed_Y'];
|
package/src/calculations/monomerLevelMols.ts
CHANGED
|
@@ -46,7 +46,7 @@ function molV3000FromNonHelmSequence(
|
|
|
46
46
|
M V30 BEGIN CTAB
|
|
47
47
|
`;
|
|
48
48
|
|
|
49
|
-
molV3000 += `M V30 COUNTS ${monomers.length} ${monomers.length - 1} 0 0 0\n`;
|
|
49
|
+
molV3000 += `M V30 COUNTS ${monomers.length} ${monomers.length ? monomers.length - 1 : 0} 0 0 0\n`;
|
|
50
50
|
molV3000 += 'M V30 BEGIN ATOM\n';
|
|
51
51
|
|
|
52
52
|
for (let atomRowI = 0; atomRowI < monomers.length; atomRowI++) {
|
package/src/package.ts
CHANGED
|
@@ -10,9 +10,9 @@ import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/
|
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
12
12
|
import {SequenceAlignment, Aligned} from './seq_align';
|
|
13
|
-
import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
|
|
13
|
+
import {getEmbeddingColsNames, sequenceSpace, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
14
14
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
15
|
-
import {createLinesGrid, createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
15
|
+
import {createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMarix, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
16
16
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq} from '@datagrok-libraries/bio/src/utils/monomer-utils';
|
|
17
17
|
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
18
18
|
import {getMacroMol} from './utils/atomic-works';
|
|
@@ -168,9 +168,6 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
168
168
|
similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
|
|
169
169
|
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
170
170
|
return;
|
|
171
|
-
const encodedCol = encodeMonomers(macroMolecule);
|
|
172
|
-
if (!encodedCol)
|
|
173
|
-
return;
|
|
174
171
|
const axesNames = getEmbeddingColsNames(df);
|
|
175
172
|
const options = {
|
|
176
173
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
@@ -184,17 +181,17 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
184
181
|
const sp = await getActivityCliffs(
|
|
185
182
|
df,
|
|
186
183
|
macroMolecule,
|
|
187
|
-
|
|
184
|
+
null,
|
|
188
185
|
axesNames,
|
|
189
186
|
'Activity cliffs',
|
|
190
187
|
activities,
|
|
191
188
|
similarity,
|
|
192
|
-
'
|
|
189
|
+
'Tanimoto',
|
|
193
190
|
methodName,
|
|
194
191
|
DG.SEMTYPE.MACROMOLECULE,
|
|
195
192
|
tags,
|
|
196
|
-
|
|
197
|
-
|
|
193
|
+
sequenceSpaceByFingerprints,
|
|
194
|
+
getChemSimilaritiesMarix,
|
|
198
195
|
createTooltipElement,
|
|
199
196
|
createPropPanelElement,
|
|
200
197
|
createLinesGrid,
|
|
@@ -216,26 +213,30 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
216
213
|
if (!checkInputColumnUi(macroMolecule, 'Sequence space'))
|
|
217
214
|
return;
|
|
218
215
|
|
|
219
|
-
if (macroMolecule.version !== macroMolecule.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
220
|
-
await invalidateMols(macroMolecule, false);
|
|
221
216
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
217
|
+
const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
|
|
218
|
+
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
|
|
222
219
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
col: macroMolecule.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
220
|
+
const chemSpaceParams = {
|
|
221
|
+
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
226
222
|
methodName: methodName,
|
|
227
223
|
similarityMetric: similarityMetric,
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
224
|
+
embedAxesNames: embedColsNames
|
|
225
|
+
};
|
|
226
|
+
const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
|
|
227
|
+
const embeddings = sequenceSpaceRes.coordinates;
|
|
228
|
+
for (const col of embeddings) {
|
|
229
|
+
const listValues = col.toList();
|
|
230
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
231
|
+
table.columns.add(DG.Column.fromList('double', col.name, listValues));
|
|
232
|
+
}
|
|
232
233
|
if (plotEmbeddings) {
|
|
233
234
|
return grok.shell
|
|
234
235
|
.tableView(table.name)
|
|
235
236
|
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
236
237
|
};
|
|
237
238
|
|
|
238
|
-
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
239
|
+
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
239
240
|
if (!encodedCol)
|
|
240
241
|
return;
|
|
241
242
|
const embedColsNames = getEmbeddingColsNames(table);
|
package/src/tests/activity-cliffs-tests.ts
CHANGED
|
@@ -16,7 +16,7 @@ category('activityCliffs', async () => {
|
|
|
16
16
|
test('activityCliffsOpens', async () => {
|
|
17
17
|
actCliffsDf = await readDataframe('tests/sample_MSA_data.csv');
|
|
18
18
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
19
|
-
await _testActivityCliffsOpen(actCliffsDf,
|
|
19
|
+
await _testActivityCliffsOpen(actCliffsDf, 57, 'UMAP', 'MSA');
|
|
20
20
|
grok.shell.closeTable(actCliffsDf);
|
|
21
21
|
actCliffsTableView.close();
|
|
22
22
|
});
|
|
@@ -24,7 +24,7 @@ category('activityCliffs', async () => {
|
|
|
24
24
|
test('activityCliffsWithEmptyRows', async () => {
|
|
25
25
|
actCliffsDfWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
|
|
26
26
|
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
27
|
-
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows,
|
|
27
|
+
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 57, 'UMAP', 'MSA');
|
|
28
28
|
grok.shell.closeTable(actCliffsDfWithEmptyRows);
|
|
29
29
|
actCliffsTableViewWithEmptyRows.close();
|
|
30
30
|
});
|