@datagrok/bio 2.0.30 → 2.0.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.0.30",
8
+ "version": "2.0.32",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,12 +14,12 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": "^3.1.0",
17
- "@datagrok-libraries/bio": "^5.7.0",
17
+ "@datagrok-libraries/bio": "^5.8.0",
18
18
  "@datagrok-libraries/chem-meta": "1.0.1",
19
- "@datagrok-libraries/ml": "^6.2.1",
20
- "@datagrok-libraries/utils": "^1.11.1",
19
+ "@datagrok-libraries/ml": "^6.2.2",
20
+ "@datagrok-libraries/utils": "^1.14.1",
21
21
  "cash-dom": "^8.0.0",
22
- "datagrok-api": "^1.7.0",
22
+ "datagrok-api": "^1.8.1",
23
23
  "dayjs": "^1.11.4",
24
24
  "openchemlib": "6.0.1",
25
25
  "rxjs": "^6.5.5",
@@ -10,6 +10,7 @@ import {TAGS} from '../utils/constants';
10
10
  import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
11
11
  import * as C from '../utils/constants';
12
12
  import { GridColumn } from 'datagrok-api/dg';
13
+ import { invalidateMols, MONOMERIC_COL_TAGS } from '../substructure-search/substructure-search';
13
14
 
14
15
  export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
15
16
  const stringArray = col.toList();
@@ -39,6 +40,23 @@ export async function getSimilaritiesMarix(dim: number, seqCol: DG.Column, df: D
39
40
  return simArr;
40
41
  }
41
42
 
43
+ export async function getChemSimilaritiesMarix(dim: number, seqCol: DG.Column,
44
+ df: DG.DataFrame, colName: string, simArr: DG.Column[])
45
+ : Promise<DG.Column[]> {
46
+ if (seqCol.version !== seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
47
+ await invalidateMols(seqCol, false);
48
+ const fpDf = DG.DataFrame.create(seqCol.length);
49
+ fpDf.columns.addNewString(colName).init((i) => seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS].get(i));
50
+ const res = await grok.functions.call('Chem:getChemSimilaritiesMatrix', {
51
+ dim: dim,
52
+ col: seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
53
+ df: fpDf,
54
+ colName: colName,
55
+ simArr: simArr
56
+ });
57
+ return res;
58
+ }
59
+
42
60
  export function createTooltipElement(params: ITooltipAndPanelParams): HTMLDivElement {
43
61
  const tooltipElement = ui.divH([]);
44
62
  const columnNames = ui.divV([
@@ -5,6 +5,8 @@ import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-m
5
5
  import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
6
6
  import BitArray from '@datagrok-libraries/utils/src/bit-array';
7
7
  import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
8
+ import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
9
+ import * as grok from 'datagrok-api/grok';
8
10
 
9
11
  export interface ISequenceSpaceResult {
10
12
  distance: Matrix;
@@ -12,9 +14,8 @@ export interface ISequenceSpaceResult {
12
14
  }
13
15
 
14
16
  export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
15
-
16
- // code deprecated since seqCol is encoded
17
- /* let preparedData: any;
17
+ // code deprecated since seqCol is encoded
18
+ /* let preparedData: any;
18
19
  if (!(spaceParams.seqCol!.tags[DG.TAGS.UNITS] === 'HELM')) {
19
20
  const sep = spaceParams.seqCol.getTag(UnitsHandler.TAGS.separator);
20
21
  const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
@@ -37,6 +38,20 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
37
38
  return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
38
39
  }
39
40
 
41
+ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
42
+ if (spaceParams.seqCol.version !== spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
43
+ await invalidateMols(spaceParams.seqCol, false);
44
+
45
+ const result = await grok.functions.call('Chem:getChemSpaceEmbeddings', {
46
+ col: spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
47
+ methodName: spaceParams.methodName,
48
+ similarityMetric: spaceParams.similarityMetric,
49
+ xAxis: spaceParams.embedAxesNames[0],
50
+ yAxis: spaceParams.embedAxesNames[1]
51
+ });
52
+ return result;
53
+ }
54
+
40
55
 
41
56
  export function getEmbeddingColsNames(df: DG.DataFrame) {
42
57
  const axes = ['Embed_X', 'Embed_Y'];
@@ -46,7 +46,7 @@ function molV3000FromNonHelmSequence(
46
46
  M V30 BEGIN CTAB
47
47
  `;
48
48
 
49
- molV3000 += `M V30 COUNTS ${monomers.length} ${monomers.length - 1} 0 0 0\n`;
49
+ molV3000 += `M V30 COUNTS ${monomers.length} ${monomers.length ? monomers.length - 1 : 0} 0 0 0\n`;
50
50
  molV3000 += 'M V30 BEGIN ATOM\n';
51
51
 
52
52
  for (let atomRowI = 0; atomRowI < monomers.length; atomRowI++) {
package/src/package.ts CHANGED
@@ -10,9 +10,9 @@ import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/
10
10
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
11
  import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
12
12
  import {SequenceAlignment, Aligned} from './seq_align';
13
- import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
13
+ import {getEmbeddingColsNames, sequenceSpace, sequenceSpaceByFingerprints} from './analysis/sequence-space';
14
14
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
15
- import {createLinesGrid, createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
15
+ import {createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMarix, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
16
16
  import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq} from '@datagrok-libraries/bio/src/utils/monomer-utils';
17
17
  import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
18
18
  import {getMacroMol} from './utils/atomic-works';
@@ -168,9 +168,6 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
168
168
  similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
169
169
  if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
170
170
  return;
171
- const encodedCol = encodeMonomers(macroMolecule);
172
- if (!encodedCol)
173
- return;
174
171
  const axesNames = getEmbeddingColsNames(df);
175
172
  const options = {
176
173
  'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
@@ -184,17 +181,17 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
184
181
  const sp = await getActivityCliffs(
185
182
  df,
186
183
  macroMolecule,
187
- encodedCol,
184
+ null,
188
185
  axesNames,
189
186
  'Activity cliffs',
190
187
  activities,
191
188
  similarity,
192
- 'Levenshtein',
189
+ 'Tanimoto',
193
190
  methodName,
194
191
  DG.SEMTYPE.MACROMOLECULE,
195
192
  tags,
196
- sequenceSpace,
197
- getSimilaritiesMarix,
193
+ sequenceSpaceByFingerprints,
194
+ getChemSimilaritiesMarix,
198
195
  createTooltipElement,
199
196
  createPropPanelElement,
200
197
  createLinesGrid,
@@ -216,26 +213,30 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
216
213
  if (!checkInputColumnUi(macroMolecule, 'Sequence space'))
217
214
  return;
218
215
 
219
- if (macroMolecule.version !== macroMolecule.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
220
- await invalidateMols(macroMolecule, false);
221
216
  const embedColsNames = getEmbeddingColsNames(table);
217
+ const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
218
+ const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
222
219
 
223
- await grok.functions.call('Chem:getChemSpaceEmbeddings', {
224
- table: table,
225
- col: macroMolecule.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
220
+ const chemSpaceParams = {
221
+ seqCol: withoutEmptyValues.col(macroMolecule.name)!,
226
222
  methodName: methodName,
227
223
  similarityMetric: similarityMetric,
228
- xAxis: embedColsNames[0],
229
- yAxis: embedColsNames[1]
230
- });
231
-
224
+ embedAxesNames: embedColsNames
225
+ };
226
+ const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
227
+ const embeddings = sequenceSpaceRes.coordinates;
228
+ for (const col of embeddings) {
229
+ const listValues = col.toList();
230
+ emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
231
+ table.columns.add(DG.Column.fromList('double', col.name, listValues));
232
+ }
232
233
  if (plotEmbeddings) {
233
234
  return grok.shell
234
235
  .tableView(table.name)
235
236
  .scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
236
237
  };
237
238
 
238
- /* const encodedCol = encodeMonomers(macroMolecule);
239
+ /* const encodedCol = encodeMonomers(macroMolecule);
239
240
  if (!encodedCol)
240
241
  return;
241
242
  const embedColsNames = getEmbeddingColsNames(table);
@@ -16,7 +16,7 @@ category('activityCliffs', async () => {
16
16
  test('activityCliffsOpens', async () => {
17
17
  actCliffsDf = await readDataframe('tests/sample_MSA_data.csv');
18
18
  actCliffsTableView = grok.shell.addTableView(actCliffsDf);
19
- await _testActivityCliffsOpen(actCliffsDf, 36, 'UMAP', 'MSA');
19
+ await _testActivityCliffsOpen(actCliffsDf, 57, 'UMAP', 'MSA');
20
20
  grok.shell.closeTable(actCliffsDf);
21
21
  actCliffsTableView.close();
22
22
  });
@@ -24,7 +24,7 @@ category('activityCliffs', async () => {
24
24
  test('activityCliffsWithEmptyRows', async () => {
25
25
  actCliffsDfWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
26
26
  actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
27
- await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 37, 'UMAP', 'MSA');
27
+ await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 57, 'UMAP', 'MSA');
28
28
  grok.shell.closeTable(actCliffsDfWithEmptyRows);
29
29
  actCliffsTableViewWithEmptyRows.close();
30
30
  });
@@ -1,20 +1,21 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
+ import * as grok from 'datagrok-api/grok';
3
+
2
4
  import {delay, expect} from '@datagrok-libraries/utils/src/test';
3
5
  import {_package} from '../package-test';
4
- import { activityCliffs } from '../package';
5
- import * as grok from 'datagrok-api/grok';
6
+ import {activityCliffs} from '../package';
6
7
 
7
8
  export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: string, colName: string) {
8
9
  await grok.data.detectSemanticTypes(df);
9
10
  const scatterPlot = await activityCliffs(
10
- df,
11
- df.col(colName)!,
12
- df.col('Activity')!,
13
- 90,
14
- method);
11
+ df,
12
+ df.col(colName)!,
13
+ df.col('Activity')!,
14
+ 50,
15
+ method);
15
16
 
16
- expect(scatterPlot != null, true);
17
+ expect(scatterPlot != null, true);
17
18
 
18
- const cliffsLink = Array.from(scatterPlot!.root.children).filter(it => it.className === 'ui-btn ui-btn-ok');
19
- expect((cliffsLink[0] as HTMLElement).innerText, `${numberCliffs} cliffs`);
19
+ const cliffsLink = Array.from(scatterPlot!.root.children).filter(it => it.className === 'ui-btn ui-btn-ok');
20
+ expect((cliffsLink[0] as HTMLElement).innerText.toLowerCase(), `${numberCliffs} cliffs`);
20
21
  }
@@ -58,7 +58,7 @@ category('renderers', () => {
58
58
 
59
59
  test('setRendererManually', async () => {
60
60
  await _setRendererManually();
61
- });
61
+ }, {skipReason: 'GROK-11212'});
62
62
 
63
63
  async function _rendererMacromoleculeFasta() {
64
64
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.csv');
@@ -45,9 +45,21 @@ category('substructureFilters', async () => {
45
45
  const filter = new BioSubstructureFilter();
46
46
  await grok.data.detectSemanticTypes(helm);
47
47
  filter.attach(helm);
48
+
49
+ const helmFilterChanged = new Promise((resolve, reject) => {
50
+ helm.onFilterChanged.subscribe(async (_: any) => {
51
+ try {
52
+ resolve(true);
53
+ } catch (error) {
54
+ reject(error);
55
+ }
56
+ });
57
+ });
48
58
  (filter.bioFilter! as HelmFilter).helmSubstructure = 'PEPTIDE1{C}$$$$V2.0';
49
59
  filter.bioFilter!.onChanged.next();
50
- await delay(2000);
60
+ await helmFilterChanged;
61
+
62
+ //await delay(3000);
51
63
  expect(filter.dataFrame!.filter.trueCount, 2);
52
64
  expect(filter.dataFrame!.filter.get(0), true);
53
65
  expect(filter.dataFrame!.filter.get(3), true);