@datagrok/bio 2.10.29 → 2.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.10.29",
8
+ "version": "2.11.1",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,9 +34,9 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.38.13",
37
+ "@datagrok-libraries/bio": "^5.39.0",
38
38
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
- "@datagrok-libraries/ml": "^6.3.50",
39
+ "@datagrok-libraries/ml": "^6.3.51",
40
40
  "@datagrok-libraries/tutorials": "^1.3.6",
41
41
  "@datagrok-libraries/utils": "^4.0.17",
42
42
  "cash-dom": "^8.0.0",
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import wu from 'wu';
6
6
 
7
- import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
+ import {IWebLogoViewer, WebLogoProps} from '@datagrok-libraries/bio/src/viewers/web-logo';
8
8
 
9
9
  import {PROPS as wlPROPS} from '../viewers/web-logo-viewer';
10
10
 
@@ -14,7 +14,11 @@ export class WebLogoApp {
14
14
  df: DG.DataFrame;
15
15
  view: DG.TableView;
16
16
 
17
- constructor(private readonly urlParams: URLSearchParams, private readonly funcName: string) {}
17
+ constructor(
18
+ private readonly urlParams: URLSearchParams,
19
+ private readonly funcName: string,
20
+ private readonly options: Partial<WebLogoProps> = {}
21
+ ) {}
18
22
 
19
23
  async init(df: DG.DataFrame): Promise<void> {
20
24
  this.df = df;
@@ -32,7 +36,7 @@ export class WebLogoApp {
32
36
  this.view = grok.shell.addTableView(this.df);
33
37
  this.view.path = this.view.basePath = `func/${_package.name}.${this.funcName}?${urlParamsTxt}`;
34
38
 
35
- const options: { [p: string]: any } = {sequenceColumnName: 'sequence'};
39
+ const options: { [p: string]: any } = {...this.options, ...{sequenceColumnName: 'sequence'}};
36
40
  for (const [optName, optValue] of this.urlParams.entries()) {
37
41
  switch (optName) {
38
42
  // boolean
package/src/package.ts CHANGED
@@ -3,36 +3,52 @@ import * as grok from 'datagrok-api/grok';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
  import * as DG from 'datagrok-api/dg';
5
5
 
6
+
7
+ import {delay} from '@datagrok-libraries/utils/src/test';
8
+ import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
9
+ import {Options} from '@datagrok-libraries/utils/src/type-declarations';
10
+ import {RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
11
+ import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
12
+ import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
13
+ import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
14
+ import {
15
+ ISequenceSpaceParams, getActivityCliffs, SequenceSpaceFunc
16
+ } from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
17
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
18
+ import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
19
+ import {
20
+ TAGS as bioTAGS, ALPHABET, NOTATION,
21
+ } from '@datagrok-libraries/bio/src/utils/macromolecule';
22
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
23
+ import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
24
+ import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
25
+ import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
26
+ import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
27
+ import {SCORE, calculateScores} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
28
+ import {
29
+ createJsonMonomerLibFromSdf, IMonomerLibHelper
30
+ } from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
31
+
32
+ import {getMacromoleculeColumns} from './utils/ui-utils';
6
33
  import {
7
34
  MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer,
8
35
  } from './utils/cell-renderer';
9
36
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
10
37
  import {SequenceAlignment} from './seq_align';
11
- import {ISequenceSpaceResult, getEmbeddingColsNames, getSequenceSpace} from './analysis/sequence-space';
12
- import {ISequenceSpaceParams, getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
38
+ import {
39
+ ISequenceSpaceResult, getEmbeddingColsNames, getSequenceSpace, sequenceSpaceByFingerprints
40
+ } from './analysis/sequence-space';
13
41
  import {
14
42
  createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
15
43
  } from './analysis/sequence-activity-cliffs';
16
- import {convert} from './utils/convert';
17
- import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
18
- import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
19
- import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
20
- import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
21
-
22
44
  import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
23
45
  import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
24
46
  import {SubstructureSearchDialog} from './substructure-search/substructure-search';
47
+ import {convert} from './utils/convert';
48
+ import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
25
49
  import {saveAsFastaUI} from './utils/save-as-fasta';
26
50
  import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
27
- import {delay} from '@datagrok-libraries/utils/src/test';
28
- import {
29
- TAGS as bioTAGS, ALPHABET, NOTATION,
30
- } from '@datagrok-libraries/bio/src/utils/macromolecule';
31
- import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
32
- import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
33
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
34
51
  import {WebLogoViewer} from './viewers/web-logo-viewer';
35
- import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
36
52
  import {
37
53
  MonomerLibHelper,
38
54
  getUserLibSettings,
@@ -40,12 +56,6 @@ import {
40
56
  getLibFileNameList,
41
57
  getLibraryPanelUI
42
58
  } from './utils/monomer-lib';
43
- import {getMacromoleculeColumns} from './utils/ui-utils';
44
- import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
45
- import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
46
- import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
47
- import {SCORE, calculateScores} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
48
-
49
59
  import {demoBio01UI} from './demo/bio01-similarity-diversity';
50
60
  import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
51
61
  import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
@@ -53,14 +63,11 @@ import {demoBio03UI} from './demo/bio03-atomic-level';
53
63
  import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
54
64
  import {checkInputColumnUI} from './utils/check-input-column';
55
65
  import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
56
- import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
57
- import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
58
66
  import {WebLogoApp} from './apps/web-logo-app';
59
67
  import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
60
68
  import {splitToMonomersUI} from './utils/split-to-monomers';
61
69
  import {MonomerCellRenderer} from './utils/monomer-cell-renderer';
62
70
  import {BioPackage, BioPackageProperties} from './package-types';
63
- import {RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
64
71
  import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-widget';
65
72
  import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
66
73
  import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
@@ -71,11 +78,12 @@ import {getRegionDo} from './utils/get-region';
71
78
  import {GetRegionApp} from './apps/get-region-app';
72
79
  import {GetRegionFuncEditor} from './utils/get-region-func-editor';
73
80
  import {HelmToMolfileConverter} from './utils/helm-to-molfile';
74
- import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
75
- from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
76
- import {Options} from '@datagrok-libraries/utils/src/type-declarations';
77
81
  import {sequenceToMolfile} from './utils/sequence-to-mol';
82
+ import {errInfo} from './utils/err-info';
83
+
78
84
  import {SHOW_SCATTERPLOT_PROGRESS} from '@datagrok-libraries/ml/src/functionEditors/seq-space-base-editor';
85
+ import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
86
+ from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
79
87
 
80
88
  export const _package = new BioPackage();
81
89
 
@@ -404,6 +412,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
404
412
  const ncUH = UnitsHandler.getOrCreate(macroMolecule);
405
413
  let columnDistanceMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
406
414
  let seqCol = macroMolecule;
415
+ let sequenceSpaceFunc: SequenceSpaceFunc = sequenceSpaceByFingerprints;
407
416
  if (ncUH.isFasta() || (ncUH.isSeparator() && ncUH.alphabet && ncUH.alphabet !== ALPHABET.UN)) {
408
417
  if (ncUH.isFasta()) {
409
418
  columnDistanceMetric = ncUH.getDistanceFunctionName();
@@ -413,6 +422,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
413
422
  columnDistanceMetric = uh.getDistanceFunctionName();
414
423
  tags.units = NOTATION.FASTA;
415
424
  }
425
+ sequenceSpaceFunc = getSequenceSpace;
416
426
  }
417
427
  const runCliffs = async () => {
418
428
  const sp = await getActivityCliffs(
@@ -427,7 +437,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
427
437
  methodName,
428
438
  DG.SEMTYPE.MACROMOLECULE,
429
439
  tags,
430
- getSequenceSpace,
440
+ sequenceSpaceFunc,
431
441
  getChemSimilaritiesMatrix,
432
442
  createTooltipElement,
433
443
  createPropPanelElement,
@@ -443,20 +453,23 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
443
453
  return;
444
454
  }
445
455
 
446
- if (df.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
447
- ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
456
+ return new Promise<DG.Viewer>((resolve, reject) => {
457
+ if (df.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
458
+ ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
448
459
  Do you want to continue?`))
449
- .onOK(async () => {
450
- const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
451
- const res = await runCliffs();
452
- progressBar.close();
453
- return res;
454
- })
455
- .show();
456
- } else {
457
- const res = await runCliffs();
458
- return res;
459
- }
460
+ .onOK(async () => {
461
+ const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
462
+ runCliffs().then((res) => resolve(res)).catch((err) => reject(err)).finally(() => { progressBar.close();});
463
+ })
464
+ .show();
465
+ } else {
466
+ runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
467
+ }
468
+ }).catch((err: any) => {
469
+ const [errMsg, errStack] = errInfo(err);
470
+ _package.logger.error(errMsg, undefined, errStack);
471
+ throw err;
472
+ });
460
473
  }
461
474
 
462
475
  //top-menu: Bio | Analyze | Sequence Space...
@@ -585,7 +598,7 @@ export async function sequenceSpaceTopMenu(
585
598
  table.columns.add(embedCol);
586
599
  }
587
600
  embedCol.init((i) => listValues[i]);
588
- //table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
601
+ //table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
589
602
  }
590
603
  if (plotEmbeddings) {
591
604
  if (!scatterPlot) {
@@ -976,6 +989,20 @@ export async function webLogoLargeApp(): Promise<void> {
976
989
  }
977
990
  }
978
991
 
992
+ //name: webLogoAggApp
993
+ export async function webLogoAggApp(): Promise<void> {
994
+ const pi = DG.TaskBarProgressIndicator.create('WebLogo ...');
995
+ try {
996
+ const urlParams = new URLSearchParams(window.location.search);
997
+ const app = new WebLogoApp(urlParams, 'webLogoAggApp');
998
+ const df: DG.DataFrame = await _package.files.readCsv('data/sample_FASTA_PT_activity.csv');
999
+ await grok.data.detectSemanticTypes(df);
1000
+ await app.init(df);
1001
+ } finally {
1002
+ pi.close();
1003
+ }
1004
+ }
1005
+
979
1006
  //name: getRegionApp
980
1007
  export async function getRegionApp(): Promise<void> {
981
1008
  const pi = DG.TaskBarProgressIndicator.create('getRegion ...');
@@ -55,7 +55,7 @@ ATC-G-TTGC--
55
55
  for (let i = 0; i < positions.length; i++) {
56
56
  expect(positions[i].name, resAllDf1[i].name);
57
57
  for (const m of positions[i].getMonomers())
58
- expect(positions[i].getFreq(m).count, resAllDf1[i].getFreq(m).count);
58
+ expect(positions[i].getFreq(m).rowCount, resAllDf1[i].getFreq(m).rowCount);
59
59
  }
60
60
  });
61
61
 
@@ -104,7 +104,7 @@ ATC-G-TTGC--
104
104
  for (let i = 0; i < positions.length; i++) {
105
105
  expect(positions[i].name, resAllDf1[i].name);
106
106
  for (const m of positions[i].getMonomers())
107
- expect(positions[i].getFreq(m).count, resAllDf1[i].getFreq(m).count);
107
+ expect(positions[i].getFreq(m).rowCount, resAllDf1[i].getFreq(m).rowCount);
108
108
  }
109
109
  });
110
110
 
@@ -204,7 +204,7 @@ function expectPositionInfo(actualPos: PI, expectedPos: PI): void {
204
204
  expectArray(actualPos.getMonomers(), expectedPos.getMonomers());
205
205
  for (const key of actualPos.getMonomers()) {
206
206
  //
207
- expect(actualPos.getFreq(key).count, expectedPos.getFreq(key).count);
207
+ expect(actualPos.getFreq(key).rowCount, expectedPos.getFreq(key).rowCount);
208
208
  }
209
209
  }
210
210
 
@@ -1,5 +1,4 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
- import * as ui from 'datagrok-api/ui';
3
2
  import * as DG from 'datagrok-api/dg';
4
3
 
5
4
  import {after, before, category, test} from '@datagrok-libraries/utils/src/test';
@@ -8,6 +7,8 @@ import {readDataframe} from './utils';
8
7
  import {_testActivityCliffsOpen} from './activity-cliffs-utils';
9
8
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
10
9
 
10
+ import {_package} from '../package-test';
11
+
11
12
 
12
13
  category('activityCliffs', async () => {
13
14
  let actCliffsTableView: DG.TableView;
@@ -37,7 +38,8 @@ category('activityCliffs', async () => {
37
38
  viewList.push(actCliffsTableView);
38
39
  const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
39
40
 
40
- await _testActivityCliffsOpen(actCliffsDf, cliffsNum, DimReductionMethods.UMAP, 'sequence');
41
+ await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
42
+ 'sequence', 'Activity', 90, cliffsNum);
41
43
  });
42
44
 
43
45
  test('activityCliffsWithEmptyRows', async () => {
@@ -46,6 +48,15 @@ category('activityCliffs', async () => {
46
48
  actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
47
49
  viewList.push(actCliffsTableViewWithEmptyRows);
48
50
 
49
- await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 3, DimReductionMethods.UMAP, 'sequence');
51
+ await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, DimReductionMethods.UMAP,
52
+ 'sequence', 'Activity', 90, 3);
53
+ });
54
+
55
+ test('Helm', async () => {
56
+ const df = await _package.files.readCsv('samples/sample_HELM.csv');
57
+ const view = grok.shell.addTableView(df);
58
+
59
+ await _testActivityCliffsOpen(df, DimReductionMethods.UMAP,
60
+ 'HELM', 'Activity', 90, 53);
50
61
  });
51
62
  });
@@ -5,12 +5,13 @@ import {expect} from '@datagrok-libraries/utils/src/test';
5
5
  import {activityCliffs, BYPASS_LARGE_DATA_WARNING} from '../package';
6
6
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
7
7
 
8
- export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: DimReductionMethods,
9
- colName: string) {
8
+ export async function _testActivityCliffsOpen(df: DG.DataFrame, drMethod: DimReductionMethods,
9
+ seqColName: string, activityColName: string, similarityThr: number, tgtNumberCliffs: number
10
+ ): Promise<void> {
10
11
  await grok.data.detectSemanticTypes(df);
11
12
  const scatterPlot = await activityCliffs(
12
- df, df.getCol(colName), df.getCol('activity'),
13
- 90, method, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
13
+ df, df.getCol(seqColName), df.getCol(activityColName),
14
+ similarityThr, drMethod, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
14
15
  // const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
15
16
  // table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
16
17
  // similarity: 50, methodName: method
@@ -27,5 +28,5 @@ export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: nu
27
28
  const classList: string[] = el.className.split(' ');
28
29
  return ['ui-btn', 'ui-btn-ok'].every((reqClassName) => classList.includes(reqClassName));
29
30
  });
30
- expect((cliffsLink as HTMLElement).innerText.toLowerCase(), `${numberCliffs} cliffs`);
31
+ expect((cliffsLink as HTMLElement).innerText.toLowerCase(), `${tgtNumberCliffs} cliffs`);
31
32
  }
@@ -0,0 +1,29 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ export type AggValueListType = (number | null)[] | Float32Array | Int32Array;
6
+ export type AggFunc = (valueList: AggValueListType) => number | null;
7
+
8
+ export function getAgg(agg: DG.AggregationType): AggFunc {
9
+ let res: AggFunc;
10
+
11
+ function buildCol(valueList: AggValueListType): DG.Column<number> {
12
+ let resCol: DG.Column<number>;
13
+ const resColName = `agg`;
14
+ if (valueList instanceof Float32Array)
15
+ resCol = DG.Column.fromFloat32Array(resColName, valueList as Float32Array);
16
+ else if (valueList instanceof Int32Array)
17
+ resCol = DG.Column.fromInt32Array(resColName, valueList as Int32Array);
18
+ else
19
+ resCol = DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, resColName, valueList as (number | null)[]);
20
+
21
+ return resCol;
22
+ }
23
+
24
+ return (valueList: AggValueListType): number | null => {
25
+ const aggCol = buildCol(valueList);
26
+ const res = aggCol.aggregate(agg);
27
+ return res;
28
+ };
29
+ }