@datagrok/bio 2.4.30 → 2.4.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/.eslintrc.json +6 -8
  2. package/README.md +22 -7
  3. package/detectors.js +21 -12
  4. package/dist/1.js +2 -0
  5. package/dist/1.js.map +1 -0
  6. package/dist/18.js +2 -0
  7. package/dist/18.js.map +1 -0
  8. package/dist/190.js +2 -0
  9. package/dist/190.js.map +1 -0
  10. package/dist/452.js +2 -0
  11. package/dist/452.js.map +1 -0
  12. package/dist/729.js +2 -0
  13. package/dist/729.js.map +1 -0
  14. package/dist/package-test.js +1 -1
  15. package/dist/package-test.js.map +1 -1
  16. package/dist/package.js +1 -1
  17. package/dist/package.js.map +1 -1
  18. package/files/libraries/broken-lib.sdf +136 -0
  19. package/files/libraries/group1/mock-lib-3.json +74 -0
  20. package/files/libraries/mock-lib-2.json +48 -0
  21. package/files/tests/100_3_clustests.csv +100 -0
  22. package/files/tests/100_3_clustests_empty_vals.csv +100 -0
  23. package/files/tests/peptides_motif-with-random_10000.csv +9998 -0
  24. package/package.json +4 -4
  25. package/scripts/sequence_generator.py +185 -48
  26. package/src/analysis/sequence-activity-cliffs.ts +9 -11
  27. package/src/analysis/sequence-diversity-viewer.ts +8 -3
  28. package/src/analysis/sequence-search-base-viewer.ts +4 -3
  29. package/src/analysis/sequence-similarity-viewer.ts +13 -7
  30. package/src/analysis/sequence-space.ts +15 -12
  31. package/src/analysis/workers/mm-distance-array-service.ts +48 -0
  32. package/src/analysis/workers/mm-distance-array-worker.ts +29 -0
  33. package/src/analysis/workers/mm-distance-worker-creator.ts +6 -9
  34. package/src/apps/web-logo-app.ts +34 -0
  35. package/src/calculations/monomerLevelMols.ts +10 -12
  36. package/src/demo/bio01-similarity-diversity.ts +4 -5
  37. package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +6 -7
  38. package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +8 -8
  39. package/src/demo/bio03-atomic-level.ts +1 -4
  40. package/src/demo/bio05-helm-msa-sequence-space.ts +8 -5
  41. package/src/demo/utils.ts +4 -3
  42. package/src/package-test.ts +1 -2
  43. package/src/package.ts +138 -83
  44. package/src/seq_align.ts +482 -483
  45. package/src/substructure-search/substructure-search.ts +3 -3
  46. package/src/tests/Palettes-test.ts +1 -1
  47. package/src/tests/WebLogo-positions-test.ts +12 -35
  48. package/src/tests/_first-tests.ts +1 -1
  49. package/src/tests/activity-cliffs-tests.ts +10 -6
  50. package/src/tests/activity-cliffs-utils.ts +6 -4
  51. package/src/tests/bio-tests.ts +20 -25
  52. package/src/tests/checkInputColumn-tests.ts +5 -11
  53. package/src/tests/converters-test.ts +19 -37
  54. package/src/tests/detectors-benchmark-tests.ts +35 -37
  55. package/src/tests/detectors-tests.ts +29 -34
  56. package/src/tests/detectors-weak-and-likely-tests.ts +11 -21
  57. package/src/tests/fasta-export-tests.ts +3 -3
  58. package/src/tests/fasta-handler-test.ts +2 -3
  59. package/src/tests/lib-tests.ts +2 -4
  60. package/src/tests/mm-distance-tests.ts +25 -17
  61. package/src/tests/monomer-libraries-tests.ts +1 -1
  62. package/src/tests/msa-tests.ts +12 -9
  63. package/src/tests/pepsea-tests.ts +6 -3
  64. package/src/tests/renderers-test.ts +13 -11
  65. package/src/tests/sequence-space-test.ts +10 -7
  66. package/src/tests/sequence-space-utils.ts +7 -3
  67. package/src/tests/similarity-diversity-tests.ts +47 -61
  68. package/src/tests/splitters-test.ts +14 -20
  69. package/src/tests/to-atomic-level-tests.ts +9 -17
  70. package/src/tests/units-handler-splitted-tests.ts +106 -0
  71. package/src/tests/units-handler-tests.ts +22 -26
  72. package/src/tests/utils/sequences-generators.ts +6 -2
  73. package/src/tests/utils.ts +10 -4
  74. package/src/tests/viewers.ts +1 -1
  75. package/src/utils/atomic-works.ts +49 -57
  76. package/src/utils/cell-renderer.ts +25 -8
  77. package/src/utils/check-input-column.ts +19 -4
  78. package/src/utils/constants.ts +3 -3
  79. package/src/utils/convert.ts +56 -23
  80. package/src/utils/monomer-lib.ts +83 -64
  81. package/src/utils/multiple-sequence-alignment-ui.ts +24 -21
  82. package/src/utils/multiple-sequence-alignment.ts +2 -2
  83. package/src/utils/pepsea.ts +17 -7
  84. package/src/utils/save-as-fasta.ts +11 -4
  85. package/src/utils/ui-utils.ts +1 -1
  86. package/src/viewers/vd-regions-viewer.ts +21 -22
  87. package/src/viewers/web-logo-viewer.ts +189 -154
  88. package/src/widgets/bio-substructure-filter.ts +9 -6
  89. package/src/widgets/representations.ts +11 -12
  90. package/tsconfig.json +1 -1
  91. package/dist/258.js +0 -2
  92. package/dist/258.js.map +0 -1
  93. package/dist/562.js +0 -2
  94. package/dist/562.js.map +0 -1
  95. package/dist/705.js +0 -2
  96. package/dist/705.js.map +0 -1
  97. package/dist/925.js +0 -2
  98. package/dist/925.js.map +0 -1
  99. package/src/analysis/workers/mm-distance-worker.ts +0 -16
@@ -1,19 +1,17 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
- import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
3
2
  import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
4
3
  import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
5
4
  import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
6
- import BitArray from '@datagrok-libraries/utils/src/bit-array';
7
5
  import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
8
6
  import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
9
7
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
8
  import * as grok from 'datagrok-api/grok';
11
- import { NotationConverter } from '@datagrok-libraries/bio/src/utils/notation-converter';
12
- import { ALPHABET, NOTATION } from '@datagrok-libraries/bio/src/utils/macromolecule';
13
- import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
9
+ import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
10
+ import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
14
12
 
15
13
  export interface ISequenceSpaceResult {
16
- distance: Matrix;
14
+ distance?: Float32Array;
17
15
  coordinates: DG.ColumnList;
18
16
  }
19
17
 
@@ -44,7 +42,8 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
44
42
 
45
43
  export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
46
44
  if (spaceParams.seqCol.version !== spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
47
- await invalidateMols(spaceParams.seqCol, false);
45
+ //we expect only string columns here
46
+ await invalidateMols(spaceParams.seqCol as unknown as DG.Column<string>, false);
48
47
 
49
48
  const result = await grok.functions.call('Chem:getChemSpaceEmbeddings', {
50
49
  col: spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
@@ -52,7 +51,7 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
52
51
  similarityMetric: spaceParams.similarityMetric,
53
52
  xAxis: spaceParams.embedAxesNames[0],
54
53
  yAxis: spaceParams.embedAxesNames[1],
55
- options: spaceParams.options
54
+ options: spaceParams.options,
56
55
  });
57
56
  return result;
58
57
  }
@@ -65,17 +64,21 @@ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams): Promi
65
64
  if (nc.isSeparator()) {
66
65
  const fastaCol = nc.convert(NOTATION.FASTA);
67
66
  seqList = fastaCol.toList();
68
- const uh = new UnitsHandler(fastaCol);
67
+ const uh = UnitsHandler.getOrCreate(fastaCol);
69
68
  distanceFName = uh.getDistanceFunctionName();
70
- }
71
- else {
69
+ } else {
72
70
  distanceFName = nc.getDistanceFunctionName();
73
71
  }
72
+ for (let i = 0; i < seqList.length; i++) {
73
+ // toList puts empty values in array and it causes downstream errors. replace with null
74
+ seqList[i] = spaceParams.seqCol.isNone(i) ? null : seqList[i];
75
+ }
74
76
  const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
75
77
  seqList,
76
78
  spaceParams.methodName,
77
79
  distanceFName,
78
- spaceParams.options);
80
+ spaceParams.options,
81
+ true);
79
82
  const cols: DG.Column[] = spaceParams.embedAxesNames.map(
80
83
  (name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
81
84
  return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
@@ -0,0 +1,48 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
5
+
6
/**
 * Computes the distance between one "template" row and every row of a macromolecule column,
 * spreading the work over several web workers, and rescales the result by the observed range.
 *
 * Each worker receives the full list of values plus its own half-open row range
 * [start, end) and posts back the distances for that range together with the min/max it saw.
 *
 * @param macromoleculeCol Column to process; its semType must be DG.SEMTYPE.MACROMOLECULE.
 * @param templateIdx Row index of the value every other row is compared with.
 * @returns One value per row, computed as (distance - lmin) / (lmax - lmin), where lmin starts
 *   at 0 and lmax starts at Number.MIN_VALUE before being merged with the workers' ranges.
 * @throws Error if the column is not of macromolecule semantic type. The returned promise
 *   rejects with the error reported by a worker if any chunk fails.
 */
export async function calculateMMDistancesArray(
  macromoleculeCol: DG.Column, templateIdx: number
): Promise<Float32Array> {
  const values = macromoleculeCol.toList();
  if (macromoleculeCol.semType !== DG.SEMTYPE.MACROMOLECULE)
    throw new Error('Column has to be of macromolecule type');
  const uh = UnitsHandler.getOrCreate(macromoleculeCol);
  const fnName = uh.getDistanceFunctionName();
  // hardwareConcurrency may be unavailable; fall back to 1 so the count never becomes NaN.
  const availableCores = navigator.hardwareConcurrency || 1;
  const threadCount = Math.min(Math.max(availableCores - 2, 1), values.length);
  const workers = new Array(threadCount).fill(null).map((_i) =>
    new Worker(new URL('mm-distance-array-worker', import.meta.url)));
  const res = new Float32Array(values.length);
  let lmin = 0;
  // Number.MIN_VALUE is the smallest positive double; it keeps (lmax - lmin) non-zero
  // even when every reported distance is 0.
  let lmax = Number.MIN_VALUE;
  const promises = workers.map((worker, i) => {
    const start = Math.floor(i * values.length / threadCount);
    // Only the last worker runs to the end of the column; every other worker stops where
    // the next one starts, so no row is computed twice.
    const end = i === workers.length - 1 ?
      values.length : Math.floor((i + 1) * values.length / threadCount);
    return new Promise<void>((resolve, reject) => {
      worker.onmessage = ({data: {error, distanceArrayData, min, max}}) => {
        if (error) {
          reject(error);
        } else {
          lmin = Math.min(lmin, min);
          lmax = Math.max(lmax, max);
          res.set(distanceArrayData, start);
          resolve();
        }
      };
      // Failures the worker cannot report itself (script load error, uncaught exception)
      // would otherwise leave this promise pending forever.
      worker.onerror = (event) => reject(event.error ?? new Error(event.message));
      worker.postMessage({fnName, values, templateIdx, start, end});
    });
  });

  try {
    await Promise.all(promises);
    res.forEach((value, index) => { res[index] = (value - lmin) / (lmax - lmin); });
  } finally {
    // Release the workers on both the success and the failure path.
    workers.forEach((worker) => worker.terminate());
  }

  return res;
}
@@ -0,0 +1,29 @@
1
+ import {isNil} from '@datagrok-libraries/ml/src/distance-matrix';
2
+ import {mmDistanceFunctions} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
3
+
4
/**
 * Worker entry point. Expects a message of the form
 * {fnName, values, templateIdx, start, end} and replies with a single message containing
 * either `error`, or `distanceArrayData` (one entry per row in [start, end)) plus the
 * `min` and `max` tracked while filling it.
 */
onmessage = (event) => {
  const {fnName, values, templateIdx, start, end} = event.data;
  const response: { error?: any, distanceArrayData?: Float32Array, min?: number, max?: number} = {};
  try {
    // Tracking starts from 0 and Number.MIN_VALUE rather than from the first computed value.
    let smallest = 0;
    let largest = Number.MIN_VALUE;
    const chunk = new Float32Array(end - start);
    const measure = mmDistanceFunctions[fnName as keyof typeof mmDistanceFunctions]();

    for (let row = start; row < end; row++) {
      // A missing value on either side is reported as distance 1.
      let distance = 1;
      if (!isNil(values[row]) && !isNil(values[templateIdx]))
        distance = measure(values[row], values[templateIdx]);

      chunk[row - start] = distance;
      if (distance < smallest)
        smallest = distance;
      if (distance > largest)
        largest = distance;
    }

    response.distanceArrayData = chunk;
    response.min = smallest;
    response.max = largest;
  } catch (e) {
    // Any failure is sent back to the caller instead of being thrown inside the worker.
    response.error = e;
  }
  postMessage(response);
};
@@ -2,21 +2,18 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
5
+ import {DistanceMatrixService} from '@datagrok-libraries/ml/src/distance-matrix';
5
6
 
6
7
  export async function calcMmDistanceMatrix(column: DG.Column<any>): Promise<Float32Array> {
7
8
  const values = column.toList();
8
- const worker = new Worker(new URL('./mm-distance-worker.ts', import.meta.url));
9
9
  if (column.semType !== DG.SEMTYPE.MACROMOLECULE)
10
10
  throw new Error('Column has to be of macromolecule type');
11
- const uh = new UnitsHandler(column);
11
+ const uh = UnitsHandler.getOrCreate(column);
12
12
  const fnName = uh.getDistanceFunctionName();
13
- worker.postMessage({values, fnName});
14
- return new Promise((resolve, reject) => {
15
- worker.onmessage = ({data: {error, distanceMatrixData}}): void => {
16
- worker.terminate();
17
- error ? reject(error) : resolve(distanceMatrixData);
18
- };
19
- });
13
+ const distanceMatrixService = new DistanceMatrixService(true, false);
14
+ const dm = await distanceMatrixService.calc(values, fnName);
15
+ distanceMatrixService.terminate();
16
+ return dm;
20
17
  }
21
18
 
22
19
  // gets index of compressed distance matrix from 2d coordinates
@@ -0,0 +1,34 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
6
+ import {_package} from '../package';
7
+
8
/**
 * Small application wrapper that opens a data frame in a table view and docks a
 * 'WebLogo' viewer beneath it.
 */
export class WebLogoApp {
  private _funcName: string = '';

  df: DG.DataFrame;
  view: DG.TableView;

  constructor() {}

  /**
   * Stores the data frame and the function name used for the view path, then builds the view.
   * @param df Data frame to display.
   * @param funcName Function name appended to the package name in the view path.
   */
  async init(df: DG.DataFrame, funcName: string): Promise<void> {
    this.df = df;
    this._funcName = funcName;

    await this.buildView();
  }

  // -- View --

  /** Adds the table view for `this.df`, sets its path and docks the WebLogo viewer. */
  async buildView(): Promise<void> {
    const tableView = grok.shell.addTableView(this.df);
    this.view = tableView;

    // basePath and path both receive the same `func/<package>.<function>` value.
    const routePath = `func/${_package.name}.${this._funcName}`;
    tableView.basePath = routePath;
    tableView.path = routePath;

    const webLogoOptions = {
      sequenceColumnName: 'sequence',
    };
    const webLogo: DG.Viewer & IWebLogoViewer =
      (await tableView.dataFrame.plot.fromType('WebLogo', webLogoOptions));
    tableView.dockManager.dock(webLogo, DG.DOCK_TYPE.DOWN, null, 'WebLogo', 0.35);
  }
}
@@ -1,35 +1,33 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
-
5
- import * as C from '../utils/constants';
6
4
  import {getHelmMonomers} from '../package';
7
- import {TAGS as bioTAGS, getSplitter, getStats} from '@datagrok-libraries/bio/src/utils/macromolecule';
5
+ import {TAGS as bioTAGS, getSplitter, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
7
 
9
8
  const V2000_ATOM_NAME_POS = 31;
10
9
 
11
- export async function getMonomericMols(mcol: DG.Column<string>,
12
- pattern: boolean = false, monomersDict?: Map<string, string>): Promise<DG.Column> {
13
- const separator: string = mcol.tags[bioTAGS.separator];
14
- const units: string = mcol.tags[DG.TAGS.UNITS];
15
- const splitter = getSplitter(units, separator);
10
+ export async function getMonomericMols(
11
+ mcol: DG.Column<string>, pattern: boolean = false, monomersDict?: Map<string, string>
12
+ ): Promise<DG.Column> {
13
+ const uh = UnitsHandler.getOrCreate(mcol);
16
14
  let molV3000Array;
17
15
  monomersDict ??= new Map();
18
- const monomers = units === 'helm' ?
19
- getHelmMonomers(mcol) : Object.keys(getStats(mcol, 0, splitter).freq).filter((it) => it !== '');
16
+ const monomers = uh.units === NOTATION.HELM ?
17
+ getHelmMonomers(mcol) : Object.keys(uh.stats.freq).filter((it) => it !== '');
20
18
 
21
19
  for (let i = 0; i < monomers.length; i++) {
22
20
  if (!monomersDict.has(monomers[i]))
23
21
  monomersDict.set(monomers[i], `${monomersDict.size + 1}`);
24
22
  }
25
23
 
26
- if (units === 'helm') {
24
+ if (uh.units === NOTATION.HELM) {
27
25
  molV3000Array = await grok.functions.call('HELM:getMolFiles', {col: mcol});
28
26
  molV3000Array = changeV2000ToV3000(molV3000Array, monomersDict, pattern);
29
27
  } else {
30
28
  molV3000Array = new Array<string>(mcol.length);
31
29
  for (let i = 0; i < mcol.length; i++) {
32
- const sequenceMonomers = splitter(mcol.get(i)!).filter((it) => it !== '');
30
+ const sequenceMonomers = uh.splitted[i].filter((it) => it !== '');
33
31
  const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
34
32
  molV3000Array[i] = molV3000;
35
33
  }
@@ -4,7 +4,6 @@ import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {_package} from '../package';
6
6
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
7
- import {delay} from '@datagrok-libraries/utils/src/test';
8
7
  import {handleError} from './utils';
9
8
  import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
10
9
  import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
@@ -36,7 +35,7 @@ export async function demoBio01UI() {
36
35
  // TODO: Fix column width
37
36
  }, {
38
37
  description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
39
- delay: 2000
38
+ delay: 2000,
40
39
  })
41
40
  .step('Find the most similar sequences to the current one', async () => {
42
41
  const simViewer = await df.plot.fromType('Sequence Similarity Search', {
@@ -46,17 +45,17 @@ export async function demoBio01UI() {
46
45
  view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
47
46
  }, {
48
47
  description: `Add 'Sequence Similarity Search' viewer.`,
49
- delay: 2000
48
+ delay: 2000,
50
49
  })
51
50
  .step('Explore most diverse sequences in a dataset', async () => {
52
51
  const divViewer = await df.plot.fromType('Sequence Diversity Search', {
53
52
  moleculeColumnName: 'sequence',
54
- diverseColumnLabel: 'Top diverse sequences of all data'
53
+ diverseColumnLabel: 'Top diverse sequences of all data',
55
54
  }) as SequenceDiversityViewer;
56
55
  view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
57
56
  }, {
58
57
  description: `Add 'Sequence Deversity Search' viewer.`,
59
- delay: 2000
58
+ delay: 2000,
60
59
  })
61
60
  .step('Choose another sequence for similarity search', async () => {
62
61
  df.currentRowIdx = 3;
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
5
5
  import {_package} from '../package';
6
6
 
7
7
  import * as lev from 'fastest-levenshtein';
8
- import {DistanceMatrix} from '@datagrok-libraries/bio/src/trees/distance-matrix';
8
+ import {DistanceMatrix} from '@datagrok-libraries/ml/src/distance-matrix';
9
9
  import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
10
10
  import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
11
11
  import {demoSequenceSpace, handleError} from './utils';
@@ -19,11 +19,9 @@ export async function demoBio01aUI() {
19
19
  let dendrogramSvc: IDendrogramService;
20
20
  let view: DG.TableView;
21
21
  let df: DG.DataFrame;
22
- let spViewer: DG.ScatterPlotViewer;
22
+ let _spViewer: DG.ScatterPlotViewer;
23
23
 
24
24
  const dimRedMethod: string = 'UMAP';
25
- const idRows: { [id: number]: number } = {};
26
- const embedCols: { [colName: string]: DG.Column<number> } = {};
27
25
 
28
26
  try {
29
27
  const demoScript = new DemoScript(
@@ -34,7 +32,7 @@ export async function demoBio01aUI() {
34
32
  [df, treeHelper, dendrogramSvc] = await Promise.all([
35
33
  _package.files.readCsv(dataFn),
36
34
  getTreeHelper(),
37
- getDendrogramService()
35
+ getDendrogramService(),
38
36
  ]);
39
37
  view = grok.shell.addTableView(df);
40
38
  view.grid.props.rowHeight = 22;
@@ -49,10 +47,10 @@ export async function demoBio01aUI() {
49
47
  delay: 2000,
50
48
  })
51
49
  .step('Build sequence space', async () => {
52
- spViewer = await demoSequenceSpace(view, df, seqColName, dimRedMethod);
50
+ _spViewer = await demoSequenceSpace(view, df, seqColName, dimRedMethod);
53
51
  }, {
54
52
  description: `Reduce sequence space dimensionality to display on 2D representation.`,
55
- delay: 2000
53
+ delay: 2000,
56
54
  })
57
55
  .step('Cluster sequences', async () => {
58
56
  const seqCol: DG.Column<string> = df.getCol(seqColName);
@@ -80,6 +78,7 @@ export async function demoBio01aUI() {
80
78
  });
81
79
  df.currentRowIdx = 27;
82
80
  }, {
81
+ // eslint-disable-next-line max-len
83
82
  description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
84
83
  delay: 2000,
85
84
  })
@@ -2,16 +2,17 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {_package, activityCliffs,} from '../package';
5
+ import {_package, activityCliffs} from '../package';
6
6
  import $ from 'cash-dom';
7
7
 
8
8
  import {TEMPS as acTEMPS} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
9
9
  import * as lev from 'fastest-levenshtein';
10
- import {DistanceMatrix} from '@datagrok-libraries/bio/src/trees/distance-matrix';
10
+ import {DistanceMatrix} from '@datagrok-libraries/ml/src/distance-matrix';
11
11
  import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
12
12
  import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
13
13
  import {handleError} from './utils';
14
14
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
15
+ import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
15
16
 
16
17
  const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
17
18
 
@@ -23,8 +24,7 @@ export async function demoBio01bUI() {
23
24
  let view: DG.TableView;
24
25
  let activityCliffsViewer: DG.ScatterPlotViewer;
25
26
 
26
- const dimRedMethod: string = 'UMAP';
27
- const idRows: { [id: number]: number } = {};
27
+ const dimRedMethod: DimReductionMethods = DimReductionMethods.UMAP;
28
28
 
29
29
  try {
30
30
  const demoScript = new DemoScript(
@@ -38,7 +38,7 @@ export async function demoBio01bUI() {
38
38
  [df, treeHelper, dendrogramSvc] = await Promise.all([
39
39
  _package.files.readCsv(dataFn),
40
40
  getTreeHelper(),
41
- getDendrogramService()
41
+ getDendrogramService(),
42
42
  ]);
43
43
 
44
44
  view = grok.shell.addTableView(df);
@@ -62,7 +62,7 @@ export async function demoBio01bUI() {
62
62
  cliffsLink.click();
63
63
  }, {
64
64
  description: 'Reveal similar sequences with a cliff of activity.',
65
- delay: 2000
65
+ delay: 2000,
66
66
  })
67
67
  .step('Cluster sequences', async () => {
68
68
  const seqCol: DG.Column<string> = df.getCol('sequence');
@@ -79,7 +79,7 @@ export async function demoBio01bUI() {
79
79
  activityGCol.scrollIntoView();
80
80
  }, {
81
81
  description: 'Perform hierarchical clustering to reveal relationships between sequences.',
82
- delay: 2000
82
+ delay: 2000,
83
83
  })
84
84
  .step('Browse the cliff', async () => {
85
85
  //cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
@@ -99,7 +99,7 @@ export async function demoBio01bUI() {
99
99
  // }
100
100
  }, {
101
101
  description: 'Zoom in to explore selected activity cliff details.',
102
- delay: 2000
102
+ delay: 2000,
103
103
  })
104
104
  .start();
105
105
  } catch (err: any) {
@@ -1,10 +1,7 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
-
5
-
6
4
  import {_package, toAtomicLevel} from '../package';
7
- import $ from 'cash-dom';
8
5
  import {handleError} from './utils';
9
6
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
10
7
  import {delay} from '@datagrok-libraries/utils/src/test';
@@ -20,7 +17,7 @@ export async function demoBio03UI(): Promise<void> {
20
17
  try {
21
18
  await new DemoScript(
22
19
  'Atomic Level',
23
- 'Atomic level structure of Macromolecules'
20
+ 'Atomic level structure of Macromolecules',
24
21
  )
25
22
  .step(`Loading Macromolecules notation 'Helm'`, async () => {
26
23
  grok.shell.windows.showContextPanel = false;
@@ -7,8 +7,9 @@ import {handleError} from './utils';
7
7
 
8
8
  import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
9
9
  import {pepseaMethods, runPepsea} from '../utils/pepsea';
10
- import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
11
10
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
11
+ import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
12
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
12
13
 
13
14
  const helmFn: string = 'samples/sample_HELM.csv';
14
15
 
@@ -22,7 +23,7 @@ export async function demoBio05UI(): Promise<void> {
22
23
 
23
24
  const helmColName: string = 'HELM';
24
25
  const msaHelmColName: string = 'msa(HELM)';
25
- const dimRedMethod: string = 'UMAP';
26
+ const dimRedMethod: DimReductionMethods = DimReductionMethods.UMAP;
26
27
 
27
28
  try {
28
29
  const demoScript = new DemoScript(
@@ -43,20 +44,21 @@ export async function demoBio05UI(): Promise<void> {
43
44
  const method: string = pepseaMethods[0];
44
45
  const gapOpen: number = 1.53;
45
46
  const gapExtend: number = 0;
46
- msaHelmCol = await runPepsea(helmCol, msaHelmColName, method, gapOpen, gapExtend, undefined);
47
+ msaHelmCol = (await runPepsea(helmCol, msaHelmColName, method, gapOpen, gapExtend, undefined))!;
47
48
  df.columns.add(msaHelmCol);
48
49
  await grok.data.detectSemanticTypes(df);
49
50
  }, {
51
+ // eslint-disable-next-line max-len
50
52
  description: 'Multiple sequence alignment (MSA) performed with PepSeA tool operating on non-natural aminoacids as well.',
51
53
  delay: 2000,
52
54
  })
53
55
  .step('Build sequence space', async () => {
54
56
  ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
55
- dimRedMethod, StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
57
+ dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN, true)) as DG.ScatterPlotViewer;
56
58
  view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
57
59
  }, {
58
60
  description: 'Reduce sequence space dimensionality to display on 2D representation.',
59
- delay: 2000
61
+ delay: 2000,
60
62
  })
61
63
  .step('Analyse sequence composition', async () => {
62
64
  wlViewer = await df.plot.fromType('WebLogo', {
@@ -66,6 +68,7 @@ export async function demoBio05UI(): Promise<void> {
66
68
  }) as DG.Viewer & IWebLogoViewer;
67
69
  view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
68
70
  }, {
71
+ // eslint-disable-next-line max-len
69
72
  description: 'Composition analysis allows to reveal functional features of sequences like motifs, or variable loops.',
70
73
  delay: 2000,
71
74
  })
package/src/demo/utils.ts CHANGED
@@ -5,7 +5,8 @@ import * as ui from 'datagrok-api/ui';
5
5
  import {_package, sequenceSpaceTopMenu} from '../package';
6
6
  import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
7
7
  import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
8
- import {delay} from '@datagrok-libraries/utils/src/test';
8
+ import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
9
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
9
10
 
10
11
  enum EMBED_COL_NAMES {
11
12
  X = 'Embed_X',
@@ -13,7 +14,7 @@ enum EMBED_COL_NAMES {
13
14
  }
14
15
 
15
16
  export async function demoSequenceSpace(
16
- view: DG.TableView, df: DG.DataFrame, colName: string, method: string
17
+ view: DG.TableView, df: DG.DataFrame, colName: string, method: string,
17
18
  ): Promise<DG.ScatterPlotViewer> {
18
19
  let resSpaceViewer: DG.ScatterPlotViewer;
19
20
  if (true) {
@@ -63,7 +64,7 @@ export async function demoSequenceSpace(
63
64
  })) as DG.ScatterPlotViewer;
64
65
  } else {
65
66
  resSpaceViewer = (await sequenceSpaceTopMenu(df, df.getCol(colName),
66
- 'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
67
+ DimReductionMethods.UMAP, MmDistanceFunctionsNames.LEVENSHTEIN, true)) as DG.ScatterPlotViewer;
67
68
  }
68
69
  view.dockManager.dock(resSpaceViewer!, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
69
70
  return resSpaceViewer;
@@ -22,10 +22,9 @@ import './tests/substructure-filters-tests';
22
22
  import './tests/pepsea-tests';
23
23
  import './tests/viewers';
24
24
  import './tests/units-handler-tests';
25
+ import './tests/units-handler-splitted-tests';
25
26
  import './tests/to-atomic-level-tests';
26
27
  import './tests/mm-distance-tests';
27
-
28
- // Tests hanging github CI
29
28
  import './tests/activity-cliffs-tests';
30
29
  import './tests/sequence-space-test';
31
30