@datagrok/bio 2.12.18 → 2.12.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Aleksandr Tanas",
6
6
  "email": "atanas@datagrok.ai"
7
7
  },
8
- "version": "2.12.18",
8
+ "version": "2.12.19",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.41.2",
37
+ "@datagrok-libraries/bio": "^5.41.5",
38
38
  "@datagrok-libraries/chem-meta": "^1.2.5",
39
39
  "@datagrok-libraries/math": "^1.1.5",
40
40
  "@datagrok-libraries/ml": "^6.6.5",
@@ -57,7 +57,7 @@
57
57
  "devDependencies": {
58
58
  "@datagrok/chem": "^1.9.2",
59
59
  "@datagrok/dendrogram": "^1.2.29",
60
- "@datagrok/helm": "^2.1.34",
60
+ "@datagrok/helm": "^2.2.1",
61
61
  "@types/node": "^17.0.24",
62
62
  "@types/wu": "latest",
63
63
  "@typescript-eslint/eslint-plugin": "latest",
@@ -23,7 +23,7 @@ import './tests/WebLogo-project-tests';
23
23
  import './tests/WebLogo-layout-tests';
24
24
  import './tests/checkInputColumn-tests';
25
25
  import './tests/similarity-diversity-tests';
26
- // import './tests/substructure-filters-tests';
26
+ import './tests/substructure-filters-tests';
27
27
  import './tests/pepsea-tests';
28
28
  import './tests/viewers';
29
29
  import './tests/seq-handler-tests';
package/src/package.ts CHANGED
@@ -11,9 +11,7 @@ import {DimReductionBaseEditor, PreprocessFunctionReturnType}
11
11
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
12
12
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
13
13
  import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
14
- import {
15
- TAGS as bioTAGS,
16
- } from '@datagrok-libraries/bio/src/utils/macromolecule';
14
+ import {NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
17
15
  import {SeqHandler, SeqTemps} from '@datagrok-libraries/bio/src/utils/seq-handler';
18
16
  import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
19
17
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
@@ -51,7 +49,7 @@ import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-c
51
49
  import {demoBio03UI} from './demo/bio03-atomic-level';
52
50
  import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
53
51
  import {checkInputColumnUI} from './utils/check-input-column';
54
- import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
52
+ import {MsaWarning, multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
55
53
  import {WebLogoApp} from './apps/web-logo-app';
56
54
  import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
57
55
  import {splitToMonomersUI} from './utils/split-to-monomers';
@@ -75,6 +73,8 @@ import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimen
75
73
  import {
76
74
  ITSNEOptions, IUMAPOptions
77
75
  } from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reducer';
76
+ import {generateLongSequence, generateLongSequence2} from '@datagrok-libraries/bio/src/utils/generator';
77
+
78
78
  import {CyclizedNotationProvider} from './utils/cyclized';
79
79
  import {getMolColumnFromHelm} from './utils/helm-to-molfile/utils';
80
80
 
@@ -598,7 +598,15 @@ export async function toAtomicLevel(table: DG.DataFrame, seqCol: DG.Column, nonl
598
598
  //description: Performs multiple sequence alignment
599
599
  //tags: bio, panel
600
600
  export function multipleSequenceAlignmentDialog(): void {
601
- multipleSequenceAlignmentUI();
601
+ multipleSequenceAlignmentUI()
602
+ .catch((err: any) => {
603
+ const [errMsg, _errStack] = errInfo(err);
604
+ if (err instanceof MsaWarning) {
605
+ _package.logger.warning(errMsg);
606
+ return;
607
+ }
608
+ throw err;
609
+ });
602
610
  }
603
611
 
604
612
  //name: Multiple Sequence Alignment
@@ -946,6 +954,26 @@ export async function getRegionHelmApp(): Promise<void> {
946
954
  }
947
955
  }
948
956
 
957
+ // -- Tests long seq --
958
+
959
+ //name: longSeqTableSeparator
960
+ export function longSeqTableSeparator(): void {
961
+ const df = DG.DataFrame.fromColumns(generateLongSequence());
962
+ grok.shell.addTableView(df);
963
+ }
964
+
965
+ //name: longSeqTableFasta
966
+ export function longSeqTableFasta(): void {
967
+ const df = DG.DataFrame.fromColumns([generateLongSequence2(NOTATION.FASTA)]);
968
+ grok.shell.addTableView(df);
969
+ }
970
+
971
+ //name: longSeqTableHelm
972
+ export function longSeqTableHelm(): void {
973
+ const df = DG.DataFrame.fromColumns([generateLongSequence2(NOTATION.HELM)]);
974
+ grok.shell.addTableView(df);
975
+ }
976
+
949
977
  // -- Handle context menu --
950
978
 
951
979
  ///name: addCopyMenu
@@ -4,15 +4,36 @@ import * as ui from 'datagrok-api/ui';
4
4
 
5
5
  import wu from 'wu';
6
6
 
7
- import {category, test} from '@datagrok-libraries/utils/src/test';
8
- import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
7
+ import {after, before, category, expect, test} from '@datagrok-libraries/utils/src/test';
8
+ import {MonomerPlacer, hitBounds} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
9
9
  import {monomerToShort} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
10
  import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
11
- import {getMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
11
+ import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
12
+ import {
13
+ getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests
14
+ } from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
15
+ import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
12
16
 
13
17
  import {_package} from '../package-test';
14
18
 
15
19
  category('renderers: monomerPlacer', () => {
20
+ let libHelper: IMonomerLibHelper;
21
+ let libSettings: UserLibSettings;
22
+
23
+ before(async () => {
24
+ libHelper = await getMonomerLibHelper();
25
+ libSettings = await getUserLibSettings();
26
+
27
+ await setUserLibSettingsForTests();
28
+ await libHelper.awaitLoaded();
29
+ await libHelper.loadLibraries(true);
30
+ });
31
+
32
+ after(async () => {
33
+ await setUserLibSettings(libSettings);
34
+ await libHelper.loadLibraries(true);
35
+ });
36
+
16
37
  const tests = {
17
38
  splitter: {
18
39
  /**
@@ -53,10 +74,10 @@ category('renderers: monomerPlacer', () => {
53
74
  {src: {row: 1, x: 5}, tgt: {pos: 0}},
54
75
  {src: {row: 1, x: 37}, tgt: {pos: 0}},
55
76
  {src: {row: 1, x: 38}, tgt: {pos: 1}},
56
- {src: {row: 1, x: 170}, tgt: {pos: 4}},
77
+ {src: {row: 1, x: 170}, tgt: {pos: 5}},
57
78
  {src: {row: 1, x: 200}, tgt: {pos: 5}},
58
79
  {src: {row: 2, x: 20}, tgt: {pos: null}}, // empty value
59
- {src: {row: 3, x: 170}, tgt: {pos: 4}},
80
+ {src: {row: 3, x: 170}, tgt: {pos: 5}},
60
81
  {src: {row: 3, x: 200}, tgt: {pos: 5}},
61
82
  {src: {row: 3, x: 297}, tgt: {pos: null}},
62
83
  ]
@@ -122,7 +143,32 @@ id3,QHIRE--LT
122
143
  }
123
144
  }
124
145
  if (errorList.length > 0)
125
- throw new Error('Test failed error(s):\n' + errorList.join(', n'));
146
+ throw new Error('Test failed error(s):\n' + errorList.join(', \n'));
147
+ });
148
+ }
149
+ });
150
+
151
+ category('renderers: monomerPlacer', () => {
152
+ const boundsTestData = {
153
+ bounds: [10, 20, 30, 40, 50, 60],
154
+ tests: {
155
+ left: {x: 3, tgt: null},
156
+ c0left: {x: 10, tgt: 0},
157
+ c0mid: {x: 12, tgt: 0},
158
+ c0right: {x: 19, tgt: 0},
159
+ c1left: {x: 20, tgt: 1},
160
+ c2right: {x: 39, tgt: 2},
161
+ c4left: {x: 50, tgt: 4},
162
+ c4right: {x: 59, tgt: 4},
163
+ max: {x: 60, tgt: null},
164
+ right: {x: 65, tgt: null},
165
+ }
166
+ };
167
+
168
+ for (const [testName, testData] of Object.entries(boundsTestData.tests)) {
169
+ test('hitBounds-' + testName, async () => {
170
+ const res = hitBounds(boundsTestData.bounds, testData.x);
171
+ expect(res, testData.tgt);
126
172
  });
127
173
  }
128
174
  });
@@ -8,10 +8,11 @@ import {fromEvent} from 'rxjs';
8
8
  import {category, expect, test, delay, testEvent} from '@datagrok-libraries/utils/src/test';
9
9
  import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
10
  import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
11
+ import {generateLongSequence, generateManySequences} from '@datagrok-libraries/bio/src/utils/generator';
11
12
 
12
13
  import {importFasta} from '../package';
13
14
  import {convertDo} from '../utils/convert';
14
- import {generateLongSequence, generateManySequences, performanceTest} from './utils/sequences-generators';
15
+ import {performanceTest} from './utils/sequences-generators';
15
16
  import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
16
17
  import {awaitGrid} from './utils';
17
18
  import * as C from '../utils/constants';
@@ -23,9 +24,6 @@ category('renderers', () => {
23
24
  await performanceTest(generateLongSequence, 'Long sequences');
24
25
  });
25
26
 
26
- test('many sequence performance', async () => {
27
- await performanceTest(generateManySequences, 'Many sequences');
28
- });
29
27
  test('many sequence performance', async () => {
30
28
  await performanceTest(generateManySequences, 'Many sequences');
31
29
  });
@@ -186,7 +186,7 @@ category('bio-substructure-filters', async () => {
186
186
  }
187
187
  await filter.awaitRendered();
188
188
  await delay(3000); //TODO: await for grid.onLookChanged
189
- }, {skipReason: 'GROK-15678'});
189
+ });
190
190
 
191
191
  // Generates unhandled exception accessing isFiltering before bioFilter created
192
192
  test('helm-view', async () => {
@@ -310,7 +310,7 @@ category('bio-substructure-filters', async () => {
310
310
  await Promise.all([f1.awaitRendered(), f2.awaitRendered()]);
311
311
  await awaitGrid(view.grid);
312
312
  await delay(3000); //TODO: await for grid.onLookChanged
313
- }, {skipReason: 'GROK-15678'});
313
+ });
314
314
 
315
315
  // two seq columns
316
316
 
@@ -7,26 +7,6 @@ import {expect} from '@datagrok-libraries/utils/src/test';
7
7
 
8
8
  import {awaitGrid} from '../utils';
9
9
 
10
-
11
- export function generateManySequences(): DG.Column[] {
12
- const columns: DG.Column[] = [];
13
- columns.push(DG.Column.fromList('string', 'MSA',
14
- new Array(10 ** 6).fill(
15
- 'meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me')),
16
- );
17
- columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 6).fill('5.30751')));
18
- return columns;
19
- }
20
-
21
- export function generateLongSequence(): DG.Column[] {
22
- const columns: DG.Column[] = [];
23
- const longSequence =
24
- `meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr`.repeat(10 ** 5);
25
- columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 2).fill(longSequence)));
26
- columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 2).fill('7.30751')));
27
- return columns;
28
- }
29
-
30
10
  export async function performanceTest(generateFunc: () => DG.Column[], testName: string) {
31
11
  const columns = generateFunc();
32
12
  const df: DG.DataFrame = DG.DataFrame.fromColumns(columns);
@@ -214,7 +214,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
214
214
 
215
215
  const separator = tableCol.getTag(bioTAGS.separator) ?? '';
216
216
  const minMonWidth = seqColTemp.props.separatorWidth + 1 * seqColTemp.props.monomerCharWidth;
217
- const splitLimit = w / minMonWidth;
217
+ const splitLimit = Math.ceil(w / minMonWidth);
218
218
  const sh = SeqHandler.forColumn(tableCol);
219
219
 
220
220
  const tempReferenceSequence: string | null = tableColTemp[tempTAGS.referenceSequence];
@@ -38,6 +38,8 @@ export class MacromoleculeColumnWidget extends DG.Widget {
38
38
  fitArea: false,
39
39
  // maxHeight: 100,
40
40
  // minHeight: 25,
41
+ positionNames: '', // to ensure position names by default
42
+ endPositionName: '50', // limit WebLogo for visible monomers
41
43
  }) as unknown as WebLogoViewer;
42
44
  this.wlViewer.root.style.height = `50px`;
43
45
 
@@ -2,9 +2,10 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
 
5
+ import {ColumnInputOptions} from '@datagrok-libraries/utils/src/type-declarations';
6
+ import {delay} from '@datagrok-libraries/utils/src/test';
5
7
  import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
8
  import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
7
- import {ColumnInputOptions} from '@datagrok-libraries/utils/src/type-declarations';
8
9
 
9
10
  import {runKalign} from './multiple-sequence-alignment';
10
11
  import {pepseaMethods, runPepsea} from './pepsea';
@@ -36,9 +37,10 @@ export async function multipleSequenceAlignmentUI(
36
37
  const table = options.col?.dataFrame ?? grok.shell.t;
37
38
  const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
38
39
  if (seqCol == null) {
39
- const errMsg = `MSAError: dataset doesn't conain any Macromolecule column`;
40
+ const errMsg: string = `Multiple sequence analysis requires a dataset with a macromolecule column.`;
40
41
  grok.shell.warning(errMsg);
41
42
  reject(new MsaWarning(errMsg));
43
+ return; // Prevents creating the MSA dialog
42
44
  }
43
45
 
44
46
  // UI for PepSea alignment
@@ -72,39 +74,50 @@ export async function multipleSequenceAlignmentUI(
72
74
 
73
75
  let performAlignment: (() => Promise<DG.Column<string> | null>) | undefined;
74
76
 
75
- //TODO: remove when the new version of datagrok-api is available
76
- //TODO: allow only macromolecule columns to be chosen
77
- const colInput = ui.columnInput('Sequence', table, seqCol, async () => {
77
+ let prevSeqCol = seqCol;
78
+ const colInput = ui.columnInput(
79
+ 'Sequence', table, seqCol,
80
+ async (valueCol: DG.Column) => {
81
+ if (!valueCol || valueCol.semType !== DG.SEMTYPE.MACROMOLECULE) {
82
+ okBtn.disabled = true;
83
+ await delay(0); // to
84
+ colInput.value = prevSeqCol as DG.Column<string>;
85
+ return;
86
+ }
87
+ prevSeqCol = valueCol;
88
+ okBtn.disabled = false;
78
89
  performAlignment = await onColInputChange(
79
90
  colInput.value, table, pepseaInputRootStyles, kalignInputRootStyles,
80
91
  methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput,
81
92
  );
82
- //@ts-ignore
83
93
  }, {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE} as ColumnInputOptions
84
94
  ) as DG.InputBase<DG.Column<string>>;
85
95
  colInput.setTooltip('Sequences column to use for alignment');
86
96
  const clustersColInput = ui.columnInput('Clusters', table, options.clustersCol);
87
97
  clustersColInput.nullable = true;
88
- colInput.fireChanged();
98
+
99
+ const dlg = ui.dialog('MSA')
100
+ .add(colInput)
101
+ .add(clustersColInput)
102
+ .add(methodInput)
103
+ .add(msaParamsDiv)
104
+ .add(msaParamsButton)
105
+ .add(kalignVersionDiv)
106
+ .onOK(async () => { await onDialogOk(colInput, table, performAlignment, resolve, reject); });
107
+ const okBtn = dlg.getButton('OK');
108
+
109
+ colInput.fireChanged(); // changes okBtn
89
110
  //if column is specified (from tests), run alignment and resolve with the result
90
111
  if (options.col) {
91
112
  performAlignment = await onColInputChange(
92
113
  options.col, table, pepseaInputRootStyles, kalignInputRootStyles,
93
114
  methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput,
94
115
  );
95
-
96
116
  await onDialogOk(colInput, table, performAlignment, resolve, reject);
97
- return;
117
+ return; // Prevents show the dialog
98
118
  }
99
- const _dlg = ui.dialog('MSA')
100
- .add(colInput)
101
- .add(clustersColInput)
102
- .add(methodInput)
103
- .add(msaParamsDiv)
104
- .add(msaParamsButton)
105
- .add(kalignVersionDiv)
106
- .onOK(async () => { await onDialogOk(colInput, table, performAlignment, resolve, reject); })
107
- .show();
119
+
120
+ dlg.show();
108
121
  });
109
122
  }
110
123
 
@@ -953,16 +953,26 @@ export class WebLogoViewer extends DG.JsViewer implements IWebLogoViewer {
953
953
 
954
954
  // region updatePositions
955
955
 
956
+ /** positionNames and positionLabel can be set up through the column's tags only */
957
+ const positionNamesTxt = this.seqCol.getTag(bioTAGS.positionNames);
958
+ const positionLabelsTxt = this.seqCol.getTag(bioTAGS.positionLabels);
959
+
960
+ // WebLogo in the column tooltip / widget is limited, speed up for long sequences
961
+ let splitLimit: number | undefined = undefined;
962
+ if (!positionNamesTxt && this.endPositionName && /\d+/.test(this.endPositionName))
963
+ splitLimit = Number(this.endPositionName);
964
+ else if (positionNamesTxt && this.endPositionName) {
965
+ splitLimit = positionNamesTxt.split(positionSeparator).indexOf(this.endPositionName);
966
+ splitLimit = splitLimit !== -1 ? splitLimit : undefined;
967
+ }
968
+
956
969
  const dfFilter = this.getFilter();
957
970
  const maxLength: number = dfFilter.trueCount === 0 ? this.seqHandler!.maxLength :
958
971
  wu.count(0).take(this.seqHandler!.length).map((rowIdx) => {
959
- const mList = this.seqHandler!.getSplitted(rowIdx);
972
+ const mList = this.seqHandler!.getSplitted(rowIdx, splitLimit);
960
973
  return dfFilter.get(rowIdx) && !!mList ? mList.length : 0;
961
974
  }).reduce((max, l) => Math.max(max, l), 0);
962
975
 
963
- /** positionNames and positionLabel can be set up through the column's tags only */
964
- const positionNamesTxt = this.seqCol.getTag(bioTAGS.positionNames);
965
- const positionLabelsTxt = this.seqCol.getTag(bioTAGS.positionLabels);
966
976
  this.positionNames = !!positionNamesTxt ? positionNamesTxt.split(positionSeparator).map((v) => v.trim()) :
967
977
  [...Array(maxLength).keys()].map((jPos) => `${jPos + 1}`)/* fallback if tag is not provided */;
968
978
  this.positionLabels = !!positionLabelsTxt ? positionLabelsTxt.split(positionSeparator).map((v) => v.trim()) :
@@ -135,9 +135,15 @@ export class HelmBioFilter extends BioFilterBase<BioFilterProps> /* implements I
135
135
  }
136
136
 
137
137
  async substructureSearch(column: DG.Column): Promise<DG.BitSet | null> {
138
- await delay(10);
139
- const res = await helmSubstructureSearch(this.props.substructure, column);
140
- return res;
138
+ const logPrefix = `${this.viewerToLog()}.substructureSearch( column = <${column.name}> )`;
139
+ _package.logger.debug(`${logPrefix}, start`);
140
+ try {
141
+ await delay(10);
142
+ const res = await helmSubstructureSearch(this.props.substructure, column);
143
+ return res;
144
+ } finally {
145
+ _package.logger.debug(`${logPrefix}, end`);
146
+ }
141
147
  }
142
148
 
143
149
  // // -- IRenderer --