@datagrok/bio 2.8.6 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.8.6",
8
+ "version": "2.9.0",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,14 +34,14 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.36.0",
37
+ "@datagrok-libraries/bio": "^5.37.0",
38
38
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
39
  "@datagrok-libraries/ml": "^6.3.39",
40
40
  "@datagrok-libraries/tutorials": "^1.3.6",
41
41
  "@datagrok-libraries/utils": "^4.0.17",
42
42
  "cash-dom": "^8.0.0",
43
43
  "css-loader": "^6.7.3",
44
- "datagrok-api": "^1.13.3",
44
+ "datagrok-api": "^1.16.0",
45
45
  "dayjs": "^1.11.4",
46
46
  "fastest-levenshtein": "^1.0.16",
47
47
  "openchemlib": "6.0.1",
@@ -28,6 +28,7 @@ import './tests/to-atomic-level-tests';
28
28
  import './tests/mm-distance-tests';
29
29
  import './tests/activity-cliffs-tests';
30
30
  import './tests/sequence-space-test';
31
+ import './tests/scoring';
31
32
 
32
33
 
33
34
  export const _package = new DG.Package();
@@ -14,7 +14,6 @@ export const enum BioPackagePropertiesNames {
14
14
 
15
15
 
16
16
  export class BioPackageProperties extends Map<string, any> {
17
-
18
17
  private _onPropertyChanged: Subject<string> = new Subject<string>();
19
18
  public get onPropertyChanged(): Observable<string> { return this._onPropertyChanged; }
20
19
 
package/src/package.ts CHANGED
@@ -21,7 +21,7 @@ import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-uti
21
21
 
22
22
  import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
23
23
  import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
24
- import {substructureSearchDialog} from './substructure-search/substructure-search';
24
+ import {SubstructureSearchDialog} from './substructure-search/substructure-search';
25
25
  import {saveAsFastaUI} from './utils/save-as-fasta';
26
26
  import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
27
27
  import {delay} from '@datagrok-libraries/utils/src/test';
@@ -40,10 +40,11 @@ import {
40
40
  getLibFileNameList,
41
41
  getLibraryPanelUI
42
42
  } from './utils/monomer-lib';
43
- import {getMacromoleculeColumn} from './utils/ui-utils';
43
+ import {getMacromoleculeColumns} from './utils/ui-utils';
44
44
  import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
45
45
  import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
46
46
  import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
47
+ import {SCORE, calculateScores} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
47
48
 
48
49
  import {demoBio01UI} from './demo/bio01-similarity-diversity';
49
50
  import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
@@ -602,7 +603,7 @@ export function importBam(fileContent: string): DG.DataFrame [] {
602
603
  //top-menu: Bio | Convert | Notation...
603
604
  //name: convertDialog
604
605
  export function convertDialog() {
605
- const col = getMacromoleculeColumn();
606
+ const col = getMacromoleculeColumns()[0];
606
607
  convert(col);
607
608
  }
608
609
 
@@ -734,12 +735,52 @@ export function diversitySearchTopMenu() {
734
735
  view.dockManager.dock(viewer, 'down');
735
736
  }
736
737
 
737
- //top-menu: Bio | Search | Substructure...
738
- //name: bioSubstructureSearch
739
- //description: Finds sequence with the given subsequence
740
- export function bioSubstructureSearch(): void {
741
- const col = getMacromoleculeColumn();
742
- substructureSearchDialog(col);
738
+ //name: SearchSubsequenceEditor
739
+ //tags: editor
740
+ //input: funccall call
741
+ export function searchSubsequenceEditor(call: DG.FuncCall) {
742
+ const columns = getMacromoleculeColumns();
743
+ if (columns.length === 1)
744
+ call.func.prepare({macromolecules: columns[0]}).call(true);
745
+ else
746
+ new SubstructureSearchDialog(columns);
747
+ }
748
+
749
+ //top-menu: Bio | Search | Subsequence...
750
+ //name: Subsequence Search
751
+ //input: column macromolecules
752
+ //editor: Bio:SearchSubsequenceEditor
753
+ export function SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
754
+ grok.shell.tv.getFiltersGroup({createDefaultFilters: false}).updateOrAdd({
755
+ type: 'Bio:bioSubstructureFilter',
756
+ column: macromolecules.name,
757
+ columnName: macromolecules.name,
758
+ });
759
+ grok.shell.tv.grid.scrollToCell(macromolecules, 0);
760
+ }
761
+
762
+ //top-menu: Bio | Calculate | Identity...
763
+ //name: Identity Scoring
764
+ //description: Adds a column with fraction of matching monomers
765
+ //input: dataframe table [Table containing Macromolecule column]
766
+ //input: column macromolecules {semType: Macromolecule} [Sequences to score]
767
+ //input: string reference [Sequence, matching column format]
768
+ //output: column scores
769
+ export async function sequenceIdentityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
770
+ const scores = calculateScores(table, macromolecule, reference, SCORE.IDENTITY);
771
+ return scores;
772
+ }
773
+
774
+ //top-menu: Bio | Calculate | Similarity...
775
+ //name: Similarity Scoring
776
+ //description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
777
+ //input: dataframe table [Table containing Macromolecule column]
778
+ //input: column macromolecules {semType: Macromolecule} [Sequences to score]
779
+ //input: string reference [Sequence, matching column format]
780
+ //output: column scores
781
+ export async function sequenceSimilarityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
782
+ const scores = calculateScores(table, macromolecule, reference, SCORE.SIMILARITY);
783
+ return scores;
743
784
  }
744
785
 
745
786
  //name: saveAsFasta
@@ -17,64 +17,93 @@ export const enum MONOMERIC_COL_TAGS {
17
17
 
18
18
  const SUBSTR_HELM_COL_NAME = 'substr_helm';
19
19
 
20
- /**
21
- * Searches substructure in each row of Macromolecule column
22
- *
23
- * @param {DG.column} col Column with 'Macromolecule' semantic type
24
- */
25
- export function substructureSearchDialog(col: DG.Column<string>): void {
26
- const units = col.getTag(DG.TAGS.UNITS);
27
- const separator = col.getTag(bioTAGS.separator);
28
- // const notations = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];
29
-
30
- const substructureInput = ui.textInput('Substructure', '');
31
-
32
- const editHelmLink = ui.link('Edit helm', async () => {
33
- updateDivInnerHTML(inputsDiv, grid.root);
34
- await ui.tools.waitForElementInDom(grid.root);
35
- setTimeout(() => {
36
- grid.cell(SUBSTR_HELM_COL_NAME, 0).element.children[0].dispatchEvent(
37
- new KeyboardEvent('keydown', {key: 'Enter'}));
38
- }, 100);
39
- });
20
+ export class SubstructureSearchDialog {
21
+ units: string;
22
+ separator: string;
23
+ inputsDiv: HTMLDivElement;
24
+ substructureInput: DG.InputBase<string>;
25
+ separatorInput: DG.InputBase<string>;
26
+ editHelmLink: HTMLAnchorElement;
27
+ columnsInput: DG.InputBase<DG.Column | null>;
28
+ grid: DG.Grid;
29
+ col: DG.Column;
30
+ dialog: DG.Dialog;
31
+
32
+ constructor(columns: DG.Column<string>[]) {
33
+ this.col = columns[0];
34
+ this.createUI();
35
+ }
36
+
37
+ editHelmLinkAction(): void {
38
+ updateDivInnerHTML(this.inputsDiv, this.grid.root);
39
+ ui.tools.waitForElementInDom(this.grid.root).then(() => {
40
+ setTimeout(() => {
41
+ this.grid.cell(SUBSTR_HELM_COL_NAME, 0).element.children[0].dispatchEvent(
42
+ new KeyboardEvent('keydown', {key: 'Enter'})
43
+ );
44
+ }, 100);
45
+ });
46
+ }
47
+
48
+ updateInputs(): void {
49
+ const selectedInput = this.units === NOTATION.HELM ? ui.divV([this.columnsInput, this.editHelmLink]) :
50
+ this.units === NOTATION.SEPARATOR ? ui.inputs([this.columnsInput, this.substructureInput, this.separatorInput]) :
51
+ ui.inputs([this.columnsInput, this.substructureInput]);
40
52
 
41
- const df = DG.DataFrame.create(1);
42
- df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
43
- df.col(SUBSTR_HELM_COL_NAME)!.semType = col.semType;
44
- df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
45
- const grid = df.plot.grid();
46
- const separatorInput = ui.textInput('Separator', separator);
47
-
48
- const inputsDiv = ui.div();
49
-
50
- const inputs = units === NOTATION.HELM ? ui.divV([editHelmLink]) :
51
- units === NOTATION.SEPARATOR ? ui.inputs([substructureInput, separatorInput]) :
52
- ui.inputs([substructureInput]);
53
-
54
- updateDivInnerHTML(inputsDiv, inputs);
55
-
56
- ui.dialog('Substructure Search')
57
- .add(ui.divV([
58
- ui.divText(`Notation: ${units}`),
59
- inputsDiv,
60
- ]))
61
- .onOK(async () => {
62
- let substructure = units === NOTATION.HELM ? df.get(SUBSTR_HELM_COL_NAME, 0) : substructureInput.value;
63
- if (units === NOTATION.SEPARATOR && separatorInput.value !== separator && separatorInput.value !== '')
64
- substructure = substructure.replaceAll(separatorInput.value, separator);
65
- const matchesColName = `Matches: ${substructure}`;
66
- const colExists = col.dataFrame.columns.names()
67
- .filter((it) => it.toLocaleLowerCase() === matchesColName.toLocaleLowerCase()).length > 0;
68
- if (!colExists) {
53
+ updateDivInnerHTML(this.inputsDiv, selectedInput);
54
+ }
55
+
56
+ updateNotationDiv(): void {
57
+ this.units = this.col.getTag(DG.TAGS.UNITS);
58
+ this.separator = this.col.getTag(bioTAGS.separator);
59
+ const notationDiv = this.dialog.root.getElementsByClassName('notation-text')[0];
60
+ if (notationDiv)
61
+ notationDiv.textContent = `Notation: ${this.units}`;
62
+ }
63
+
64
+ createUI(): void {
65
+ const dataframe = grok.shell.tv.dataFrame;
66
+ const seqColOptions = {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE};
67
+ this.columnsInput = ui.columnInput('Column', dataframe, this.col, (column: DG.Column) => {
68
+ this.col = column;
69
+ this.updateNotationDiv();
70
+ this.updateInputs();
71
+ }, seqColOptions);
72
+
73
+ this.substructureInput = ui.stringInput('Substructure', '');
74
+
75
+ this.editHelmLink = ui.link('Edit helm', () => this.editHelmLinkAction(), undefined, {style: {position: 'relative', left: '95px'}});
76
+
77
+ const df = DG.DataFrame.create(1);
78
+ df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
79
+ df.col(SUBSTR_HELM_COL_NAME)!.semType = this.col.semType;
80
+ df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
81
+ this.grid = df.plot.grid();
82
+ this.separatorInput = ui.stringInput('Separator', this.separator);
83
+
84
+ this.inputsDiv = ui.div();
85
+ this.units = this.col.getTag(DG.TAGS.UNITS);
86
+ this.separator = this.col.getTag(bioTAGS.separator);
87
+ this.updateInputs();
88
+
89
+ this.dialog = ui.dialog('Substructure Search')
90
+ .add(ui.divV([
91
+ ui.divText(`Notation: ${this.units}`, 'notation-text'),
92
+ this.inputsDiv,
93
+ ]))
94
+ .onOK(async () => {
95
+ let substructure = this.units === NOTATION.HELM ? df.get(SUBSTR_HELM_COL_NAME, 0) : this.substructureInput.value;
96
+ if (this.units === NOTATION.SEPARATOR && this.separatorInput.value !== this.separator && this.separatorInput.value !== '')
97
+ substructure = substructure.replaceAll(this.separatorInput.value, this.separator);
69
98
  let matches: DG.BitSet;
70
- if (units === NOTATION.HELM)
71
- matches = await helmSubstructureSearch(substructure, col);
99
+ if (this.units === NOTATION.HELM)
100
+ matches = await helmSubstructureSearch(substructure, this.col);
72
101
  else
73
- matches = linearSubstructureSearch(substructure, col);
74
- col.dataFrame.columns.add(DG.Column.fromBitSet(matchesColName, matches));
75
- } else { grok.shell.warning(`Search ${substructure} is already performed`); }
76
- })
77
- .show();
102
+ matches = linearSubstructureSearch(substructure, this.col);
103
+ this.col.dataFrame.filter.and(matches);
104
+ })
105
+ .show();
106
+ }
78
107
  }
79
108
 
80
109
  export function linearSubstructureSearch(substructure: string, col: DG.Column<string>, separator?: string): DG.BitSet {
@@ -47,5 +47,5 @@ category('activityCliffs', async () => {
47
47
  viewList.push(actCliffsTableViewWithEmptyRows);
48
48
 
49
49
  await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 3, DimReductionMethods.UMAP, 'sequence');
50
- });
50
+ }, {skipReason: 'GROK-13851: Unhandled exceptions'});
51
51
  });
@@ -274,13 +274,13 @@ MWRSWY-CKHP`;
274
274
  await _testDf(readSamples(Samples.fastaCsv), {
275
275
  'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
276
276
  });
277
- });
277
+ }, {skipReason: 'GROK-13851: Unhandled exceptions'});
278
278
 
279
279
  test('samplesFastaFasta', async () => {
280
280
  await _testDf(readSamples(Samples.fastaFasta), {
281
281
  'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
282
282
  });
283
- });
283
+ }, {skipReason: 'GROK-13851: Unhandled exceptions'});
284
284
 
285
285
  // peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
286
286
  // test('samplesPeptidesComplexPositiveAlignedSequence', async () => {
@@ -0,0 +1,38 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ import {category, test, expectFloat, before} from '@datagrok-libraries/utils/src/test';
4
+ import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
5
+ import {sequenceIdentityScoring, sequenceSimilarityScoring} from '../package';
6
+ import {getMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
7
+
8
+ category('Scoring', () => {
9
+ const sequence = 'sequence';
10
+ const expectedSimilarity = 'expected_similarity';
11
+ const expectedIdentity = 'expected_identity';
12
+ const table = DG.DataFrame.fromCsv(`${sequence},${expectedSimilarity},${expectedIdentity}
13
+ PEPTIDE1{Aca.Orn.gGlu.Pqa.D-His_1Bn.dH.hHis.4Abz.D-Tic.D-Dap.Y.Iva.meS.F.P.F.D-1Nal}$$$$,1.0,1.0
14
+ PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.68,0.53
15
+ PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.34,0.0
16
+ `);
17
+ const seqCol: DG.Column<string> = table.getCol(sequence);
18
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
19
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
20
+ const reference = seqCol.get(0)!;
21
+
22
+ before(async () => {
23
+ const monomerLibHelper = await getMonomerLibHelper();
24
+ await monomerLibHelper.loadLibraries(true);
25
+ });
26
+
27
+ test('Identity', async () => {
28
+ const scoresCol = await sequenceIdentityScoring(table, seqCol, reference);
29
+ for (let i = 0; i < scoresCol.length; i++)
30
+ expectFloat(scoresCol.get(i)!, table.get(expectedIdentity, i), 0.01, `Wrong identity score for sequence at position ${i}`);
31
+ });
32
+
33
+ test('Similarity', async () => {
34
+ const scoresCol = await sequenceSimilarityScoring(table, seqCol, reference);
35
+ for (let i = 0; i < scoresCol.length; i++)
36
+ expectFloat(scoresCol.get(i)!, table.get(expectedSimilarity, i), 0.01, `Wrong similarity score for sequence at position ${i}`);
37
+ });
38
+ });
@@ -26,6 +26,7 @@ import * as C from './constants';
26
26
 
27
27
  import {_package, getBioLib} from '../package';
28
28
  import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
29
+ import {getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
29
30
 
30
31
 
31
32
  type TempType = { [tagName: string]: any };
@@ -292,14 +293,15 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
292
293
  const cell = gridCell.cell;
293
294
  const tableCol = gridCell.tableColumn as DG.Column<string>;
294
295
  const s: string = cell.value ?? '';
296
+ const separator = tableCol.tags[bioTAGS.separator];
297
+ const units: string = tableCol.tags[DG.TAGS.UNITS];
295
298
  w = getUpdatedWidth(grid, g, x, w);
296
299
  //TODO: can this be replaced/merged with splitSequence?
297
300
  const [s1, s2] = s.split('#');
298
- const uh = UnitsHandler.getOrCreate(tableCol);
299
- const splitter = uh.getSplitter();
301
+ const splitter = getSplitter(units, separator);
300
302
  const subParts1 = splitter(s1);
301
303
  const subParts2 = splitter(s2);
302
- drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, uh.units);
304
+ drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units);
303
305
  }
304
306
  }
305
307
 
@@ -1,13 +1,13 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
 
4
- export function getMacromoleculeColumn(): DG.Column | any {
5
- const col = grok.shell.t.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
6
- if (col === null) {
4
+ export function getMacromoleculeColumns(): DG.Column<any>[] | any {
5
+ const columns = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
6
+ if (columns === null) {
7
7
  grok.shell.error('Current table does not contain macromolecules');
8
8
  return;
9
9
  }
10
- return col;
10
+ return columns;
11
11
  }
12
12
 
13
13
  export function updateDivInnerHTML(div: HTMLElement, content: string | Node): void {