@datagrok/bio 2.8.4 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,12 +24,12 @@ RUN savedAptMark="$(apt-mark showmanual)" ; \
24
24
  ; \
25
25
  apt-mark auto '.*' > /dev/null ; \
26
26
  [ -z "$savedAptMark" ] || apt-mark manual $savedAptMark ; \
27
- wget https://mafft.cbrc.jp/alignment/software/mafft_7.520-1_amd64.deb -O mafft.deb; \
28
- apt install -y ./mafft.deb; \
29
- rm -rf mafft.deb; \
27
+ wget https://mafft.cbrc.jp/alignment/software/mafft_7.520-1_amd64.deb -O mafft.deb; \
28
+ apt install -y ./mafft.deb; \
29
+ rm -rf mafft.deb; \
30
30
  wget https://github.com/Merck/PepSeA/archive/refs/heads/main.zip -O PepSeA.zip; \
31
31
  unzip -q PepSeA.zip -d /opt ; \
32
- rm -rf PepSeA.zip; \
32
+ rm -rf PepSeA.zip; \
33
33
  chown -R grok:grok /opt/PepSeA-main ; \
34
34
  apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false ; \
35
35
  apt-get clean ; \
@@ -57,6 +57,7 @@ def distout():\n\
57
57
  \n\
58
58
  return distout_output\n' >> /opt/PepSeA-main/alignment/api.py;
59
59
 
60
+ # It is important to run the Docker container as a non-root user rather than as root
60
61
  USER grok:grok
61
62
 
62
63
  # The `source` command does not work in Docker, because it applies only to a single layer
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.8.4",
8
+ "version": "2.9.0",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,14 +34,14 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.34.3",
37
+ "@datagrok-libraries/bio": "^5.37.0",
38
38
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
39
  "@datagrok-libraries/ml": "^6.3.39",
40
40
  "@datagrok-libraries/tutorials": "^1.3.6",
41
41
  "@datagrok-libraries/utils": "^4.0.17",
42
42
  "cash-dom": "^8.0.0",
43
43
  "css-loader": "^6.7.3",
44
- "datagrok-api": "^1.13.3",
44
+ "datagrok-api": "^1.16.0",
45
45
  "dayjs": "^1.11.4",
46
46
  "fastest-levenshtein": "^1.0.16",
47
47
  "openchemlib": "6.0.1",
@@ -7,7 +7,9 @@ import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-met
7
7
  import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
8
8
  import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
9
9
  import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
10
- import {getSplitter, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
+ import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
12
+ import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
11
13
 
12
14
  export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
13
15
  const stringArray = col.toList();
@@ -103,12 +105,11 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
103
105
  });
104
106
 
105
107
  const molDifferences: { [key: number]: HTMLCanvasElement } = {};
106
- const units = params.seqCol.getTag(DG.TAGS.UNITS);
107
- const separator = params.seqCol.getTag(bioTAGS.separator);
108
- const splitter = getSplitter(units, separator);
108
+ const uh = UnitsHandler.getOrCreate(params.seqCol);
109
+ const splitter = uh.getSplitter();
109
110
  const subParts1 = splitter(sequencesArray[0]);
110
111
  const subParts2 = splitter(sequencesArray[1]);
111
- const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
112
+ const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
112
113
  propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
113
114
 
114
115
  propPanel.append(createDifferencesWithPositions(molDifferences));
@@ -127,8 +128,8 @@ function createPropPanelField(name: string, value: number): HTMLDivElement {
127
128
  }
128
129
 
129
130
  export function createDifferenceCanvas(
130
- subParts1: string[],
131
- subParts2: string[],
131
+ subParts1: ISeqSplitted,
132
+ subParts2: ISeqSplitted,
132
133
  units: string,
133
134
  molDifferences: { [key: number]: HTMLCanvasElement }): HTMLCanvasElement {
134
135
  const canvas = document.createElement('canvas');
@@ -7,7 +7,6 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
7
7
  import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
8
8
  import {updateDivInnerHTML} from '../utils/ui-utils';
9
9
  import {Subject} from 'rxjs';
10
- import {TAGS as bioTAGS, getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
10
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
12
11
  import {calcMmDistanceMatrix, dmLinearIndex} from './workers/mm-distance-worker-creator';
13
12
  import {calculateMMDistancesArray} from './workers/mm-distance-array-service';
@@ -106,9 +105,10 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
106
105
  const linearizeFunc = dmLinearIndex(len);
107
106
  // array that keeps track of the indexes and scores together
108
107
  const indexWScore = Array(len).fill(0)
109
- .map((_, i) => ({idx: i, score: i === this.targetMoleculeIdx ? 1 :
110
- this.preComputeDistanceMatrix ? 1 - this.mmDistanceMatrix[linearizeFunc(this.targetMoleculeIdx, i)] :
111
- 1 - distanceArray[i]
108
+ .map((_, i) => ({
109
+ idx: i, score: i === this.targetMoleculeIdx ? 1 :
110
+ this.preComputeDistanceMatrix ? 1 - this.mmDistanceMatrix[linearizeFunc(this.targetMoleculeIdx, i)] :
111
+ 1 - distanceArray[i]
112
112
  }));
113
113
  indexWScore.sort((a, b) => b.score - a.score);
114
114
  // get the most similar molecules
@@ -122,12 +122,12 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
122
122
  const propPanel = ui.div();
123
123
  const molDifferences: { [key: number]: HTMLCanvasElement } = {};
124
124
  const molColName = this.molCol?.name!;
125
- const units = resDf.col(molColName)!.getTag(DG.TAGS.UNITS);
126
- const separator = resDf.col(molColName)!.getTag(bioTAGS.separator);
127
- const splitter = getSplitter(units, separator);
125
+ const col = resDf.col(molColName)!;
126
+ const uh = UnitsHandler.getOrCreate(col);
127
+ const splitter = uh.getSplitter();
128
128
  const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
129
129
  const subParts2 = splitter(resDf.get(molColName, resDf.currentRowIdx));
130
- const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
130
+ const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
131
131
  propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
132
132
  if (subParts1.length !== subParts2.length) {
133
133
  propPanel.append(ui.divV([
@@ -2,7 +2,12 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
+ import wu from 'wu';
6
+
5
7
  import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
8
+
9
+ import {PROPS as wlPROPS} from '../viewers/web-logo-viewer';
10
+
6
11
  import {_package} from '../package';
7
12
 
8
13
  export class WebLogoApp {
@@ -11,7 +16,7 @@ export class WebLogoApp {
11
16
  df: DG.DataFrame;
12
17
  view: DG.TableView;
13
18
 
14
- constructor() {}
19
+ constructor(private readonly urlParams: URLSearchParams) {}
15
20
 
16
21
  async init(df: DG.DataFrame, funcName: string): Promise<void> {
17
22
  this._funcName = funcName;
@@ -23,12 +28,27 @@ export class WebLogoApp {
23
28
  // -- View --
24
29
 
25
30
  async buildView(): Promise<void> {
26
- this.view = grok.shell.addTableView(this.df);
27
- this.view.path = this.view.basePath = `func/${_package.name}.${this._funcName}`;
31
+ const urlParamsTxt = wu(this.urlParams.entries())
32
+ .map(([key, value]) => `${key}=${encodeURIComponent(value)}`)
33
+ .toArray().join('&');
28
34
 
29
- const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot.fromType('WebLogo', {
30
- sequenceColumnName: 'sequence',
31
- }));
35
+ this.view = grok.shell.addTableView(this.df);
36
+ this.view.path = this.view.basePath = `func/${_package.name}.${this._funcName}?${urlParamsTxt}`;
37
+
38
+ const options: { [p: string]: any } = {sequenceColumnName: 'sequence'};
39
+ for (const [optName, optValue] of this.urlParams.entries()) {
40
+ switch (optName) {
41
+ // boolean
42
+ case wlPROPS.fixWidth:
43
+ case wlPROPS.fitArea:
44
+ options[optName] = ((v) => { return ['1', 'on', 'true'].includes(v.toLowerCase()); })(optValue);
45
+ break;
46
+ default:
47
+ options[optName] = optValue;
48
+ }
49
+ }
50
+ const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot
51
+ .fromType('WebLogo', options)) as DG.Viewer & IWebLogoViewer;
32
52
  this.view.dockManager.dock(viewer, DG.DOCK_TYPE.DOWN, null, 'WebLogo', 0.35);
33
53
  }
34
54
  }
@@ -1,9 +1,12 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
+
5
+ import wu from 'wu';
6
+
4
7
  import {getHelmMonomers} from '../package';
5
- import {TAGS as bioTAGS, getSplitter, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
8
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
+ import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
7
10
 
8
11
  const V2000_ATOM_NAME_POS = 31;
9
12
 
@@ -27,7 +30,7 @@ export async function getMonomericMols(
27
30
  } else {
28
31
  molV3000Array = new Array<string>(mcol.length);
29
32
  for (let i = 0; i < mcol.length; i++) {
30
- const sequenceMonomers = uh.splitted[i].filter((it) => it !== '');
33
+ const sequenceMonomers = wu(uh.splitted[i]).filter((it) => it !== '').toArray();
31
34
  const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
32
35
  molV3000Array[i] = molV3000;
33
36
  }
@@ -36,7 +39,7 @@ export async function getMonomericMols(
36
39
  }
37
40
 
38
41
  function molV3000FromNonHelmSequence(
39
- monomers: Array<string>, monomersDict: Map<string, string>, pattern: boolean = false) {
42
+ monomers: ISeqSplitted, monomersDict: Map<string, string>, pattern: boolean = false) {
40
43
  let molV3000 = `
41
44
  Datagrok macromolecule handler
42
45
 
@@ -28,6 +28,7 @@ import './tests/to-atomic-level-tests';
28
28
  import './tests/mm-distance-tests';
29
29
  import './tests/activity-cliffs-tests';
30
30
  import './tests/sequence-space-test';
31
+ import './tests/scoring';
31
32
 
32
33
 
33
34
  export const _package = new DG.Package();
@@ -14,7 +14,6 @@ export const enum BioPackagePropertiesNames {
14
14
 
15
15
 
16
16
  export class BioPackageProperties extends Map<string, any> {
17
-
18
17
  private _onPropertyChanged: Subject<string> = new Subject<string>();
19
18
  public get onPropertyChanged(): Observable<string> { return this._onPropertyChanged; }
20
19
 
package/src/package.ts CHANGED
@@ -21,7 +21,7 @@ import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-uti
21
21
 
22
22
  import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
23
23
  import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
24
- import {substructureSearchDialog} from './substructure-search/substructure-search';
24
+ import {SubstructureSearchDialog} from './substructure-search/substructure-search';
25
25
  import {saveAsFastaUI} from './utils/save-as-fasta';
26
26
  import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
27
27
  import {delay} from '@datagrok-libraries/utils/src/test';
@@ -40,10 +40,11 @@ import {
40
40
  getLibFileNameList,
41
41
  getLibraryPanelUI
42
42
  } from './utils/monomer-lib';
43
- import {getMacromoleculeColumn} from './utils/ui-utils';
43
+ import {getMacromoleculeColumns} from './utils/ui-utils';
44
44
  import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
45
45
  import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
46
46
  import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
47
+ import {SCORE, calculateScores} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
47
48
 
48
49
  import {demoBio01UI} from './demo/bio01-similarity-diversity';
49
50
  import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
@@ -602,7 +603,7 @@ export function importBam(fileContent: string): DG.DataFrame [] {
602
603
  //top-menu: Bio | Convert | Notation...
603
604
  //name: convertDialog
604
605
  export function convertDialog() {
605
- const col = getMacromoleculeColumn();
606
+ const col = getMacromoleculeColumns()[0];
606
607
  convert(col);
607
608
  }
608
609
 
@@ -734,12 +735,52 @@ export function diversitySearchTopMenu() {
734
735
  view.dockManager.dock(viewer, 'down');
735
736
  }
736
737
 
737
- //top-menu: Bio | Search | Substructure...
738
- //name: bioSubstructureSearch
739
- //description: Finds sequence with the given subsequence
740
- export function bioSubstructureSearch(): void {
741
- const col = getMacromoleculeColumn();
742
- substructureSearchDialog(col);
738
+ //name: SearchSubsequenceEditor
739
+ //tags: editor
740
+ //input: funccall call
741
+ export function searchSubsequenceEditor(call: DG.FuncCall) {
742
+ const columns = getMacromoleculeColumns();
743
+ if (columns.length === 1)
744
+ call.func.prepare({macromolecules: columns[0]}).call(true);
745
+ else
746
+ new SubstructureSearchDialog(columns);
747
+ }
748
+
749
+ //top-menu: Bio | Search | Subsequence...
750
+ //name: Subsequence Search
751
+ //input: column macromolecules
752
+ //editor: Bio:SearchSubsequenceEditor
753
+ export function SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
754
+ grok.shell.tv.getFiltersGroup({createDefaultFilters: false}).updateOrAdd({
755
+ type: 'Bio:bioSubstructureFilter',
756
+ column: macromolecules.name,
757
+ columnName: macromolecules.name,
758
+ });
759
+ grok.shell.tv.grid.scrollToCell(macromolecules, 0);
760
+ }
761
+
762
+ //top-menu: Bio | Caclulate | Identity...
763
+ //name: Identity Scoring
764
+ //description: Adds a column with fraction of matching monomers
765
+ //input: dataframe table [Table containing Macromolecule column]
766
+ //input: column macromolecules {semType: Macromolecule} [Sequences to score]
767
+ //input: string reference [Sequence, matching column format]
768
+ //output: column scores
769
+ export async function sequenceIdentityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
770
+ const scores = calculateScores(table, macromolecule, reference, SCORE.IDENTITY);
771
+ return scores;
772
+ }
773
+
774
+ //top-menu: Bio | Caclulate | Similarity...
775
+ //name: Similarity Scoring
776
+ //description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
777
+ //input: dataframe table [Table containing Macromolecule column]
778
+ //input: column macromolecules {semType: Macromolecule} [Sequences to score]
779
+ //input: string reference [Sequence, matching column format]
780
+ //output: column scores
781
+ export async function sequenceSimilarityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
782
+ const scores = calculateScores(table, macromolecule, reference, SCORE.SIMILARITY);
783
+ return scores;
743
784
  }
744
785
 
745
786
  //name: saveAsFasta
@@ -764,7 +805,8 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
764
805
  export async function webLogoLargeApp(): Promise<void> {
765
806
  const pi = DG.TaskBarProgressIndicator.create('WebLogo');
766
807
  try {
767
- const app = new WebLogoApp();
808
+ const urlParams = new URLSearchParams(window.location.search);
809
+ const app = new WebLogoApp(urlParams);
768
810
  const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
769
811
  await grok.data.detectSemanticTypes(df);
770
812
  await app.init(df, 'webLogoLargeApp');
@@ -17,64 +17,93 @@ export const enum MONOMERIC_COL_TAGS {
17
17
 
18
18
  const SUBSTR_HELM_COL_NAME = 'substr_helm';
19
19
 
20
- /**
21
- * Searches substructure in each row of Macromolecule column
22
- *
23
- * @param {DG.column} col Column with 'Macromolecule' semantic type
24
- */
25
- export function substructureSearchDialog(col: DG.Column<string>): void {
26
- const units = col.getTag(DG.TAGS.UNITS);
27
- const separator = col.getTag(bioTAGS.separator);
28
- // const notations = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];
29
-
30
- const substructureInput = ui.textInput('Substructure', '');
31
-
32
- const editHelmLink = ui.link('Edit helm', async () => {
33
- updateDivInnerHTML(inputsDiv, grid.root);
34
- await ui.tools.waitForElementInDom(grid.root);
35
- setTimeout(() => {
36
- grid.cell(SUBSTR_HELM_COL_NAME, 0).element.children[0].dispatchEvent(
37
- new KeyboardEvent('keydown', {key: 'Enter'}));
38
- }, 100);
39
- });
20
+ export class SubstructureSearchDialog {
21
+ units: string;
22
+ separator: string;
23
+ inputsDiv: HTMLDivElement;
24
+ substructureInput: DG.InputBase<string>;
25
+ separatorInput: DG.InputBase<string>;
26
+ editHelmLink: HTMLAnchorElement;
27
+ columnsInput: DG.InputBase<DG.Column | null>;
28
+ grid: DG.Grid;
29
+ col: DG.Column;
30
+ dialog: DG.Dialog;
31
+
32
+ constructor(columns: DG.Column<string>[]) {
33
+ this.col = columns[0];
34
+ this.createUI();
35
+ }
36
+
37
+ editHelmLinkAction(): void {
38
+ updateDivInnerHTML(this.inputsDiv, this.grid.root);
39
+ ui.tools.waitForElementInDom(this.grid.root).then(() => {
40
+ setTimeout(() => {
41
+ this.grid.cell(SUBSTR_HELM_COL_NAME, 0).element.children[0].dispatchEvent(
42
+ new KeyboardEvent('keydown', {key: 'Enter'})
43
+ );
44
+ }, 100);
45
+ });
46
+ }
47
+
48
+ updateInputs(): void {
49
+ const selectedInput = this.units === NOTATION.HELM ? ui.divV([this.columnsInput, this.editHelmLink]) :
50
+ this.units === NOTATION.SEPARATOR ? ui.inputs([this.columnsInput, this.substructureInput, this.separatorInput]) :
51
+ ui.inputs([this.columnsInput, this.substructureInput]);
40
52
 
41
- const df = DG.DataFrame.create(1);
42
- df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
43
- df.col(SUBSTR_HELM_COL_NAME)!.semType = col.semType;
44
- df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
45
- const grid = df.plot.grid();
46
- const separatorInput = ui.textInput('Separator', separator);
47
-
48
- const inputsDiv = ui.div();
49
-
50
- const inputs = units === NOTATION.HELM ? ui.divV([editHelmLink]) :
51
- units === NOTATION.SEPARATOR ? ui.inputs([substructureInput, separatorInput]) :
52
- ui.inputs([substructureInput]);
53
-
54
- updateDivInnerHTML(inputsDiv, inputs);
55
-
56
- ui.dialog('Substructure Search')
57
- .add(ui.divV([
58
- ui.divText(`Notation: ${units}`),
59
- inputsDiv,
60
- ]))
61
- .onOK(async () => {
62
- let substructure = units === NOTATION.HELM ? df.get(SUBSTR_HELM_COL_NAME, 0) : substructureInput.value;
63
- if (units === NOTATION.SEPARATOR && separatorInput.value !== separator && separatorInput.value !== '')
64
- substructure = substructure.replaceAll(separatorInput.value, separator);
65
- const matchesColName = `Matches: ${substructure}`;
66
- const colExists = col.dataFrame.columns.names()
67
- .filter((it) => it.toLocaleLowerCase() === matchesColName.toLocaleLowerCase()).length > 0;
68
- if (!colExists) {
53
+ updateDivInnerHTML(this.inputsDiv, selectedInput);
54
+ }
55
+
56
+ updateNotationDiv(): void {
57
+ this.units = this.col.getTag(DG.TAGS.UNITS);
58
+ this.separator = this.col.getTag(bioTAGS.separator);
59
+ const notationDiv = this.dialog.root.getElementsByClassName('notation-text')[0];
60
+ if (notationDiv)
61
+ notationDiv.textContent = `Notation: ${this.units}`;
62
+ }
63
+
64
+ createUI(): void {
65
+ const dataframe = grok.shell.tv.dataFrame;
66
+ const seqColOptions = {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE};
67
+ this.columnsInput = ui.columnInput('Column', dataframe, this.col, (column: DG.Column) => {
68
+ this.col = column;
69
+ this.updateNotationDiv();
70
+ this.updateInputs();
71
+ }, seqColOptions);
72
+
73
+ this.substructureInput = ui.stringInput('Substructure', '');
74
+
75
+ this.editHelmLink = ui.link('Edit helm', () => this.editHelmLinkAction(), undefined, {style: {position: 'relative', left: '95px'}});
76
+
77
+ const df = DG.DataFrame.create(1);
78
+ df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
79
+ df.col(SUBSTR_HELM_COL_NAME)!.semType = this.col.semType;
80
+ df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
81
+ this.grid = df.plot.grid();
82
+ this.separatorInput = ui.stringInput('Separator', this.separator);
83
+
84
+ this.inputsDiv = ui.div();
85
+ this.units = this.col.getTag(DG.TAGS.UNITS);
86
+ this.separator = this.col.getTag(bioTAGS.separator);
87
+ this.updateInputs();
88
+
89
+ this.dialog = ui.dialog('Substructure Search')
90
+ .add(ui.divV([
91
+ ui.divText(`Notation: ${this.units}`, 'notation-text'),
92
+ this.inputsDiv,
93
+ ]))
94
+ .onOK(async () => {
95
+ let substructure = this.units === NOTATION.HELM ? df.get(SUBSTR_HELM_COL_NAME, 0) : this.substructureInput.value;
96
+ if (this.units === NOTATION.SEPARATOR && this.separatorInput.value !== this.separator && this.separatorInput.value !== '')
97
+ substructure = substructure.replaceAll(this.separatorInput.value, this.separator);
69
98
  let matches: DG.BitSet;
70
- if (units === NOTATION.HELM)
71
- matches = await helmSubstructureSearch(substructure, col);
99
+ if (this.units === NOTATION.HELM)
100
+ matches = await helmSubstructureSearch(substructure, this.col);
72
101
  else
73
- matches = linearSubstructureSearch(substructure, col);
74
- col.dataFrame.columns.add(DG.Column.fromBitSet(matchesColName, matches));
75
- } else { grok.shell.warning(`Search ${substructure} is already performed`); }
76
- })
77
- .show();
102
+ matches = linearSubstructureSearch(substructure, this.col);
103
+ this.col.dataFrame.filter.and(matches);
104
+ })
105
+ .show();
106
+ }
78
107
  }
79
108
 
80
109
  export function linearSubstructureSearch(substructure: string, col: DG.Column<string>, separator?: string): DG.BitSet {
@@ -47,5 +47,5 @@ category('activityCliffs', async () => {
47
47
  viewList.push(actCliffsTableViewWithEmptyRows);
48
48
 
49
49
  await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 3, DimReductionMethods.UMAP, 'sequence');
50
- });
50
+ }, {skipReason: 'GROK-13851: Unhandled exceptions'});
51
51
  });
@@ -136,7 +136,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$`,
136
136
  return function(srcCol: DG.Column): DG.Column {
137
137
  const converter = new NotationConverter(srcCol);
138
138
  const resCol = converter.convert(tgtNotation, tgtSeparator);
139
- expect(resCol.getTag('units'), tgtNotation);
139
+ expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
140
140
  return resCol;
141
141
  };
142
142
  }
@@ -274,13 +274,13 @@ MWRSWY-CKHP`;
274
274
  await _testDf(readSamples(Samples.fastaCsv), {
275
275
  'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
276
276
  });
277
- });
277
+ }, {skipReason: 'GROK-13851: Unhandled exceptions'});
278
278
 
279
279
  test('samplesFastaFasta', async () => {
280
280
  await _testDf(readSamples(Samples.fastaFasta), {
281
281
  'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
282
282
  });
283
- });
283
+ }, {skipReason: 'GROK-13851: Unhandled exceptions'});
284
284
 
285
285
  // peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
286
286
  // test('samplesPeptidesComplexPositiveAlignedSequence', async () => {
@@ -7,7 +7,6 @@ import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-librarie
7
7
  import {runKalign} from '../utils/multiple-sequence-alignment';
8
8
  import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
9
9
  import {awaitContainerStart} from './utils';
10
- //import * as grok from 'datagrok-api/grok';
11
10
 
12
11
  category('MSA', async () => {
13
12
  //table = await grok.data.files.openTable('Demo:Files/bio/peptides.csv');
@@ -113,8 +112,8 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
113
112
  srcCol.semType = semType;
114
113
 
115
114
  const tgtCol: DG.Column = tgtDf.getCol('seq')!;
116
- const msaCol: DG.Column = await runKalign(srcCol, true);
117
- expectArray(msaCol.toList(), tgtCol.toList());
115
+ const resCol: DG.Column = await runKalign(srcCol, true);
116
+ expectArray(resCol.toList(), tgtCol.toList());
118
117
  }
119
118
 
120
119
  async function _testMSAOnColumn(
@@ -1,6 +1,8 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
 
4
+ import $ from 'cash-dom';
5
+
4
6
  import {category, expect, test, awaitCheck, delay} from '@datagrok-libraries/utils/src/test';
5
7
 
6
8
  import {importFasta} from '../package';
@@ -47,8 +49,12 @@ category('renderers', () => {
47
49
  await _selectRendererBySemType();
48
50
  });
49
51
 
50
- test('setRendererManually', async () => {
51
- await _setRendererManually();
52
+ test('setRendererManuallyBeforeAddColumn', async () => {
53
+ await _setRendererManuallyBeforeAddColumn();
54
+ }, {skipReason: 'GROK-11212'});
55
+
56
+ test('setRendererManuallyAfterAddColumn', async () => {
57
+ await _setRendererManuallyAfterAddColumn();
52
58
  }, {skipReason: 'GROK-11212'});
53
59
 
54
60
  async function _rendererMacromoleculeFasta() {
@@ -204,7 +210,7 @@ category('renderers', () => {
204
210
 
205
211
  /** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
206
212
  * https://reddata.atlassian.net/browse/GROK-11212 */
207
- async function _setRendererManually() {
213
+ async function _setRendererManuallyBeforeAddColumn() {
208
214
  const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
209
215
  ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
210
216
  seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
@@ -227,4 +233,32 @@ category('renderers', () => {
227
233
  `instead of manual '${tgtCellRenderer}'.`);
228
234
  }
229
235
  }
236
+
237
+ /** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
238
+ * https://reddata.atlassian.net/browse/GROK-11212 */
239
+ async function _setRendererManuallyAfterAddColumn() {
240
+ const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
241
+ ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
242
+ seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
243
+ seqDiffCol.setTag(bioTAGS.separator, '/');
244
+ seqDiffCol.setTag(bioTAGS.aligned, 'SEQ');
245
+ seqDiffCol.setTag(bioTAGS.alphabet, 'UN');
246
+ seqDiffCol.setTag(bioTAGS.alphabetIsMultichar, 'true');
247
+ seqDiffCol.semType = DG.SEMTYPE.MACROMOLECULE;
248
+ const tgtCellRenderer = 'MacromoleculeDifference';
249
+ const df = DG.DataFrame.fromColumns([seqDiffCol]);
250
+ await grok.data.detectSemanticTypes(df);
251
+ const tv = grok.shell.addTableView(df);
252
+ await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
253
+
254
+ seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
255
+ await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
256
+
257
+ const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
258
+ if (resCellRenderer !== tgtCellRenderer) { // this is value of MacromoleculeDifferenceCR.cellType
259
+ throw new Error(`Tag 'cell.renderer' has been manually set to '${tgtCellRenderer}' for column ` +
260
+ `but after df was added as table, tag 'cell.renderer' has reset to '${resCellRenderer}' ` +
261
+ `instead of manual '${tgtCellRenderer}'.`);
262
+ }
263
+ }
230
264
  });
@@ -0,0 +1,38 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ import {category, test, expectFloat, before} from '@datagrok-libraries/utils/src/test';
4
+ import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
5
+ import {sequenceIdentityScoring, sequenceSimilarityScoring} from '../package';
6
+ import {getMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
7
+
8
+ category('Scoring', () => {
9
+ const sequence = 'sequence';
10
+ const expectedSimilarity = 'expected_similarity';
11
+ const expectedIdentity = 'expected_identity';
12
+ const table = DG.DataFrame.fromCsv(`${sequence},${expectedSimilarity},${expectedIdentity}
13
+ PEPTIDE1{Aca.Orn.gGlu.Pqa.D-His_1Bn.dH.hHis.4Abz.D-Tic.D-Dap.Y.Iva.meS.F.P.F.D-1Nal}$$$$,1.0,1.0
14
+ PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.68,0.53
15
+ PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.34,0.0
16
+ `);
17
+ const seqCol: DG.Column<string> = table.getCol(sequence);
18
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
19
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
20
+ const reference = seqCol.get(0)!;
21
+
22
+ before(async () => {
23
+ const monomerLibHelper = await getMonomerLibHelper();
24
+ await monomerLibHelper.loadLibraries(true);
25
+ });
26
+
27
+ test('Identity', async () => {
28
+ const scoresCol = await sequenceIdentityScoring(table, seqCol, reference);
29
+ for (let i = 0; i < scoresCol.length; i++)
30
+ expectFloat(scoresCol.get(i)!, table.get(expectedIdentity, i), 0.01, `Wrong identity score for sequence at position ${i}`);
31
+ });
32
+
33
+ test('Similarity', async () => {
34
+ const scoresCol = await sequenceSimilarityScoring(table, seqCol, reference);
35
+ for (let i = 0; i < scoresCol.length; i++)
36
+ expectFloat(scoresCol.get(i)!, table.get(expectedSimilarity, i), 0.01, `Wrong similarity score for sequence at position ${i}`);
37
+ });
38
+ });