@datagrok/bio 2.8.4 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -9
- package/README.md +39 -20
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/dockerfiles/Dockerfile +5 -4
- package/package.json +3 -3
- package/src/analysis/sequence-activity-cliffs.ts +8 -7
- package/src/analysis/sequence-similarity-viewer.ts +8 -8
- package/src/apps/web-logo-app.ts +26 -6
- package/src/calculations/monomerLevelMols.ts +6 -3
- package/src/package-test.ts +1 -0
- package/src/package-types.ts +0 -1
- package/src/package.ts +52 -10
- package/src/substructure-search/substructure-search.ts +84 -55
- package/src/tests/activity-cliffs-tests.ts +1 -1
- package/src/tests/converters-test.ts +1 -1
- package/src/tests/detectors-tests.ts +2 -2
- package/src/tests/msa-tests.ts +2 -3
- package/src/tests/renderers-test.ts +37 -3
- package/src/tests/scoring.ts +38 -0
- package/src/tests/splitters-test.ts +27 -1
- package/src/tests/units-handler-splitted-tests.ts +19 -12
- package/src/tests/units-handler-tests.ts +15 -15
- package/src/utils/cell-renderer.ts +31 -20
- package/src/utils/monomer-cell-renderer.ts +14 -14
- package/src/utils/save-as-fasta.ts +1 -1
- package/src/utils/split-to-monomers.ts +40 -6
- package/src/utils/ui-utils.ts +4 -4
- package/src/viewers/vd-regions-viewer.ts +88 -51
- package/src/viewers/web-logo-viewer.ts +307 -310
- package/src/widgets/composition-analysis-widget.ts +6 -2
package/dockerfiles/Dockerfile
CHANGED
|
@@ -24,12 +24,12 @@ RUN savedAptMark="$(apt-mark showmanual)" ; \
|
|
|
24
24
|
; \
|
|
25
25
|
apt-mark auto '.*' > /dev/null ; \
|
|
26
26
|
[ -z "$savedAptMark" ] || apt-mark manual $savedAptMark ; \
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
wget https://mafft.cbrc.jp/alignment/software/mafft_7.520-1_amd64.deb -O mafft.deb; \
|
|
28
|
+
apt install -y ./mafft.deb; \
|
|
29
|
+
rm -rf mafft.deb; \
|
|
30
30
|
wget https://github.com/Merck/PepSeA/archive/refs/heads/main.zip -O PepSeA.zip; \
|
|
31
31
|
unzip -q PepSeA.zip -d /opt ; \
|
|
32
|
-
|
|
32
|
+
rm -rf PepSeA.zip; \
|
|
33
33
|
chown -R grok:grok /opt/PepSeA-main ; \
|
|
34
34
|
apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false ; \
|
|
35
35
|
apt-get clean ; \
|
|
@@ -57,6 +57,7 @@ def distout():\n\
|
|
|
57
57
|
\n\
|
|
58
58
|
return distout_output\n' >> /opt/PepSeA-main/alignment/api.py;
|
|
59
59
|
|
|
60
|
+
# It is important to run the Docker container as a non-root user, not as root
|
|
60
61
|
USER grok:grok
|
|
61
62
|
|
|
62
63
|
# The `source` command does not work in Docker, because it applies to only one layer
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.
|
|
8
|
+
"version": "2.9.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,14 +34,14 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.37.0",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
39
|
"@datagrok-libraries/ml": "^6.3.39",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
41
41
|
"@datagrok-libraries/utils": "^4.0.17",
|
|
42
42
|
"cash-dom": "^8.0.0",
|
|
43
43
|
"css-loader": "^6.7.3",
|
|
44
|
-
"datagrok-api": "^1.
|
|
44
|
+
"datagrok-api": "^1.16.0",
|
|
45
45
|
"dayjs": "^1.11.4",
|
|
46
46
|
"fastest-levenshtein": "^1.0.16",
|
|
47
47
|
"openchemlib": "6.0.1",
|
|
@@ -7,7 +7,9 @@ import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-met
|
|
|
7
7
|
import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
8
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
9
9
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
10
|
-
import {
|
|
10
|
+
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
12
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
11
13
|
|
|
12
14
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
13
15
|
const stringArray = col.toList();
|
|
@@ -103,12 +105,11 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
103
105
|
});
|
|
104
106
|
|
|
105
107
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
106
|
-
const
|
|
107
|
-
const
|
|
108
|
-
const splitter = getSplitter(units, separator);
|
|
108
|
+
const uh = UnitsHandler.getOrCreate(params.seqCol);
|
|
109
|
+
const splitter = uh.getSplitter();
|
|
109
110
|
const subParts1 = splitter(sequencesArray[0]);
|
|
110
111
|
const subParts2 = splitter(sequencesArray[1]);
|
|
111
|
-
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
112
|
+
const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
|
|
112
113
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
113
114
|
|
|
114
115
|
propPanel.append(createDifferencesWithPositions(molDifferences));
|
|
@@ -127,8 +128,8 @@ function createPropPanelField(name: string, value: number): HTMLDivElement {
|
|
|
127
128
|
}
|
|
128
129
|
|
|
129
130
|
export function createDifferenceCanvas(
|
|
130
|
-
subParts1:
|
|
131
|
-
subParts2:
|
|
131
|
+
subParts1: ISeqSplitted,
|
|
132
|
+
subParts2: ISeqSplitted,
|
|
132
133
|
units: string,
|
|
133
134
|
molDifferences: { [key: number]: HTMLCanvasElement }): HTMLCanvasElement {
|
|
134
135
|
const canvas = document.createElement('canvas');
|
|
@@ -7,7 +7,6 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
|
7
7
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
8
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
|
-
import {TAGS as bioTAGS, getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
10
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
12
11
|
import {calcMmDistanceMatrix, dmLinearIndex} from './workers/mm-distance-worker-creator';
|
|
13
12
|
import {calculateMMDistancesArray} from './workers/mm-distance-array-service';
|
|
@@ -106,9 +105,10 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
106
105
|
const linearizeFunc = dmLinearIndex(len);
|
|
107
106
|
// array that keeps track of the indexes and scores together
|
|
108
107
|
const indexWScore = Array(len).fill(0)
|
|
109
|
-
.map((_, i) => ({
|
|
110
|
-
|
|
111
|
-
1 -
|
|
108
|
+
.map((_, i) => ({
|
|
109
|
+
idx: i, score: i === this.targetMoleculeIdx ? 1 :
|
|
110
|
+
this.preComputeDistanceMatrix ? 1 - this.mmDistanceMatrix[linearizeFunc(this.targetMoleculeIdx, i)] :
|
|
111
|
+
1 - distanceArray[i]
|
|
112
112
|
}));
|
|
113
113
|
indexWScore.sort((a, b) => b.score - a.score);
|
|
114
114
|
// get the most similar molecules
|
|
@@ -122,12 +122,12 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
122
122
|
const propPanel = ui.div();
|
|
123
123
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
124
124
|
const molColName = this.molCol?.name!;
|
|
125
|
-
const
|
|
126
|
-
const
|
|
127
|
-
const splitter = getSplitter(
|
|
125
|
+
const col = resDf.col(molColName)!;
|
|
126
|
+
const uh = UnitsHandler.getOrCreate(col);
|
|
127
|
+
const splitter = uh.getSplitter();
|
|
128
128
|
const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
|
|
129
129
|
const subParts2 = splitter(resDf.get(molColName, resDf.currentRowIdx));
|
|
130
|
-
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
130
|
+
const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
|
|
131
131
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
132
132
|
if (subParts1.length !== subParts2.length) {
|
|
133
133
|
propPanel.append(ui.divV([
|
package/src/apps/web-logo-app.ts
CHANGED
|
@@ -2,7 +2,12 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
5
7
|
import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
8
|
+
|
|
9
|
+
import {PROPS as wlPROPS} from '../viewers/web-logo-viewer';
|
|
10
|
+
|
|
6
11
|
import {_package} from '../package';
|
|
7
12
|
|
|
8
13
|
export class WebLogoApp {
|
|
@@ -11,7 +16,7 @@ export class WebLogoApp {
|
|
|
11
16
|
df: DG.DataFrame;
|
|
12
17
|
view: DG.TableView;
|
|
13
18
|
|
|
14
|
-
constructor() {}
|
|
19
|
+
constructor(private readonly urlParams: URLSearchParams) {}
|
|
15
20
|
|
|
16
21
|
async init(df: DG.DataFrame, funcName: string): Promise<void> {
|
|
17
22
|
this._funcName = funcName;
|
|
@@ -23,12 +28,27 @@ export class WebLogoApp {
|
|
|
23
28
|
// -- View --
|
|
24
29
|
|
|
25
30
|
async buildView(): Promise<void> {
|
|
26
|
-
|
|
27
|
-
|
|
31
|
+
const urlParamsTxt = wu(this.urlParams.entries())
|
|
32
|
+
.map(([key, value]) => `${key}=${encodeURIComponent(value)}`)
|
|
33
|
+
.toArray().join('&');
|
|
28
34
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
35
|
+
this.view = grok.shell.addTableView(this.df);
|
|
36
|
+
this.view.path = this.view.basePath = `func/${_package.name}.${this._funcName}?${urlParamsTxt}`;
|
|
37
|
+
|
|
38
|
+
const options: { [p: string]: any } = {sequenceColumnName: 'sequence'};
|
|
39
|
+
for (const [optName, optValue] of this.urlParams.entries()) {
|
|
40
|
+
switch (optName) {
|
|
41
|
+
// boolean
|
|
42
|
+
case wlPROPS.fixWidth:
|
|
43
|
+
case wlPROPS.fitArea:
|
|
44
|
+
options[optName] = ((v) => { return ['1', 'on', 'true'].includes(v.toLowerCase()); })(optValue);
|
|
45
|
+
break;
|
|
46
|
+
default:
|
|
47
|
+
options[optName] = optValue;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot
|
|
51
|
+
.fromType('WebLogo', options)) as DG.Viewer & IWebLogoViewer;
|
|
32
52
|
this.view.dockManager.dock(viewer, DG.DOCK_TYPE.DOWN, null, 'WebLogo', 0.35);
|
|
33
53
|
}
|
|
34
54
|
}
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
4
7
|
import {getHelmMonomers} from '../package';
|
|
5
|
-
import {TAGS as bioTAGS, getSplitter, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
8
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
7
10
|
|
|
8
11
|
const V2000_ATOM_NAME_POS = 31;
|
|
9
12
|
|
|
@@ -27,7 +30,7 @@ export async function getMonomericMols(
|
|
|
27
30
|
} else {
|
|
28
31
|
molV3000Array = new Array<string>(mcol.length);
|
|
29
32
|
for (let i = 0; i < mcol.length; i++) {
|
|
30
|
-
const sequenceMonomers = uh.splitted[i].filter((it) => it !== '');
|
|
33
|
+
const sequenceMonomers = wu(uh.splitted[i]).filter((it) => it !== '').toArray();
|
|
31
34
|
const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
|
|
32
35
|
molV3000Array[i] = molV3000;
|
|
33
36
|
}
|
|
@@ -36,7 +39,7 @@ export async function getMonomericMols(
|
|
|
36
39
|
}
|
|
37
40
|
|
|
38
41
|
function molV3000FromNonHelmSequence(
|
|
39
|
-
monomers:
|
|
42
|
+
monomers: ISeqSplitted, monomersDict: Map<string, string>, pattern: boolean = false) {
|
|
40
43
|
let molV3000 = `
|
|
41
44
|
Datagrok macromolecule handler
|
|
42
45
|
|
package/src/package-test.ts
CHANGED
package/src/package-types.ts
CHANGED
|
@@ -14,7 +14,6 @@ export const enum BioPackagePropertiesNames {
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
export class BioPackageProperties extends Map<string, any> {
|
|
17
|
-
|
|
18
17
|
private _onPropertyChanged: Subject<string> = new Subject<string>();
|
|
19
18
|
public get onPropertyChanged(): Observable<string> { return this._onPropertyChanged; }
|
|
20
19
|
|
package/src/package.ts
CHANGED
|
@@ -21,7 +21,7 @@ import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-uti
|
|
|
21
21
|
|
|
22
22
|
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
23
23
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
24
|
-
import {
|
|
24
|
+
import {SubstructureSearchDialog} from './substructure-search/substructure-search';
|
|
25
25
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
26
26
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
27
27
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
@@ -40,10 +40,11 @@ import {
|
|
|
40
40
|
getLibFileNameList,
|
|
41
41
|
getLibraryPanelUI
|
|
42
42
|
} from './utils/monomer-lib';
|
|
43
|
-
import {
|
|
43
|
+
import {getMacromoleculeColumns} from './utils/ui-utils';
|
|
44
44
|
import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
45
45
|
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
46
46
|
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
47
|
+
import {SCORE, calculateScores} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
|
|
47
48
|
|
|
48
49
|
import {demoBio01UI} from './demo/bio01-similarity-diversity';
|
|
49
50
|
import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
|
|
@@ -602,7 +603,7 @@ export function importBam(fileContent: string): DG.DataFrame [] {
|
|
|
602
603
|
//top-menu: Bio | Convert | Notation...
|
|
603
604
|
//name: convertDialog
|
|
604
605
|
export function convertDialog() {
|
|
605
|
-
const col =
|
|
606
|
+
const col = getMacromoleculeColumns()[0];
|
|
606
607
|
convert(col);
|
|
607
608
|
}
|
|
608
609
|
|
|
@@ -734,12 +735,52 @@ export function diversitySearchTopMenu() {
|
|
|
734
735
|
view.dockManager.dock(viewer, 'down');
|
|
735
736
|
}
|
|
736
737
|
|
|
737
|
-
//
|
|
738
|
-
//
|
|
739
|
-
//
|
|
740
|
-
export function
|
|
741
|
-
const
|
|
742
|
-
|
|
738
|
+
//name: SearchSubsequenceEditor
|
|
739
|
+
//tags: editor
|
|
740
|
+
//input: funccall call
|
|
741
|
+
export function searchSubsequenceEditor(call: DG.FuncCall) {
|
|
742
|
+
const columns = getMacromoleculeColumns();
|
|
743
|
+
if (columns.length === 1)
|
|
744
|
+
call.func.prepare({macromolecules: columns[0]}).call(true);
|
|
745
|
+
else
|
|
746
|
+
new SubstructureSearchDialog(columns);
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
//top-menu: Bio | Search | Subsequence...
|
|
750
|
+
//name: Subsequence Search
|
|
751
|
+
//input: column macromolecules
|
|
752
|
+
//editor: Bio:SearchSubsequenceEditor
|
|
753
|
+
export function SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
|
|
754
|
+
grok.shell.tv.getFiltersGroup({createDefaultFilters: false}).updateOrAdd({
|
|
755
|
+
type: 'Bio:bioSubstructureFilter',
|
|
756
|
+
column: macromolecules.name,
|
|
757
|
+
columnName: macromolecules.name,
|
|
758
|
+
});
|
|
759
|
+
grok.shell.tv.grid.scrollToCell(macromolecules, 0);
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
//top-menu: Bio | Calculate | Identity...
|
|
763
|
+
//name: Identity Scoring
|
|
764
|
+
//description: Adds a column with fraction of matching monomers
|
|
765
|
+
//input: dataframe table [Table containing Macromolecule column]
|
|
766
|
+
//input: column macromolecules {semType: Macromolecule} [Sequences to score]
|
|
767
|
+
//input: string reference [Sequence, matching column format]
|
|
768
|
+
//output: column scores
|
|
769
|
+
export async function sequenceIdentityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
|
|
770
|
+
const scores = calculateScores(table, macromolecule, reference, SCORE.IDENTITY);
|
|
771
|
+
return scores;
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
//top-menu: Bio | Calculate | Similarity...
|
|
775
|
+
//name: Similarity Scoring
|
|
776
|
+
//description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
|
|
777
|
+
//input: dataframe table [Table containing Macromolecule column]
|
|
778
|
+
//input: column macromolecules {semType: Macromolecule} [Sequences to score]
|
|
779
|
+
//input: string reference [Sequence, matching column format]
|
|
780
|
+
//output: column scores
|
|
781
|
+
export async function sequenceSimilarityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
|
|
782
|
+
const scores = calculateScores(table, macromolecule, reference, SCORE.SIMILARITY);
|
|
783
|
+
return scores;
|
|
743
784
|
}
|
|
744
785
|
|
|
745
786
|
//name: saveAsFasta
|
|
@@ -764,7 +805,8 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
|
764
805
|
export async function webLogoLargeApp(): Promise<void> {
|
|
765
806
|
const pi = DG.TaskBarProgressIndicator.create('WebLogo');
|
|
766
807
|
try {
|
|
767
|
-
const
|
|
808
|
+
const urlParams = new URLSearchParams(window.location.search);
|
|
809
|
+
const app = new WebLogoApp(urlParams);
|
|
768
810
|
const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
|
|
769
811
|
await grok.data.detectSemanticTypes(df);
|
|
770
812
|
await app.init(df, 'webLogoLargeApp');
|
|
@@ -17,64 +17,93 @@ export const enum MONOMERIC_COL_TAGS {
|
|
|
17
17
|
|
|
18
18
|
const SUBSTR_HELM_COL_NAME = 'substr_helm';
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
20
|
+
export class SubstructureSearchDialog {
|
|
21
|
+
units: string;
|
|
22
|
+
separator: string;
|
|
23
|
+
inputsDiv: HTMLDivElement;
|
|
24
|
+
substructureInput: DG.InputBase<string>;
|
|
25
|
+
separatorInput: DG.InputBase<string>;
|
|
26
|
+
editHelmLink: HTMLAnchorElement;
|
|
27
|
+
columnsInput: DG.InputBase<DG.Column | null>;
|
|
28
|
+
grid: DG.Grid;
|
|
29
|
+
col: DG.Column;
|
|
30
|
+
dialog: DG.Dialog;
|
|
31
|
+
|
|
32
|
+
constructor(columns: DG.Column<string>[]) {
|
|
33
|
+
this.col = columns[0];
|
|
34
|
+
this.createUI();
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
editHelmLinkAction(): void {
|
|
38
|
+
updateDivInnerHTML(this.inputsDiv, this.grid.root);
|
|
39
|
+
ui.tools.waitForElementInDom(this.grid.root).then(() => {
|
|
40
|
+
setTimeout(() => {
|
|
41
|
+
this.grid.cell(SUBSTR_HELM_COL_NAME, 0).element.children[0].dispatchEvent(
|
|
42
|
+
new KeyboardEvent('keydown', {key: 'Enter'})
|
|
43
|
+
);
|
|
44
|
+
}, 100);
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
updateInputs(): void {
|
|
49
|
+
const selectedInput = this.units === NOTATION.HELM ? ui.divV([this.columnsInput, this.editHelmLink]) :
|
|
50
|
+
this.units === NOTATION.SEPARATOR ? ui.inputs([this.columnsInput, this.substructureInput, this.separatorInput]) :
|
|
51
|
+
ui.inputs([this.columnsInput, this.substructureInput]);
|
|
40
52
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
.
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
53
|
+
updateDivInnerHTML(this.inputsDiv, selectedInput);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
updateNotationDiv(): void {
|
|
57
|
+
this.units = this.col.getTag(DG.TAGS.UNITS);
|
|
58
|
+
this.separator = this.col.getTag(bioTAGS.separator);
|
|
59
|
+
const notationDiv = this.dialog.root.getElementsByClassName('notation-text')[0];
|
|
60
|
+
if (notationDiv)
|
|
61
|
+
notationDiv.textContent = `Notation: ${this.units}`;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
createUI(): void {
|
|
65
|
+
const dataframe = grok.shell.tv.dataFrame;
|
|
66
|
+
const seqColOptions = {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE};
|
|
67
|
+
this.columnsInput = ui.columnInput('Column', dataframe, this.col, (column: DG.Column) => {
|
|
68
|
+
this.col = column;
|
|
69
|
+
this.updateNotationDiv();
|
|
70
|
+
this.updateInputs();
|
|
71
|
+
}, seqColOptions);
|
|
72
|
+
|
|
73
|
+
this.substructureInput = ui.stringInput('Substructure', '');
|
|
74
|
+
|
|
75
|
+
this.editHelmLink = ui.link('Edit helm', () => this.editHelmLinkAction(), undefined, {style: {position: 'relative', left: '95px'}});
|
|
76
|
+
|
|
77
|
+
const df = DG.DataFrame.create(1);
|
|
78
|
+
df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
|
|
79
|
+
df.col(SUBSTR_HELM_COL_NAME)!.semType = this.col.semType;
|
|
80
|
+
df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
81
|
+
this.grid = df.plot.grid();
|
|
82
|
+
this.separatorInput = ui.stringInput('Separator', this.separator);
|
|
83
|
+
|
|
84
|
+
this.inputsDiv = ui.div();
|
|
85
|
+
this.units = this.col.getTag(DG.TAGS.UNITS);
|
|
86
|
+
this.separator = this.col.getTag(bioTAGS.separator);
|
|
87
|
+
this.updateInputs();
|
|
88
|
+
|
|
89
|
+
this.dialog = ui.dialog('Substructure Search')
|
|
90
|
+
.add(ui.divV([
|
|
91
|
+
ui.divText(`Notation: ${this.units}`, 'notation-text'),
|
|
92
|
+
this.inputsDiv,
|
|
93
|
+
]))
|
|
94
|
+
.onOK(async () => {
|
|
95
|
+
let substructure = this.units === NOTATION.HELM ? df.get(SUBSTR_HELM_COL_NAME, 0) : this.substructureInput.value;
|
|
96
|
+
if (this.units === NOTATION.SEPARATOR && this.separatorInput.value !== this.separator && this.separatorInput.value !== '')
|
|
97
|
+
substructure = substructure.replaceAll(this.separatorInput.value, this.separator);
|
|
69
98
|
let matches: DG.BitSet;
|
|
70
|
-
if (units === NOTATION.HELM)
|
|
71
|
-
matches = await helmSubstructureSearch(substructure, col);
|
|
99
|
+
if (this.units === NOTATION.HELM)
|
|
100
|
+
matches = await helmSubstructureSearch(substructure, this.col);
|
|
72
101
|
else
|
|
73
|
-
matches = linearSubstructureSearch(substructure, col);
|
|
74
|
-
col.dataFrame.
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
|
|
102
|
+
matches = linearSubstructureSearch(substructure, this.col);
|
|
103
|
+
this.col.dataFrame.filter.and(matches);
|
|
104
|
+
})
|
|
105
|
+
.show();
|
|
106
|
+
}
|
|
78
107
|
}
|
|
79
108
|
|
|
80
109
|
export function linearSubstructureSearch(substructure: string, col: DG.Column<string>, separator?: string): DG.BitSet {
|
|
@@ -47,5 +47,5 @@ category('activityCliffs', async () => {
|
|
|
47
47
|
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
48
48
|
|
|
49
49
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 3, DimReductionMethods.UMAP, 'sequence');
|
|
50
|
-
});
|
|
50
|
+
}, {skipReason: 'GROK-13851: Unhandled exceptions'});
|
|
51
51
|
});
|
|
@@ -136,7 +136,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$`,
|
|
|
136
136
|
return function(srcCol: DG.Column): DG.Column {
|
|
137
137
|
const converter = new NotationConverter(srcCol);
|
|
138
138
|
const resCol = converter.convert(tgtNotation, tgtSeparator);
|
|
139
|
-
expect(resCol.getTag(
|
|
139
|
+
expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
140
140
|
return resCol;
|
|
141
141
|
};
|
|
142
142
|
}
|
|
@@ -274,13 +274,13 @@ MWRSWY-CKHP`;
|
|
|
274
274
|
await _testDf(readSamples(Samples.fastaCsv), {
|
|
275
275
|
'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
276
276
|
});
|
|
277
|
-
});
|
|
277
|
+
}, {skipReason: 'GROK-13851: Unhandled exceptions'});
|
|
278
278
|
|
|
279
279
|
test('samplesFastaFasta', async () => {
|
|
280
280
|
await _testDf(readSamples(Samples.fastaFasta), {
|
|
281
281
|
'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
282
282
|
});
|
|
283
|
-
});
|
|
283
|
+
}, {skipReason: 'GROK-13851: Unhandled exceptions'});
|
|
284
284
|
|
|
285
285
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
286
286
|
// test('samplesPeptidesComplexPositiveAlignedSequence', async () => {
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -7,7 +7,6 @@ import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-librarie
|
|
|
7
7
|
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
8
8
|
import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
|
|
9
9
|
import {awaitContainerStart} from './utils';
|
|
10
|
-
//import * as grok from 'datagrok-api/grok';
|
|
11
10
|
|
|
12
11
|
category('MSA', async () => {
|
|
13
12
|
//table = await grok.data.files.openTable('Demo:Files/bio/peptides.csv');
|
|
@@ -113,8 +112,8 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
|
|
|
113
112
|
srcCol.semType = semType;
|
|
114
113
|
|
|
115
114
|
const tgtCol: DG.Column = tgtDf.getCol('seq')!;
|
|
116
|
-
const
|
|
117
|
-
expectArray(
|
|
115
|
+
const resCol: DG.Column = await runKalign(srcCol, true);
|
|
116
|
+
expectArray(resCol.toList(), tgtCol.toList());
|
|
118
117
|
}
|
|
119
118
|
|
|
120
119
|
async function _testMSAOnColumn(
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
|
+
import $ from 'cash-dom';
|
|
5
|
+
|
|
4
6
|
import {category, expect, test, awaitCheck, delay} from '@datagrok-libraries/utils/src/test';
|
|
5
7
|
|
|
6
8
|
import {importFasta} from '../package';
|
|
@@ -47,8 +49,12 @@ category('renderers', () => {
|
|
|
47
49
|
await _selectRendererBySemType();
|
|
48
50
|
});
|
|
49
51
|
|
|
50
|
-
test('
|
|
51
|
-
await
|
|
52
|
+
test('setRendererManuallyBeforeAddColumn', async () => {
|
|
53
|
+
await _setRendererManuallyBeforeAddColumn();
|
|
54
|
+
}, {skipReason: 'GROK-11212'});
|
|
55
|
+
|
|
56
|
+
test('setRendererManuallyAfterAddColumn', async () => {
|
|
57
|
+
await _setRendererManuallyAfterAddColumn();
|
|
52
58
|
}, {skipReason: 'GROK-11212'});
|
|
53
59
|
|
|
54
60
|
async function _rendererMacromoleculeFasta() {
|
|
@@ -204,7 +210,7 @@ category('renderers', () => {
|
|
|
204
210
|
|
|
205
211
|
/** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
|
|
206
212
|
* https://reddata.atlassian.net/browse/GROK-11212 */
|
|
207
|
-
async function
|
|
213
|
+
async function _setRendererManuallyBeforeAddColumn() {
|
|
208
214
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
209
215
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
210
216
|
seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
@@ -227,4 +233,32 @@ category('renderers', () => {
|
|
|
227
233
|
`instead of manual '${tgtCellRenderer}'.`);
|
|
228
234
|
}
|
|
229
235
|
}
|
|
236
|
+
|
|
237
|
+
/** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
|
|
238
|
+
* https://reddata.atlassian.net/browse/GROK-11212 */
|
|
239
|
+
async function _setRendererManuallyAfterAddColumn() {
|
|
240
|
+
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
241
|
+
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
242
|
+
seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
243
|
+
seqDiffCol.setTag(bioTAGS.separator, '/');
|
|
244
|
+
seqDiffCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
245
|
+
seqDiffCol.setTag(bioTAGS.alphabet, 'UN');
|
|
246
|
+
seqDiffCol.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
247
|
+
seqDiffCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
248
|
+
const tgtCellRenderer = 'MacromoleculeDifference';
|
|
249
|
+
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
250
|
+
await grok.data.detectSemanticTypes(df);
|
|
251
|
+
const tv = grok.shell.addTableView(df);
|
|
252
|
+
await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
|
|
253
|
+
|
|
254
|
+
seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
|
|
255
|
+
await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
|
|
256
|
+
|
|
257
|
+
const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
258
|
+
if (resCellRenderer !== tgtCellRenderer) { // this is value of MacromoleculeDifferenceCR.cellType
|
|
259
|
+
throw new Error(`Tag 'cell.renderer' has been manually set to '${tgtCellRenderer}' for column ` +
|
|
260
|
+
`but after df was added as table, tag 'cell.renderer' has reset to '${resCellRenderer}' ` +
|
|
261
|
+
`instead of manual '${tgtCellRenderer}'.`);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
230
264
|
});
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import {category, test, expectFloat, before} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
5
|
+
import {sequenceIdentityScoring, sequenceSimilarityScoring} from '../package';
|
|
6
|
+
import {getMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
7
|
+
|
|
8
|
+
category('Scoring', () => {
|
|
9
|
+
const sequence = 'sequence';
|
|
10
|
+
const expectedSimilarity = 'expected_similarity';
|
|
11
|
+
const expectedIdentity = 'expected_identity';
|
|
12
|
+
const table = DG.DataFrame.fromCsv(`${sequence},${expectedSimilarity},${expectedIdentity}
|
|
13
|
+
PEPTIDE1{Aca.Orn.gGlu.Pqa.D-His_1Bn.dH.hHis.4Abz.D-Tic.D-Dap.Y.Iva.meS.F.P.F.D-1Nal}$$$$,1.0,1.0
|
|
14
|
+
PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.68,0.53
|
|
15
|
+
PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.34,0.0
|
|
16
|
+
`);
|
|
17
|
+
const seqCol: DG.Column<string> = table.getCol(sequence);
|
|
18
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
19
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
20
|
+
const reference = seqCol.get(0)!;
|
|
21
|
+
|
|
22
|
+
before(async () => {
|
|
23
|
+
const monomerLibHelper = await getMonomerLibHelper();
|
|
24
|
+
await monomerLibHelper.loadLibraries(true);
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
test('Identity', async () => {
|
|
28
|
+
const scoresCol = await sequenceIdentityScoring(table, seqCol, reference);
|
|
29
|
+
for (let i = 0; i < scoresCol.length; i++)
|
|
30
|
+
expectFloat(scoresCol.get(i)!, table.get(expectedIdentity, i), 0.01, `Wrong identity score for sequence at position ${i}`);
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
test('Similarity', async () => {
|
|
34
|
+
const scoresCol = await sequenceSimilarityScoring(table, seqCol, reference);
|
|
35
|
+
for (let i = 0; i < scoresCol.length; i++)
|
|
36
|
+
expectFloat(scoresCol.get(i)!, table.get(expectedSimilarity, i), 0.01, `Wrong similarity score for sequence at position ${i}`);
|
|
37
|
+
});
|
|
38
|
+
});
|