@datagrok/bio 2.11.42 → 2.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +1 -1
- package/detectors.js +11 -11
- package/dist/36.js +1 -1
- package/dist/36.js.map +1 -1
- package/dist/413.js +1 -1
- package/dist/413.js.map +1 -1
- package/dist/590.js +1 -1
- package/dist/590.js.map +1 -1
- package/dist/709.js +1 -1
- package/dist/709.js.map +1 -1
- package/dist/895.js +1 -1
- package/dist/895.js.map +1 -1
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/files/tests/libraries/HELMmonomerSchema.json +1 -1
- package/package.json +11 -11
- package/src/analysis/sequence-activity-cliffs.ts +9 -9
- package/src/analysis/sequence-diversity-viewer.ts +3 -3
- package/src/analysis/sequence-search-base-viewer.ts +2 -2
- package/src/analysis/sequence-similarity-viewer.ts +10 -10
- package/src/analysis/sequence-space.ts +26 -23
- package/src/calculations/monomerLevelMols.ts +13 -11
- package/src/package.ts +12 -15
- package/src/tests/WebLogo-layout-tests.ts +5 -2
- package/src/tests/WebLogo-positions-test.ts +5 -5
- package/src/tests/bio-tests.ts +13 -6
- package/src/tests/converters-test.ts +4 -4
- package/src/tests/detectors-benchmark-tests.ts +5 -5
- package/src/tests/detectors-tests.ts +13 -13
- package/src/tests/fasta-export-tests.ts +10 -4
- package/src/tests/mm-distance-tests.ts +10 -10
- package/src/tests/msa-tests.ts +8 -15
- package/src/tests/renderers-monomer-placer.ts +3 -3
- package/src/tests/renderers-test.ts +6 -8
- package/src/tests/splitters-test.ts +14 -13
- package/src/tests/substructure-filters-tests.ts +143 -1
- package/src/tests/to-atomic-level-tests.ts +2 -2
- package/src/tests/units-handler-get-region.ts +4 -4
- package/src/tests/units-handler-splitted-tests.ts +19 -17
- package/src/tests/units-handler-tests.ts +32 -32
- package/src/utils/cell-renderer.ts +40 -34
- package/src/utils/check-input-column.ts +5 -5
- package/src/utils/context-menu.ts +9 -6
- package/src/utils/convert.ts +9 -9
- package/src/utils/get-region-func-editor.ts +11 -11
- package/src/utils/get-region.ts +10 -12
- package/src/utils/macromolecule-column-widget.ts +4 -3
- package/src/utils/monomer-lib/library-file-manager/event-manager.ts +1 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +6 -6
- package/src/utils/pepsea.ts +1 -0
- package/src/utils/poly-tool/transformation.ts +3 -3
- package/src/utils/poly-tool/ui.ts +46 -135
- package/src/utils/save-as-fasta.ts +14 -15
- package/src/utils/sequence-to-mol.ts +4 -4
- package/src/viewers/web-logo-viewer.ts +46 -54
- package/src/widgets/bio-substructure-filter-types.ts +19 -45
- package/src/widgets/bio-substructure-filter.ts +45 -23
- package/src/widgets/composition-analysis-widget.ts +8 -8
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
},
|
|
22
22
|
"id": {
|
|
23
23
|
"description": "Unique ID for the monomer. There is no meaning associated with this ID value.",
|
|
24
|
-
"type": "integer"
|
|
24
|
+
"type": ["string", "integer"]
|
|
25
25
|
},
|
|
26
26
|
"rgroups": {
|
|
27
27
|
"description": "An array of the monomer R groups and required information.",
|
package/package.json
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"friendlyName": "Bio",
|
|
4
4
|
"author": {
|
|
5
|
-
"name": "
|
|
6
|
-
"email": "
|
|
5
|
+
"name": "Aleksandr Tanas",
|
|
6
|
+
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.
|
|
8
|
+
"version": "2.12.1",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,13 +34,13 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "5.
|
|
38
|
-
"@datagrok-libraries/chem-meta": "^1.2.
|
|
39
|
-
"@datagrok-libraries/ml": "^6.4.
|
|
40
|
-
"@datagrok-libraries/tutorials": "^1.3.
|
|
37
|
+
"@datagrok-libraries/bio": "5.40.0",
|
|
38
|
+
"@datagrok-libraries/chem-meta": "^1.2.3",
|
|
39
|
+
"@datagrok-libraries/ml": "^6.4.12",
|
|
40
|
+
"@datagrok-libraries/tutorials": "^1.3.12",
|
|
41
41
|
"ajv": "^8.12.0",
|
|
42
42
|
"ajv-errors": "^3.0.0",
|
|
43
|
-
"@datagrok-libraries/utils": "^4.
|
|
43
|
+
"@datagrok-libraries/utils": "^4.2.0",
|
|
44
44
|
"@datagrok-libraries/math": "^1.0.7",
|
|
45
45
|
"cash-dom": "^8.0.0",
|
|
46
46
|
"css-loader": "^6.7.3",
|
|
@@ -54,9 +54,9 @@
|
|
|
54
54
|
"wu": "latest"
|
|
55
55
|
},
|
|
56
56
|
"devDependencies": {
|
|
57
|
-
"@datagrok/chem": "^1.
|
|
58
|
-
"@datagrok/dendrogram": "^1.2.
|
|
59
|
-
"@datagrok/helm": "^2.1.
|
|
57
|
+
"@datagrok/chem": "^1.9.0",
|
|
58
|
+
"@datagrok/dendrogram": "^1.2.27",
|
|
59
|
+
"@datagrok/helm": "^2.1.30",
|
|
60
60
|
"@types/node": "^17.0.24",
|
|
61
61
|
"@types/wu": "latest",
|
|
62
62
|
"@typescript-eslint/eslint-plugin": "latest",
|
|
@@ -2,13 +2,15 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
5
7
|
import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
6
8
|
import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
7
9
|
import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
10
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
9
11
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
10
12
|
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
|
-
import {
|
|
13
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
12
14
|
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
13
15
|
|
|
14
16
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
@@ -97,19 +99,16 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
97
99
|
|
|
98
100
|
propPanel.append(ui.divText(params.seqCol.name, {style: {fontWeight: 'bold'}}));
|
|
99
101
|
|
|
100
|
-
const sequencesArray = new Array<string>(2);
|
|
101
102
|
const activitiesArray = new Array<number>(2);
|
|
102
103
|
params.points.forEach((molIdx, idx) => {
|
|
103
|
-
sequencesArray[idx] = params.seqCol.get(molIdx);
|
|
104
104
|
activitiesArray[idx] = params.activityCol.get(molIdx);
|
|
105
105
|
});
|
|
106
106
|
|
|
107
107
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
108
|
-
const
|
|
109
|
-
const
|
|
110
|
-
const
|
|
111
|
-
const
|
|
112
|
-
const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
|
|
108
|
+
const sh = SeqHandler.forColumn(params.seqCol);
|
|
109
|
+
const subParts1 = sh.getSplitted(params.points[0]); // splitter(sequencesArray[0], {uh, rowIdx: -1});
|
|
110
|
+
const subParts2 = sh.getSplitted(params.points[1]); // splitter(sequencesArray[1], {uh, rowIdx: -1});
|
|
111
|
+
const canvas = createDifferenceCanvas(subParts1, subParts2, sh.units, molDifferences);
|
|
113
112
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
114
113
|
|
|
115
114
|
propPanel.append(createDifferencesWithPositions(molDifferences));
|
|
@@ -135,7 +134,8 @@ export function createDifferenceCanvas(
|
|
|
135
134
|
const canvas = document.createElement('canvas');
|
|
136
135
|
const context = canvas.getContext('2d');
|
|
137
136
|
canvas.height = 30;
|
|
138
|
-
drawMoleculeDifferenceOnCanvas(context!, 0, 0, 0, 30,
|
|
137
|
+
drawMoleculeDifferenceOnCanvas(context!, 0, 0, 0, 30,
|
|
138
|
+
wu(subParts1.canonicals).toArray(), wu(subParts2.canonicals).toArray(), units, true, molDifferences);
|
|
139
139
|
return canvas;
|
|
140
140
|
}
|
|
141
141
|
|
|
@@ -7,7 +7,7 @@ import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
|
7
7
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
8
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
|
-
import {
|
|
10
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
11
11
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
12
12
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
13
13
|
import {DistanceMatrixService, dmLinearIndex} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
@@ -29,8 +29,8 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
29
29
|
return;
|
|
30
30
|
if (this.dataFrame) {
|
|
31
31
|
if (computeData && this.moleculeColumn) {
|
|
32
|
-
const
|
|
33
|
-
await (
|
|
32
|
+
const sh = SeqHandler.forColumn(this.moleculeColumn);
|
|
33
|
+
await (sh.isFasta() ? this.computeByMM() : this.computeByChem());
|
|
34
34
|
|
|
35
35
|
const diverseColumnName: string = this.diverseColumnLabel != null ? this.diverseColumnLabel :
|
|
36
36
|
`diverse (${this.moleculeColumnName})`;
|
|
@@ -14,7 +14,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
|
14
14
|
fingerprint: string;
|
|
15
15
|
metricsProperties = ['distanceMetric', 'fingerprint'];
|
|
16
16
|
fingerprintChoices = ['Morgan', 'Pattern'];
|
|
17
|
-
moleculeColumn?: DG.Column
|
|
17
|
+
moleculeColumn?: DG.Column<string>;
|
|
18
18
|
moleculeColumnName: string;
|
|
19
19
|
initialized: boolean = false;
|
|
20
20
|
tags = [DG.TAGS.UNITS, bioTAGS.aligned, bioTAGS.separator, bioTAGS.alphabet];
|
|
@@ -51,7 +51,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
|
51
51
|
.subscribe((_: any) => this.render(false)));
|
|
52
52
|
this.subs.push(DG.debounce(ui.onSizeChanged(this.root), 50)
|
|
53
53
|
.subscribe((_: any) => this.render(false)));
|
|
54
|
-
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE)
|
|
54
|
+
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE) as DG.Column<string>;
|
|
55
55
|
this.moleculeColumnName = this.moleculeColumn?.name!;
|
|
56
56
|
this.getProperty('limit')!.fromOptions({min: 1, max: this.dataFrame.rowCount});
|
|
57
57
|
}
|
|
@@ -7,7 +7,7 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
|
7
7
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
8
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
|
-
import {
|
|
10
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
11
11
|
import {alignSequencePair} from '@datagrok-libraries/bio/src/utils/macromolecule/alignment';
|
|
12
12
|
import {KnnResult, SparseMatrixService} from '@datagrok-libraries/ml/src/distance-matrix/sparse-matrix-service';
|
|
13
13
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
@@ -50,9 +50,9 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
50
50
|
this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
51
51
|
if (computeData && !this.gridSelect) {
|
|
52
52
|
this.targetMoleculeIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
53
|
-
const
|
|
53
|
+
const sh = SeqHandler.forColumn(this.moleculeColumn!);
|
|
54
54
|
|
|
55
|
-
await (!
|
|
55
|
+
await (!sh.isHelm() ? this.computeByMM() : this.computeByChem());
|
|
56
56
|
const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
|
|
57
57
|
`similar (${this.moleculeColumnName})`;
|
|
58
58
|
this.molCol = DG.Column.string(similarColumnName,
|
|
@@ -121,13 +121,13 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
121
121
|
const propPanel = ui.div();
|
|
122
122
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
123
123
|
const molColName = this.molCol?.name!;
|
|
124
|
-
const
|
|
125
|
-
const
|
|
126
|
-
const
|
|
127
|
-
const subParts1 =
|
|
128
|
-
const subParts2 =
|
|
129
|
-
const alignment = alignSequencePair(
|
|
130
|
-
const canvas = createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted,
|
|
124
|
+
const resCol: DG.Column<string> = resDf.col(molColName)!;
|
|
125
|
+
const molColSh = SeqHandler.forColumn(this.moleculeColumn!);
|
|
126
|
+
const resSh = SeqHandler.forColumn(resCol);
|
|
127
|
+
const subParts1 = molColSh.getSplitted(this.targetMoleculeIdx);
|
|
128
|
+
const subParts2 = resSh.getSplitted(resDf.currentRowIdx);
|
|
129
|
+
const alignment = alignSequencePair(subParts1, subParts2);
|
|
130
|
+
const canvas = createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted, resSh.units, molDifferences);
|
|
131
131
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
132
132
|
if (subParts1.length !== subParts2.length) {
|
|
133
133
|
propPanel.append(ui.divV([
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
import * as ui from 'datagrok-api/ui';
|
|
1
2
|
import * as DG from 'datagrok-api/dg';
|
|
2
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
3
5
|
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
4
6
|
import {mmDistanceFunctionArgs} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
5
|
-
import {
|
|
7
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
6
8
|
import {getMonomerSubstitutionMatrix} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
7
9
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
8
10
|
|
|
@@ -13,32 +15,35 @@ export interface ISequenceSpaceResult {
|
|
|
13
15
|
|
|
14
16
|
export async function getEncodedSeqSpaceCol(
|
|
15
17
|
seqCol: DG.Column, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames, fingerprintType: string = 'Morgan'
|
|
16
|
-
): Promise<{seqList:string[], options: {[_:string]: any}}> {
|
|
17
|
-
// encodes sequences using utf
|
|
18
|
-
const
|
|
19
|
-
const
|
|
20
|
-
const
|
|
21
|
-
const seqColLength = seqList.length;
|
|
18
|
+
): Promise<{ seqList: string[], options: { [_: string]: any } }> {
|
|
19
|
+
// encodes sequences using utf characters to also support multichar and non fasta sequences
|
|
20
|
+
const rowCount = seqCol.length;
|
|
21
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
22
|
+
const encList = Array<string>(rowCount);
|
|
22
23
|
let charCodeCounter = 36;
|
|
23
24
|
const charCodeMap = new Map<string, string>();
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
const seqColCats = seqCol.categories;
|
|
26
|
+
const seqColRawData = seqCol.getRawData();
|
|
27
|
+
for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
|
|
28
|
+
const catI = seqColRawData[rowIdx];
|
|
29
|
+
const seq = seqColCats[catI];
|
|
30
|
+
if (seq === null || seqCol.isNone(rowIdx)) {
|
|
31
|
+
// @ts-ignore
|
|
32
|
+
encList[rowIdx] = null;
|
|
28
33
|
continue;
|
|
29
34
|
}
|
|
30
|
-
|
|
31
|
-
const splittedSeq =
|
|
35
|
+
encList[rowIdx] = '';
|
|
36
|
+
const splittedSeq = sh.getSplitted(rowIdx);
|
|
32
37
|
for (let j = 0; j < splittedSeq.length; j++) {
|
|
33
|
-
const char = splittedSeq
|
|
38
|
+
const char = splittedSeq.getCanonical(j);
|
|
34
39
|
if (!charCodeMap.has(char)) {
|
|
35
40
|
charCodeMap.set(char, String.fromCharCode(charCodeCounter));
|
|
36
41
|
charCodeCounter++;
|
|
37
42
|
}
|
|
38
|
-
|
|
43
|
+
encList[rowIdx] += charCodeMap.get(char)!;
|
|
39
44
|
}
|
|
40
45
|
}
|
|
41
|
-
let options = {};
|
|
46
|
+
let options = {} as mmDistanceFunctionArgs;
|
|
42
47
|
if (similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
|
|
43
48
|
const monomers = Array.from(charCodeMap.keys());
|
|
44
49
|
const monomerRes = await getMonomerSubstitutionMatrix(monomers, fingerprintType);
|
|
@@ -48,13 +53,12 @@ export async function getEncodedSeqSpaceCol(
|
|
|
48
53
|
monomerRes.scoringMatrix[i][j] = 1 - val;
|
|
49
54
|
});
|
|
50
55
|
});
|
|
51
|
-
const monomerHashToMatrixMap: {[_: string]: number} = {};
|
|
56
|
+
const monomerHashToMatrixMap: { [_: string]: number } = {};
|
|
52
57
|
Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
|
|
53
58
|
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
54
59
|
});
|
|
55
60
|
// sets distance function args in place.
|
|
56
|
-
options = {scoringMatrix: monomerRes.scoringMatrix,
|
|
57
|
-
alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
|
|
61
|
+
options = {scoringMatrix: monomerRes.scoringMatrix, alphabetIndexes: monomerHashToMatrixMap};
|
|
58
62
|
} else if (similarityMetric === MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH) {
|
|
59
63
|
const monomers = Array.from(charCodeMap.keys());
|
|
60
64
|
const monomerRes = await getMonomerSubstitutionMatrix(monomers, fingerprintType);
|
|
@@ -64,13 +68,12 @@ export async function getEncodedSeqSpaceCol(
|
|
|
64
68
|
// monomerRes.scoringMatrix[i][j] = 1 - val;
|
|
65
69
|
// });
|
|
66
70
|
// });
|
|
67
|
-
const monomerHashToMatrixMap: {[_: string]: number} = {};
|
|
71
|
+
const monomerHashToMatrixMap: { [_: string]: number } = {};
|
|
68
72
|
Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
|
|
69
73
|
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
70
74
|
});
|
|
71
75
|
// sets distance function args in place.
|
|
72
|
-
options = {scoringMatrix: monomerRes.scoringMatrix,
|
|
73
|
-
alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
|
|
76
|
+
options = {scoringMatrix: monomerRes.scoringMatrix, alphabetIndexes: monomerHashToMatrixMap};
|
|
74
77
|
}
|
|
75
|
-
return {seqList, options};
|
|
78
|
+
return {seqList: encList, options};
|
|
76
79
|
}
|
|
@@ -5,33 +5,32 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import wu from 'wu';
|
|
6
6
|
|
|
7
7
|
import {getHelmMonomers} from '../package';
|
|
8
|
-
import {
|
|
9
|
-
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
|
+
import {GAP_SYMBOL, ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
10
10
|
|
|
11
11
|
const V2000_ATOM_NAME_POS = 31;
|
|
12
12
|
|
|
13
13
|
export async function getMonomericMols(
|
|
14
14
|
mcol: DG.Column<string>, pattern: boolean = false, monomersDict?: Map<string, string>
|
|
15
15
|
): Promise<DG.Column> {
|
|
16
|
-
const
|
|
16
|
+
const sh = SeqHandler.forColumn(mcol);
|
|
17
17
|
let molV3000Array;
|
|
18
18
|
monomersDict ??= new Map();
|
|
19
|
-
const monomers =
|
|
20
|
-
getHelmMonomers(mcol) : Object.keys(
|
|
19
|
+
const monomers = sh.isHelm() ?
|
|
20
|
+
getHelmMonomers(mcol) : Object.keys(sh.stats.freq).filter((it) => it !== '');
|
|
21
21
|
|
|
22
22
|
for (let i = 0; i < monomers.length; i++) {
|
|
23
23
|
if (!monomersDict.has(monomers[i]))
|
|
24
24
|
monomersDict.set(monomers[i], `${monomersDict.size + 1}`);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
if (
|
|
27
|
+
if (sh.isHelm()) {
|
|
28
28
|
molV3000Array = await grok.functions.call('HELM:getMolFiles', {col: mcol});
|
|
29
29
|
molV3000Array = changeV2000ToV3000(molV3000Array, monomersDict, pattern);
|
|
30
30
|
} else {
|
|
31
31
|
molV3000Array = new Array<string>(mcol.length);
|
|
32
32
|
for (let i = 0; i < mcol.length; i++) {
|
|
33
|
-
const
|
|
34
|
-
const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
|
|
33
|
+
const molV3000 = molV3000FromNonHelmSequence(sh.getSplitted(i), monomersDict, pattern);
|
|
35
34
|
molV3000Array[i] = molV3000;
|
|
36
35
|
}
|
|
37
36
|
}
|
|
@@ -51,9 +50,12 @@ M V30 BEGIN CTAB
|
|
|
51
50
|
molV3000 += 'M V30 BEGIN ATOM\n';
|
|
52
51
|
|
|
53
52
|
for (let atomRowI = 0; atomRowI < monomers.length; atomRowI++) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
const cm: string = monomers.getCanonical(atomRowI);
|
|
54
|
+
if (cm !== GAP_SYMBOL) {
|
|
55
|
+
molV3000 += pattern ?
|
|
56
|
+
`M V30 ${atomRowI + 1} R${monomersDict.get(cm)} 0.000 0.000 0 0\n` :
|
|
57
|
+
`M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${monomersDict.get(cm)}\n`;
|
|
58
|
+
}
|
|
57
59
|
}
|
|
58
60
|
|
|
59
61
|
molV3000 += 'M V30 END ATOM\n';
|
package/src/package.ts
CHANGED
|
@@ -12,7 +12,7 @@ import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-me
|
|
|
12
12
|
import {
|
|
13
13
|
TAGS as bioTAGS,
|
|
14
14
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
15
|
-
import {
|
|
15
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
16
16
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
17
17
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
18
18
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
@@ -59,7 +59,7 @@ import {BioPackage, BioPackageProperties} from './package-types';
|
|
|
59
59
|
import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
|
|
60
60
|
import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
|
|
61
61
|
import {addCopyMenuUI} from './utils/context-menu';
|
|
62
|
-
import {
|
|
62
|
+
import {getPolyToolDialog} from './utils/poly-tool/ui';
|
|
63
63
|
import {PolyToolCsvLibHandler} from './utils/poly-tool/csv-to-json-monomer-lib-converter';
|
|
64
64
|
import {_setPeptideColumn} from './utils/poly-tool/utils';
|
|
65
65
|
import {getRegionDo} from './utils/get-region';
|
|
@@ -165,11 +165,11 @@ export function getBioLib(): IMonomerLib {
|
|
|
165
165
|
return MonomerLibManager.instance.getBioLib();
|
|
166
166
|
}
|
|
167
167
|
|
|
168
|
-
//name:
|
|
168
|
+
//name: getSeqHandler
|
|
169
169
|
//input: column sequence { semType: Macromolecule }
|
|
170
170
|
//output: object result
|
|
171
|
-
export function
|
|
172
|
-
return
|
|
171
|
+
export function getSeqHandler(sequence: DG.Column<string>): SeqHandler {
|
|
172
|
+
return SeqHandler.forColumn(sequence);
|
|
173
173
|
}
|
|
174
174
|
|
|
175
175
|
// -- Panels --
|
|
@@ -471,9 +471,8 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<s
|
|
|
471
471
|
})
|
|
472
472
|
.onCancel(() => { resolve(undefined); })
|
|
473
473
|
.show();
|
|
474
|
-
} else
|
|
474
|
+
} else
|
|
475
475
|
runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
|
|
476
|
-
}
|
|
477
476
|
}).catch((err: any) => {
|
|
478
477
|
const [errMsg, errStack] = errInfo(err);
|
|
479
478
|
_package.logger.error(errMsg, undefined, errStack);
|
|
@@ -611,7 +610,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
611
610
|
if (col.semType != DG.SEMTYPE.MACROMOLECULE)
|
|
612
611
|
return false;
|
|
613
612
|
|
|
614
|
-
const
|
|
613
|
+
const _colSh = SeqHandler.forColumn(col);
|
|
615
614
|
// TODO: prevent for cyclic, branched or multiple chains in Helm
|
|
616
615
|
return true;
|
|
617
616
|
});
|
|
@@ -630,7 +629,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
630
629
|
return;
|
|
631
630
|
} else if (colList.length > 1) {
|
|
632
631
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
633
|
-
const selectedCol = colList.find((c) => { return
|
|
632
|
+
const selectedCol = colList.find((c) => { return SeqHandler.forColumn(c).isMsa(); });
|
|
634
633
|
const colInput: DG.InputBase = ui.choiceInput(
|
|
635
634
|
'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
|
|
636
635
|
ui.dialog({
|
|
@@ -647,9 +646,8 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
647
646
|
await handler(col);
|
|
648
647
|
})
|
|
649
648
|
.show();
|
|
650
|
-
} else
|
|
649
|
+
} else
|
|
651
650
|
col = colList[0];
|
|
652
|
-
}
|
|
653
651
|
|
|
654
652
|
if (!col)
|
|
655
653
|
return;
|
|
@@ -690,10 +688,9 @@ export function convertDialog() {
|
|
|
690
688
|
//name: polyTool
|
|
691
689
|
//description: Perform cyclization of polymers
|
|
692
690
|
export async function polyTool(): Promise<void> {
|
|
693
|
-
const polytool = new PolyTool();
|
|
694
691
|
let dialog: DG.Dialog;
|
|
695
692
|
try {
|
|
696
|
-
dialog = await
|
|
693
|
+
dialog = await getPolyToolDialog();
|
|
697
694
|
dialog.show();
|
|
698
695
|
} catch (err: any) {
|
|
699
696
|
grok.shell.warning('To run PolyTool, open a dataframe with macromolecules');
|
|
@@ -773,8 +770,8 @@ export async function splitToMonomersTopMenu(table: DG.DataFrame, sequence: DG.C
|
|
|
773
770
|
//name: Bio: getHelmMonomers
|
|
774
771
|
//input: column sequence {semType: Macromolecule}
|
|
775
772
|
export function getHelmMonomers(sequence: DG.Column<string>): string[] {
|
|
776
|
-
const
|
|
777
|
-
const stats =
|
|
773
|
+
const sh = SeqHandler.forColumn(sequence);
|
|
774
|
+
const stats = sh.stats;
|
|
778
775
|
return Object.keys(stats.freq);
|
|
779
776
|
}
|
|
780
777
|
|
|
@@ -4,10 +4,10 @@ import * as ui from 'datagrok-api/ui';
|
|
|
4
4
|
|
|
5
5
|
import wu from 'wu';
|
|
6
6
|
|
|
7
|
-
import {category, expect, test
|
|
7
|
+
import {category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
8
8
|
|
|
9
9
|
import {awaitGrid} from './utils';
|
|
10
|
-
import {WebLogoViewer} from '../viewers/web-logo-viewer';
|
|
10
|
+
import {Debounces, WebLogoViewer} from '../viewers/web-logo-viewer';
|
|
11
11
|
|
|
12
12
|
import {_package} from '../package-test';
|
|
13
13
|
|
|
@@ -20,12 +20,15 @@ category('WebLogo-layout', () => {
|
|
|
20
20
|
const wlViewer = await df.plot.fromType('WebLogo',
|
|
21
21
|
{sequenceColumnName: col.name}) as unknown as WebLogoViewer;
|
|
22
22
|
view.dockManager.dock(wlViewer);
|
|
23
|
+
|
|
24
|
+
await delay(Debounces.render * 2);
|
|
23
25
|
await wlViewer.awaitRendered();
|
|
24
26
|
await awaitGrid(view.grid);
|
|
25
27
|
|
|
26
28
|
const viewLayout = view.saveLayout();
|
|
27
29
|
const viewLayoutJsonStr = viewLayout.toJson();
|
|
28
30
|
view.loadLayout(viewLayout);
|
|
31
|
+
await delay(Debounces.render * 2);
|
|
29
32
|
await wlViewer.awaitRendered();
|
|
30
33
|
await awaitGrid(view.grid);
|
|
31
34
|
|
|
@@ -3,15 +3,15 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
|
|
4
4
|
import {category, expect, expectArray, test, testEvent} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
|
+
import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
|
+
|
|
6
9
|
import {
|
|
7
10
|
countForMonomerAtPosition,
|
|
8
11
|
PositionInfo as PI,
|
|
9
12
|
PositionMonomerInfo as PMI,
|
|
10
13
|
WebLogoViewer,
|
|
11
14
|
} from '../viewers/web-logo-viewer';
|
|
12
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
13
|
-
|
|
14
|
-
import {GAP_SYMBOL} from '../const';
|
|
15
15
|
|
|
16
16
|
const g: string = GAP_SYMBOL;
|
|
17
17
|
|
|
@@ -182,8 +182,8 @@ ATC-G-TTGC--
|
|
|
182
182
|
}
|
|
183
183
|
|
|
184
184
|
const atPI1: PI = resPosList[1];
|
|
185
|
-
const
|
|
186
|
-
const countAt1 = countForMonomerAtPosition(df,
|
|
185
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
186
|
+
const countAt1 = countForMonomerAtPosition(df, sh, df.filter, 'G', atPI1);
|
|
187
187
|
expect(countAt1, 5);
|
|
188
188
|
await wlViewer.awaitRendered();
|
|
189
189
|
});
|
package/src/tests/bio-tests.ts
CHANGED
|
@@ -4,16 +4,11 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
|
|
5
5
|
import {category, test, expect, expectObject, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {
|
|
7
|
-
getAlphabetSimilarity,
|
|
8
|
-
monomerToShort,
|
|
9
|
-
pickUpPalette,
|
|
10
|
-
splitterAsFasta,
|
|
11
|
-
splitterAsHelm,
|
|
7
|
+
NOTATION, getAlphabetSimilarity, monomerToShort, pickUpPalette, splitterAsFasta, splitterAsHelm,
|
|
12
8
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
13
9
|
import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
14
10
|
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
15
11
|
import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
|
|
16
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
17
12
|
import {getStatsForCol} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
18
13
|
|
|
19
14
|
import {GAP_SYMBOL} from '../const';
|
|
@@ -63,6 +58,8 @@ XZJ{}2
|
|
|
63
58
|
PEPTIDE1{meI}$$$$`;
|
|
64
59
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
65
60
|
const seqCol: DG.Column = df.getCol('seq')!;
|
|
61
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
62
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
66
63
|
const stats = getStatsForCol(seqCol, 1, splitterAsHelm);
|
|
67
64
|
|
|
68
65
|
expectObject(stats.freq, {
|
|
@@ -132,6 +129,8 @@ category('WebLogo.monomerToShort', () => {
|
|
|
132
129
|
export async function _testGetStats(csvDfN1: string) {
|
|
133
130
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
134
131
|
const seqCol: DG.Column = dfN1.col('seq')!;
|
|
132
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
133
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
135
134
|
const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
|
|
136
135
|
|
|
137
136
|
expectObject(stats.freq, {
|
|
@@ -160,6 +159,8 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
160
159
|
export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
161
160
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
162
161
|
const col: DG.Column = df.col('seq')!;
|
|
162
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
163
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
163
164
|
const cp = pickUpPalette(col);
|
|
164
165
|
|
|
165
166
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -168,6 +169,8 @@ export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
|
168
169
|
export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
169
170
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
|
|
170
171
|
const col: DG.Column = df.col('seq')!;
|
|
172
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
173
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
171
174
|
const cp = pickUpPalette(col);
|
|
172
175
|
|
|
173
176
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -176,6 +179,8 @@ export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
|
176
179
|
export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
177
180
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
178
181
|
const col: DG.Column = df.col('seq')!;
|
|
182
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
183
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
179
184
|
const cp = pickUpPalette(col);
|
|
180
185
|
|
|
181
186
|
expect(cp instanceof AminoacidsPalettes, true);
|
|
@@ -184,6 +189,8 @@ export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
|
184
189
|
export async function _testPickupPaletteX(csvDfX: string) {
|
|
185
190
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
|
|
186
191
|
const col: DG.Column = df.col('seq')!;
|
|
192
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
193
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
187
194
|
const cp = pickUpPalette(col);
|
|
188
195
|
|
|
189
196
|
expect(cp instanceof UnknownSeqPalette, true);
|
|
@@ -5,7 +5,7 @@ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src
|
|
|
5
5
|
|
|
6
6
|
import {ConverterFunc} from './types';
|
|
7
7
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
-
import {
|
|
8
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
9
|
|
|
10
10
|
// import {mmSemType} from '../const';
|
|
11
11
|
// import {importFasta} from '../package';
|
|
@@ -133,8 +133,8 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
133
133
|
throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
|
|
134
134
|
|
|
135
135
|
return function(srcCol: DG.Column): DG.Column {
|
|
136
|
-
const
|
|
137
|
-
const resCol =
|
|
136
|
+
const converterSh = SeqHandler.forColumn(srcCol);
|
|
137
|
+
const resCol = converterSh.convert(tgtNotation, tgtSeparator);
|
|
138
138
|
expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
139
139
|
return resCol;
|
|
140
140
|
};
|
|
@@ -152,7 +152,7 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
152
152
|
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
153
153
|
|
|
154
154
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
155
|
-
const
|
|
155
|
+
const _sh: SeqHandler = SeqHandler.forColumn(resCol);
|
|
156
156
|
}
|
|
157
157
|
|
|
158
158
|
// FASTA tests
|
|
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
|
|
5
5
|
import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {ALPHABET, getAlphabet, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
-
import {
|
|
7
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
8
8
|
|
|
9
9
|
category('detectorsBenchmark', () => {
|
|
10
10
|
let detectFunc: DG.Func;
|
|
@@ -124,11 +124,11 @@ category('detectorsBenchmark', () => {
|
|
|
124
124
|
}
|
|
125
125
|
|
|
126
126
|
function checkDetectorRes(col: DG.Column, tgt: TgtType): void {
|
|
127
|
-
const
|
|
127
|
+
const sh = SeqHandler.forColumn(col);
|
|
128
128
|
expect(col.semType === tgt.semType, true);
|
|
129
|
-
expect(
|
|
130
|
-
expect(
|
|
131
|
-
expect(
|
|
129
|
+
expect(sh.notation === tgt.notation, true);
|
|
130
|
+
expect(sh.alphabet === tgt.alphabet, true);
|
|
131
|
+
expect(sh.separator === tgt.separator, true);
|
|
132
132
|
}
|
|
133
133
|
});
|
|
134
134
|
|