@datagrok/bio 2.11.41 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +1 -1
- package/detectors.js +11 -11
- package/dist/36.js +1 -1
- package/dist/36.js.map +1 -1
- package/dist/413.js +1 -1
- package/dist/413.js.map +1 -1
- package/dist/590.js +1 -1
- package/dist/590.js.map +1 -1
- package/dist/709.js +1 -1
- package/dist/709.js.map +1 -1
- package/dist/895.js +1 -1
- package/dist/895.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/package.json +10 -10
- package/src/analysis/sequence-activity-cliffs.ts +9 -9
- package/src/analysis/sequence-diversity-viewer.ts +3 -3
- package/src/analysis/sequence-search-base-viewer.ts +2 -2
- package/src/analysis/sequence-similarity-viewer.ts +10 -10
- package/src/analysis/sequence-space.ts +26 -23
- package/src/calculations/monomerLevelMols.ts +13 -11
- package/src/const.ts +5 -0
- package/src/package.ts +8 -8
- package/src/tests/WebLogo-layout-tests.ts +5 -2
- package/src/tests/WebLogo-positions-test.ts +20 -16
- package/src/tests/bio-tests.ts +19 -7
- package/src/tests/converters-test.ts +4 -4
- package/src/tests/detectors-benchmark-tests.ts +5 -5
- package/src/tests/detectors-tests.ts +13 -13
- package/src/tests/fasta-export-tests.ts +10 -4
- package/src/tests/mm-distance-tests.ts +10 -10
- package/src/tests/msa-tests.ts +8 -15
- package/src/tests/renderers-monomer-placer.ts +3 -3
- package/src/tests/renderers-test.ts +6 -8
- package/src/tests/splitters-test.ts +14 -13
- package/src/tests/to-atomic-level-tests.ts +2 -2
- package/src/tests/units-handler-get-region.ts +4 -4
- package/src/tests/units-handler-splitted-tests.ts +19 -17
- package/src/tests/units-handler-tests.ts +32 -32
- package/src/utils/cell-renderer.ts +40 -34
- package/src/utils/check-input-column.ts +5 -5
- package/src/utils/context-menu.ts +9 -6
- package/src/utils/convert.ts +9 -9
- package/src/utils/get-region-func-editor.ts +11 -11
- package/src/utils/get-region.ts +10 -12
- package/src/utils/macromolecule-column-widget.ts +9 -5
- package/src/utils/monomer-lib/library-file-manager/event-manager.ts +1 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +6 -6
- package/src/utils/pepsea.ts +1 -0
- package/src/utils/poly-tool/transformation.ts +3 -3
- package/src/utils/save-as-fasta.ts +14 -15
- package/src/utils/sequence-to-mol.ts +4 -4
- package/src/viewers/web-logo-viewer.ts +95 -110
- package/src/widgets/bio-substructure-filter.ts +3 -3
- package/src/widgets/composition-analysis-widget.ts +26 -19
package/package.json
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"friendlyName": "Bio",
|
|
4
4
|
"author": {
|
|
5
|
-
"name": "
|
|
6
|
-
"email": "
|
|
5
|
+
"name": "Aleksandr Tanas",
|
|
6
|
+
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.
|
|
8
|
+
"version": "2.12.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,10 +34,10 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "5.
|
|
38
|
-
"@datagrok-libraries/chem-meta": "^1.2.
|
|
39
|
-
"@datagrok-libraries/ml": "^6.4.
|
|
40
|
-
"@datagrok-libraries/tutorials": "^1.3.
|
|
37
|
+
"@datagrok-libraries/bio": "5.40.0",
|
|
38
|
+
"@datagrok-libraries/chem-meta": "^1.2.3",
|
|
39
|
+
"@datagrok-libraries/ml": "^6.4.12",
|
|
40
|
+
"@datagrok-libraries/tutorials": "^1.3.12",
|
|
41
41
|
"ajv": "^8.12.0",
|
|
42
42
|
"ajv-errors": "^3.0.0",
|
|
43
43
|
"@datagrok-libraries/utils": "^4.1.44",
|
|
@@ -54,9 +54,9 @@
|
|
|
54
54
|
"wu": "latest"
|
|
55
55
|
},
|
|
56
56
|
"devDependencies": {
|
|
57
|
-
"@datagrok/chem": "^1.
|
|
58
|
-
"@datagrok/dendrogram": "^1.2.
|
|
59
|
-
"@datagrok/helm": "^2.1.
|
|
57
|
+
"@datagrok/chem": "^1.9.0",
|
|
58
|
+
"@datagrok/dendrogram": "^1.2.27",
|
|
59
|
+
"@datagrok/helm": "^2.1.30",
|
|
60
60
|
"@types/node": "^17.0.24",
|
|
61
61
|
"@types/wu": "latest",
|
|
62
62
|
"@typescript-eslint/eslint-plugin": "latest",
|
|
@@ -2,13 +2,15 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
5
7
|
import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
6
8
|
import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
7
9
|
import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
10
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
9
11
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
10
12
|
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
|
-
import {
|
|
13
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
12
14
|
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
13
15
|
|
|
14
16
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
@@ -97,19 +99,16 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
97
99
|
|
|
98
100
|
propPanel.append(ui.divText(params.seqCol.name, {style: {fontWeight: 'bold'}}));
|
|
99
101
|
|
|
100
|
-
const sequencesArray = new Array<string>(2);
|
|
101
102
|
const activitiesArray = new Array<number>(2);
|
|
102
103
|
params.points.forEach((molIdx, idx) => {
|
|
103
|
-
sequencesArray[idx] = params.seqCol.get(molIdx);
|
|
104
104
|
activitiesArray[idx] = params.activityCol.get(molIdx);
|
|
105
105
|
});
|
|
106
106
|
|
|
107
107
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
108
|
-
const
|
|
109
|
-
const
|
|
110
|
-
const
|
|
111
|
-
const
|
|
112
|
-
const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
|
|
108
|
+
const sh = SeqHandler.forColumn(params.seqCol);
|
|
109
|
+
const subParts1 = sh.getSplitted(params.points[0]); // splitter(sequencesArray[0], {uh, rowIdx: -1});
|
|
110
|
+
const subParts2 = sh.getSplitted(params.points[1]); // splitter(sequencesArray[1], {uh, rowIdx: -1});
|
|
111
|
+
const canvas = createDifferenceCanvas(subParts1, subParts2, sh.units, molDifferences);
|
|
113
112
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
114
113
|
|
|
115
114
|
propPanel.append(createDifferencesWithPositions(molDifferences));
|
|
@@ -135,7 +134,8 @@ export function createDifferenceCanvas(
|
|
|
135
134
|
const canvas = document.createElement('canvas');
|
|
136
135
|
const context = canvas.getContext('2d');
|
|
137
136
|
canvas.height = 30;
|
|
138
|
-
drawMoleculeDifferenceOnCanvas(context!, 0, 0, 0, 30,
|
|
137
|
+
drawMoleculeDifferenceOnCanvas(context!, 0, 0, 0, 30,
|
|
138
|
+
wu(subParts1.canonicals).toArray(), wu(subParts2.canonicals).toArray(), units, true, molDifferences);
|
|
139
139
|
return canvas;
|
|
140
140
|
}
|
|
141
141
|
|
|
@@ -7,7 +7,7 @@ import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
|
7
7
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
8
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
|
-
import {
|
|
10
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
11
11
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
12
12
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
13
13
|
import {DistanceMatrixService, dmLinearIndex} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
@@ -29,8 +29,8 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
29
29
|
return;
|
|
30
30
|
if (this.dataFrame) {
|
|
31
31
|
if (computeData && this.moleculeColumn) {
|
|
32
|
-
const
|
|
33
|
-
await (
|
|
32
|
+
const sh = SeqHandler.forColumn(this.moleculeColumn);
|
|
33
|
+
await (sh.isFasta() ? this.computeByMM() : this.computeByChem());
|
|
34
34
|
|
|
35
35
|
const diverseColumnName: string = this.diverseColumnLabel != null ? this.diverseColumnLabel :
|
|
36
36
|
`diverse (${this.moleculeColumnName})`;
|
|
@@ -14,7 +14,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
|
14
14
|
fingerprint: string;
|
|
15
15
|
metricsProperties = ['distanceMetric', 'fingerprint'];
|
|
16
16
|
fingerprintChoices = ['Morgan', 'Pattern'];
|
|
17
|
-
moleculeColumn?: DG.Column
|
|
17
|
+
moleculeColumn?: DG.Column<string>;
|
|
18
18
|
moleculeColumnName: string;
|
|
19
19
|
initialized: boolean = false;
|
|
20
20
|
tags = [DG.TAGS.UNITS, bioTAGS.aligned, bioTAGS.separator, bioTAGS.alphabet];
|
|
@@ -51,7 +51,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
|
51
51
|
.subscribe((_: any) => this.render(false)));
|
|
52
52
|
this.subs.push(DG.debounce(ui.onSizeChanged(this.root), 50)
|
|
53
53
|
.subscribe((_: any) => this.render(false)));
|
|
54
|
-
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE)
|
|
54
|
+
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE) as DG.Column<string>;
|
|
55
55
|
this.moleculeColumnName = this.moleculeColumn?.name!;
|
|
56
56
|
this.getProperty('limit')!.fromOptions({min: 1, max: this.dataFrame.rowCount});
|
|
57
57
|
}
|
|
@@ -7,7 +7,7 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
|
7
7
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
8
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
|
-
import {
|
|
10
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
11
11
|
import {alignSequencePair} from '@datagrok-libraries/bio/src/utils/macromolecule/alignment';
|
|
12
12
|
import {KnnResult, SparseMatrixService} from '@datagrok-libraries/ml/src/distance-matrix/sparse-matrix-service';
|
|
13
13
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
@@ -50,9 +50,9 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
50
50
|
this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
51
51
|
if (computeData && !this.gridSelect) {
|
|
52
52
|
this.targetMoleculeIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
53
|
-
const
|
|
53
|
+
const sh = SeqHandler.forColumn(this.moleculeColumn!);
|
|
54
54
|
|
|
55
|
-
await (!
|
|
55
|
+
await (!sh.isHelm() ? this.computeByMM() : this.computeByChem());
|
|
56
56
|
const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
|
|
57
57
|
`similar (${this.moleculeColumnName})`;
|
|
58
58
|
this.molCol = DG.Column.string(similarColumnName,
|
|
@@ -121,13 +121,13 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
121
121
|
const propPanel = ui.div();
|
|
122
122
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
123
123
|
const molColName = this.molCol?.name!;
|
|
124
|
-
const
|
|
125
|
-
const
|
|
126
|
-
const
|
|
127
|
-
const subParts1 =
|
|
128
|
-
const subParts2 =
|
|
129
|
-
const alignment = alignSequencePair(
|
|
130
|
-
const canvas = createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted,
|
|
124
|
+
const resCol: DG.Column<string> = resDf.col(molColName)!;
|
|
125
|
+
const molColSh = SeqHandler.forColumn(this.moleculeColumn!);
|
|
126
|
+
const resSh = SeqHandler.forColumn(resCol);
|
|
127
|
+
const subParts1 = molColSh.getSplitted(this.targetMoleculeIdx);
|
|
128
|
+
const subParts2 = resSh.getSplitted(resDf.currentRowIdx);
|
|
129
|
+
const alignment = alignSequencePair(subParts1, subParts2);
|
|
130
|
+
const canvas = createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted, resSh.units, molDifferences);
|
|
131
131
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
132
132
|
if (subParts1.length !== subParts2.length) {
|
|
133
133
|
propPanel.append(ui.divV([
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
import * as ui from 'datagrok-api/ui';
|
|
1
2
|
import * as DG from 'datagrok-api/dg';
|
|
2
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
3
5
|
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
4
6
|
import {mmDistanceFunctionArgs} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
5
|
-
import {
|
|
7
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
6
8
|
import {getMonomerSubstitutionMatrix} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
7
9
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
8
10
|
|
|
@@ -13,32 +15,35 @@ export interface ISequenceSpaceResult {
|
|
|
13
15
|
|
|
14
16
|
export async function getEncodedSeqSpaceCol(
|
|
15
17
|
seqCol: DG.Column, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames, fingerprintType: string = 'Morgan'
|
|
16
|
-
): Promise<{seqList:string[], options: {[_:string]: any}}> {
|
|
17
|
-
// encodes sequences using utf
|
|
18
|
-
const
|
|
19
|
-
const
|
|
20
|
-
const
|
|
21
|
-
const seqColLength = seqList.length;
|
|
18
|
+
): Promise<{ seqList: string[], options: { [_: string]: any } }> {
|
|
19
|
+
// encodes sequences using utf characters to also support multichar and non fasta sequences
|
|
20
|
+
const rowCount = seqCol.length;
|
|
21
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
22
|
+
const encList = Array<string>(rowCount);
|
|
22
23
|
let charCodeCounter = 36;
|
|
23
24
|
const charCodeMap = new Map<string, string>();
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
const seqColCats = seqCol.categories;
|
|
26
|
+
const seqColRawData = seqCol.getRawData();
|
|
27
|
+
for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
|
|
28
|
+
const catI = seqColRawData[rowIdx];
|
|
29
|
+
const seq = seqColCats[catI];
|
|
30
|
+
if (seq === null || seqCol.isNone(rowIdx)) {
|
|
31
|
+
// @ts-ignore
|
|
32
|
+
encList[rowIdx] = null;
|
|
28
33
|
continue;
|
|
29
34
|
}
|
|
30
|
-
|
|
31
|
-
const splittedSeq =
|
|
35
|
+
encList[rowIdx] = '';
|
|
36
|
+
const splittedSeq = sh.getSplitted(rowIdx);
|
|
32
37
|
for (let j = 0; j < splittedSeq.length; j++) {
|
|
33
|
-
const char = splittedSeq
|
|
38
|
+
const char = splittedSeq.getCanonical(j);
|
|
34
39
|
if (!charCodeMap.has(char)) {
|
|
35
40
|
charCodeMap.set(char, String.fromCharCode(charCodeCounter));
|
|
36
41
|
charCodeCounter++;
|
|
37
42
|
}
|
|
38
|
-
|
|
43
|
+
encList[rowIdx] += charCodeMap.get(char)!;
|
|
39
44
|
}
|
|
40
45
|
}
|
|
41
|
-
let options = {};
|
|
46
|
+
let options = {} as mmDistanceFunctionArgs;
|
|
42
47
|
if (similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
|
|
43
48
|
const monomers = Array.from(charCodeMap.keys());
|
|
44
49
|
const monomerRes = await getMonomerSubstitutionMatrix(monomers, fingerprintType);
|
|
@@ -48,13 +53,12 @@ export async function getEncodedSeqSpaceCol(
|
|
|
48
53
|
monomerRes.scoringMatrix[i][j] = 1 - val;
|
|
49
54
|
});
|
|
50
55
|
});
|
|
51
|
-
const monomerHashToMatrixMap: {[_: string]: number} = {};
|
|
56
|
+
const monomerHashToMatrixMap: { [_: string]: number } = {};
|
|
52
57
|
Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
|
|
53
58
|
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
54
59
|
});
|
|
55
60
|
// sets distance function args in place.
|
|
56
|
-
options = {scoringMatrix: monomerRes.scoringMatrix,
|
|
57
|
-
alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
|
|
61
|
+
options = {scoringMatrix: monomerRes.scoringMatrix, alphabetIndexes: monomerHashToMatrixMap};
|
|
58
62
|
} else if (similarityMetric === MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH) {
|
|
59
63
|
const monomers = Array.from(charCodeMap.keys());
|
|
60
64
|
const monomerRes = await getMonomerSubstitutionMatrix(monomers, fingerprintType);
|
|
@@ -64,13 +68,12 @@ export async function getEncodedSeqSpaceCol(
|
|
|
64
68
|
// monomerRes.scoringMatrix[i][j] = 1 - val;
|
|
65
69
|
// });
|
|
66
70
|
// });
|
|
67
|
-
const monomerHashToMatrixMap: {[_: string]: number} = {};
|
|
71
|
+
const monomerHashToMatrixMap: { [_: string]: number } = {};
|
|
68
72
|
Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
|
|
69
73
|
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
70
74
|
});
|
|
71
75
|
// sets distance function args in place.
|
|
72
|
-
options = {scoringMatrix: monomerRes.scoringMatrix,
|
|
73
|
-
alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
|
|
76
|
+
options = {scoringMatrix: monomerRes.scoringMatrix, alphabetIndexes: monomerHashToMatrixMap};
|
|
74
77
|
}
|
|
75
|
-
return {seqList, options};
|
|
78
|
+
return {seqList: encList, options};
|
|
76
79
|
}
|
|
@@ -5,33 +5,32 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import wu from 'wu';
|
|
6
6
|
|
|
7
7
|
import {getHelmMonomers} from '../package';
|
|
8
|
-
import {
|
|
9
|
-
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
|
+
import {GAP_SYMBOL, ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
10
10
|
|
|
11
11
|
const V2000_ATOM_NAME_POS = 31;
|
|
12
12
|
|
|
13
13
|
export async function getMonomericMols(
|
|
14
14
|
mcol: DG.Column<string>, pattern: boolean = false, monomersDict?: Map<string, string>
|
|
15
15
|
): Promise<DG.Column> {
|
|
16
|
-
const
|
|
16
|
+
const sh = SeqHandler.forColumn(mcol);
|
|
17
17
|
let molV3000Array;
|
|
18
18
|
monomersDict ??= new Map();
|
|
19
|
-
const monomers =
|
|
20
|
-
getHelmMonomers(mcol) : Object.keys(
|
|
19
|
+
const monomers = sh.isHelm() ?
|
|
20
|
+
getHelmMonomers(mcol) : Object.keys(sh.stats.freq).filter((it) => it !== '');
|
|
21
21
|
|
|
22
22
|
for (let i = 0; i < monomers.length; i++) {
|
|
23
23
|
if (!monomersDict.has(monomers[i]))
|
|
24
24
|
monomersDict.set(monomers[i], `${monomersDict.size + 1}`);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
if (
|
|
27
|
+
if (sh.isHelm()) {
|
|
28
28
|
molV3000Array = await grok.functions.call('HELM:getMolFiles', {col: mcol});
|
|
29
29
|
molV3000Array = changeV2000ToV3000(molV3000Array, monomersDict, pattern);
|
|
30
30
|
} else {
|
|
31
31
|
molV3000Array = new Array<string>(mcol.length);
|
|
32
32
|
for (let i = 0; i < mcol.length; i++) {
|
|
33
|
-
const
|
|
34
|
-
const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
|
|
33
|
+
const molV3000 = molV3000FromNonHelmSequence(sh.getSplitted(i), monomersDict, pattern);
|
|
35
34
|
molV3000Array[i] = molV3000;
|
|
36
35
|
}
|
|
37
36
|
}
|
|
@@ -51,9 +50,12 @@ M V30 BEGIN CTAB
|
|
|
51
50
|
molV3000 += 'M V30 BEGIN ATOM\n';
|
|
52
51
|
|
|
53
52
|
for (let atomRowI = 0; atomRowI < monomers.length; atomRowI++) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
const cm: string = monomers.getCanonical(atomRowI);
|
|
54
|
+
if (cm !== GAP_SYMBOL) {
|
|
55
|
+
molV3000 += pattern ?
|
|
56
|
+
`M V30 ${atomRowI + 1} R${monomersDict.get(cm)} 0.000 0.000 0 0\n` :
|
|
57
|
+
`M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${monomersDict.get(cm)}\n`;
|
|
58
|
+
}
|
|
57
59
|
}
|
|
58
60
|
|
|
59
61
|
molV3000 += 'M V30 END ATOM\n';
|
package/src/const.ts
ADDED
package/src/package.ts
CHANGED
|
@@ -12,7 +12,7 @@ import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-me
|
|
|
12
12
|
import {
|
|
13
13
|
TAGS as bioTAGS,
|
|
14
14
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
15
|
-
import {
|
|
15
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
16
16
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
17
17
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
18
18
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
@@ -165,11 +165,11 @@ export function getBioLib(): IMonomerLib {
|
|
|
165
165
|
return MonomerLibManager.instance.getBioLib();
|
|
166
166
|
}
|
|
167
167
|
|
|
168
|
-
//name:
|
|
168
|
+
//name: getSeqHandler
|
|
169
169
|
//input: column sequence { semType: Macromolecule }
|
|
170
170
|
//output: object result
|
|
171
|
-
export function
|
|
172
|
-
return
|
|
171
|
+
export function getSeqHandler(sequence: DG.Column<string>): SeqHandler {
|
|
172
|
+
return SeqHandler.forColumn(sequence);
|
|
173
173
|
}
|
|
174
174
|
|
|
175
175
|
// -- Panels --
|
|
@@ -611,7 +611,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
611
611
|
if (col.semType != DG.SEMTYPE.MACROMOLECULE)
|
|
612
612
|
return false;
|
|
613
613
|
|
|
614
|
-
const
|
|
614
|
+
const _colSh = SeqHandler.forColumn(col);
|
|
615
615
|
// TODO: prevent for cyclic, branched or multiple chains in Helm
|
|
616
616
|
return true;
|
|
617
617
|
});
|
|
@@ -630,7 +630,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
630
630
|
return;
|
|
631
631
|
} else if (colList.length > 1) {
|
|
632
632
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
633
|
-
const selectedCol = colList.find((c) => { return
|
|
633
|
+
const selectedCol = colList.find((c) => { return SeqHandler.forColumn(c).isMsa(); });
|
|
634
634
|
const colInput: DG.InputBase = ui.choiceInput(
|
|
635
635
|
'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
|
|
636
636
|
ui.dialog({
|
|
@@ -773,8 +773,8 @@ export async function splitToMonomersTopMenu(table: DG.DataFrame, sequence: DG.C
|
|
|
773
773
|
//name: Bio: getHelmMonomers
|
|
774
774
|
//input: column sequence {semType: Macromolecule}
|
|
775
775
|
export function getHelmMonomers(sequence: DG.Column<string>): string[] {
|
|
776
|
-
const
|
|
777
|
-
const stats =
|
|
776
|
+
const sh = SeqHandler.forColumn(sequence);
|
|
777
|
+
const stats = sh.stats;
|
|
778
778
|
return Object.keys(stats.freq);
|
|
779
779
|
}
|
|
780
780
|
|
|
@@ -4,10 +4,10 @@ import * as ui from 'datagrok-api/ui';
|
|
|
4
4
|
|
|
5
5
|
import wu from 'wu';
|
|
6
6
|
|
|
7
|
-
import {category, expect, test
|
|
7
|
+
import {category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
8
8
|
|
|
9
9
|
import {awaitGrid} from './utils';
|
|
10
|
-
import {WebLogoViewer} from '../viewers/web-logo-viewer';
|
|
10
|
+
import {Debounces, WebLogoViewer} from '../viewers/web-logo-viewer';
|
|
11
11
|
|
|
12
12
|
import {_package} from '../package-test';
|
|
13
13
|
|
|
@@ -20,12 +20,15 @@ category('WebLogo-layout', () => {
|
|
|
20
20
|
const wlViewer = await df.plot.fromType('WebLogo',
|
|
21
21
|
{sequenceColumnName: col.name}) as unknown as WebLogoViewer;
|
|
22
22
|
view.dockManager.dock(wlViewer);
|
|
23
|
+
|
|
24
|
+
await delay(Debounces.render * 2);
|
|
23
25
|
await wlViewer.awaitRendered();
|
|
24
26
|
await awaitGrid(view.grid);
|
|
25
27
|
|
|
26
28
|
const viewLayout = view.saveLayout();
|
|
27
29
|
const viewLayoutJsonStr = viewLayout.toJson();
|
|
28
30
|
view.loadLayout(viewLayout);
|
|
31
|
+
await delay(Debounces.render * 2);
|
|
29
32
|
await wlViewer.awaitRendered();
|
|
30
33
|
await awaitGrid(view.grid);
|
|
31
34
|
|
|
@@ -3,13 +3,17 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
|
|
4
4
|
import {category, expect, expectArray, test, testEvent} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
|
+
import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
|
+
|
|
6
9
|
import {
|
|
7
10
|
countForMonomerAtPosition,
|
|
8
11
|
PositionInfo as PI,
|
|
9
12
|
PositionMonomerInfo as PMI,
|
|
10
13
|
WebLogoViewer,
|
|
11
14
|
} from '../viewers/web-logo-viewer';
|
|
12
|
-
|
|
15
|
+
|
|
16
|
+
const g: string = GAP_SYMBOL;
|
|
13
17
|
|
|
14
18
|
category('WebLogo-positions', () => {
|
|
15
19
|
const csvDf1 = `seq
|
|
@@ -36,18 +40,18 @@ ATC-G-TTGC--
|
|
|
36
40
|
const positions: PI[] = wlViewer['positions'];
|
|
37
41
|
|
|
38
42
|
const resAllDf1: PI[] = [
|
|
39
|
-
new PI(0, '1', {'A': new PMI(2),
|
|
43
|
+
new PI(0, '1', {'A': new PMI(2), [g]: new PMI(3)}),
|
|
40
44
|
new PI(1, '2', {'T': new PMI(5)}),
|
|
41
45
|
new PI(2, '3', {'C': new PMI(5)}),
|
|
42
|
-
new PI(3, '4', {
|
|
46
|
+
new PI(3, '4', {[g]: new PMI(5)}),
|
|
43
47
|
new PI(4, '5', {'G': new PMI(5)}),
|
|
44
|
-
new PI(5, '6', {
|
|
48
|
+
new PI(5, '6', {[g]: new PMI(3), 'C': new PMI(2)}),
|
|
45
49
|
new PI(6, '7', {'T': new PMI(5)}),
|
|
46
50
|
new PI(7, '8', {'T': new PMI(5)}),
|
|
47
51
|
new PI(8, '9', {'G': new PMI(5)}),
|
|
48
52
|
new PI(9, '10', {'C': new PMI(5)}),
|
|
49
|
-
new PI(10, '11', {
|
|
50
|
-
new PI(11, '12', {
|
|
53
|
+
new PI(10, '11', {[g]: new PMI(5)}),
|
|
54
|
+
new PI(11, '12', {[g]: new PMI(5)}),
|
|
51
55
|
];
|
|
52
56
|
|
|
53
57
|
expect(positions.length, resAllDf1.length);
|
|
@@ -89,15 +93,15 @@ ATC-G-TTGC--
|
|
|
89
93
|
const positions: PI[] = wlViewer['positions'];
|
|
90
94
|
|
|
91
95
|
const resAllDf1: PI[] = [
|
|
92
|
-
new PI(0, '1', {
|
|
96
|
+
new PI(0, '1', {[g]: new PMI(3)}),
|
|
93
97
|
new PI(1, '2', {'T': new PMI(3)}),
|
|
94
|
-
new PI(2, '3', {
|
|
95
|
-
new PI(3, '4', {
|
|
98
|
+
new PI(2, '3', {[g]: new PMI(3)}),
|
|
99
|
+
new PI(3, '4', {[g]: new PMI(3)}),
|
|
96
100
|
new PI(4, '5', {'C': new PMI(3)}),
|
|
97
|
-
new PI(5, '6', {
|
|
101
|
+
new PI(5, '6', {[g]: new PMI(2), 'C': new PMI(1)}),
|
|
98
102
|
new PI(6, '7', {'G': new PMI(3)}),
|
|
99
103
|
new PI(7, '8', {'T': new PMI(3)}),
|
|
100
|
-
new PI(8, '9', {
|
|
104
|
+
new PI(8, '9', {[g]: new PMI(3)}),
|
|
101
105
|
];
|
|
102
106
|
|
|
103
107
|
expect(positions.length, resAllDf1.length);
|
|
@@ -128,11 +132,11 @@ ATC-G-TTGC--
|
|
|
128
132
|
const resPosList: PI[] = wlViewer['positions'];
|
|
129
133
|
|
|
130
134
|
const tgtPosList: PI[] = [
|
|
131
|
-
new PI(0, '1', {'A': new PMI(2),
|
|
135
|
+
new PI(0, '1', {'A': new PMI(2), [g]: new PMI(3)}),
|
|
132
136
|
new PI(1, '2', {'T': new PMI(5)}),
|
|
133
137
|
new PI(2, '3', {'C': new PMI(5)}),
|
|
134
138
|
new PI(4, '5', {'G': new PMI(5)}),
|
|
135
|
-
new PI(5, '6', {
|
|
139
|
+
new PI(5, '6', {[g]: new PMI(3), 'C': new PMI(2)}),
|
|
136
140
|
new PI(6, '7', {'T': new PMI(5)}),
|
|
137
141
|
new PI(7, '8', {'T': new PMI(5)}),
|
|
138
142
|
new PI(8, '9', {'G': new PMI(5)}),
|
|
@@ -166,7 +170,7 @@ ATC-G-TTGC--
|
|
|
166
170
|
const tgtPosList: PI[] = [
|
|
167
171
|
new PI(2, '3', {'C': new PMI(5)}),
|
|
168
172
|
new PI(4, '5', {'G': new PMI(5)}),
|
|
169
|
-
new PI(5, '6', {
|
|
173
|
+
new PI(5, '6', {[g]: new PMI(3), 'C': new PMI(2)}),
|
|
170
174
|
new PI(6, '7', {'T': new PMI(5)}),
|
|
171
175
|
];
|
|
172
176
|
|
|
@@ -178,8 +182,8 @@ ATC-G-TTGC--
|
|
|
178
182
|
}
|
|
179
183
|
|
|
180
184
|
const atPI1: PI = resPosList[1];
|
|
181
|
-
const
|
|
182
|
-
const countAt1 = countForMonomerAtPosition(df,
|
|
185
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
186
|
+
const countAt1 = countForMonomerAtPosition(df, sh, df.filter, 'G', atPI1);
|
|
183
187
|
expect(countAt1, 5);
|
|
184
188
|
await wlViewer.awaitRendered();
|
|
185
189
|
});
|
package/src/tests/bio-tests.ts
CHANGED
|
@@ -4,18 +4,18 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
|
|
5
5
|
import {category, test, expect, expectObject, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {
|
|
7
|
-
getAlphabetSimilarity,
|
|
8
|
-
monomerToShort,
|
|
9
|
-
pickUpPalette,
|
|
10
|
-
splitterAsFasta,
|
|
11
|
-
splitterAsHelm,
|
|
7
|
+
NOTATION, getAlphabetSimilarity, monomerToShort, pickUpPalette, splitterAsFasta, splitterAsHelm,
|
|
12
8
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
13
9
|
import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
14
10
|
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
15
11
|
import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
|
|
16
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
17
12
|
import {getStatsForCol} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
18
13
|
|
|
14
|
+
import {GAP_SYMBOL} from '../const';
|
|
15
|
+
|
|
16
|
+
/** GAP_SYMBOL */
|
|
17
|
+
const g: string = GAP_SYMBOL;
|
|
18
|
+
|
|
19
19
|
category('bio', () => {
|
|
20
20
|
const csvDfN1: string = `seq
|
|
21
21
|
ACGTCT
|
|
@@ -58,6 +58,8 @@ XZJ{}2
|
|
|
58
58
|
PEPTIDE1{meI}$$$$`;
|
|
59
59
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
60
60
|
const seqCol: DG.Column = df.getCol('seq')!;
|
|
61
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
62
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
61
63
|
const stats = getStatsForCol(seqCol, 1, splitterAsHelm);
|
|
62
64
|
|
|
63
65
|
expectObject(stats.freq, {
|
|
@@ -127,6 +129,8 @@ category('WebLogo.monomerToShort', () => {
|
|
|
127
129
|
export async function _testGetStats(csvDfN1: string) {
|
|
128
130
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
129
131
|
const seqCol: DG.Column = dfN1.col('seq')!;
|
|
132
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
133
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
130
134
|
const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
|
|
131
135
|
|
|
132
136
|
expectObject(stats.freq, {
|
|
@@ -144,7 +148,7 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
144
148
|
'C': 3015,
|
|
145
149
|
'G': 3015,
|
|
146
150
|
'T': 2048,
|
|
147
|
-
|
|
151
|
+
[g]: 1000,
|
|
148
152
|
};
|
|
149
153
|
const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
|
|
150
154
|
const res = getAlphabetSimilarity(freq, alphabet);
|
|
@@ -155,6 +159,8 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
155
159
|
export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
156
160
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
157
161
|
const col: DG.Column = df.col('seq')!;
|
|
162
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
163
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
158
164
|
const cp = pickUpPalette(col);
|
|
159
165
|
|
|
160
166
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -163,6 +169,8 @@ export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
|
163
169
|
export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
164
170
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
|
|
165
171
|
const col: DG.Column = df.col('seq')!;
|
|
172
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
173
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
166
174
|
const cp = pickUpPalette(col);
|
|
167
175
|
|
|
168
176
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -171,6 +179,8 @@ export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
|
171
179
|
export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
172
180
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
173
181
|
const col: DG.Column = df.col('seq')!;
|
|
182
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
183
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
174
184
|
const cp = pickUpPalette(col);
|
|
175
185
|
|
|
176
186
|
expect(cp instanceof AminoacidsPalettes, true);
|
|
@@ -179,6 +189,8 @@ export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
|
179
189
|
export async function _testPickupPaletteX(csvDfX: string) {
|
|
180
190
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
|
|
181
191
|
const col: DG.Column = df.col('seq')!;
|
|
192
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
193
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
182
194
|
const cp = pickUpPalette(col);
|
|
183
195
|
|
|
184
196
|
expect(cp instanceof UnknownSeqPalette, true);
|
|
@@ -5,7 +5,7 @@ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src
|
|
|
5
5
|
|
|
6
6
|
import {ConverterFunc} from './types';
|
|
7
7
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
-
import {
|
|
8
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
9
|
|
|
10
10
|
// import {mmSemType} from '../const';
|
|
11
11
|
// import {importFasta} from '../package';
|
|
@@ -133,8 +133,8 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
133
133
|
throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
|
|
134
134
|
|
|
135
135
|
return function(srcCol: DG.Column): DG.Column {
|
|
136
|
-
const
|
|
137
|
-
const resCol =
|
|
136
|
+
const converterSh = SeqHandler.forColumn(srcCol);
|
|
137
|
+
const resCol = converterSh.convert(tgtNotation, tgtSeparator);
|
|
138
138
|
expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
139
139
|
return resCol;
|
|
140
140
|
};
|
|
@@ -152,7 +152,7 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
152
152
|
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
153
153
|
|
|
154
154
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
155
|
-
const
|
|
155
|
+
const _sh: SeqHandler = SeqHandler.forColumn(resCol);
|
|
156
156
|
}
|
|
157
157
|
|
|
158
158
|
// FASTA tests
|