@datagrok/bio 2.11.42 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +1 -1
- package/detectors.js +11 -11
- package/dist/36.js +1 -1
- package/dist/36.js.map +1 -1
- package/dist/413.js +1 -1
- package/dist/413.js.map +1 -1
- package/dist/590.js +1 -1
- package/dist/590.js.map +1 -1
- package/dist/709.js +1 -1
- package/dist/709.js.map +1 -1
- package/dist/895.js +1 -1
- package/dist/895.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/package.json +10 -10
- package/src/analysis/sequence-activity-cliffs.ts +9 -9
- package/src/analysis/sequence-diversity-viewer.ts +3 -3
- package/src/analysis/sequence-search-base-viewer.ts +2 -2
- package/src/analysis/sequence-similarity-viewer.ts +10 -10
- package/src/analysis/sequence-space.ts +26 -23
- package/src/calculations/monomerLevelMols.ts +13 -11
- package/src/package.ts +8 -8
- package/src/tests/WebLogo-layout-tests.ts +5 -2
- package/src/tests/WebLogo-positions-test.ts +5 -5
- package/src/tests/bio-tests.ts +13 -6
- package/src/tests/converters-test.ts +4 -4
- package/src/tests/detectors-benchmark-tests.ts +5 -5
- package/src/tests/detectors-tests.ts +13 -13
- package/src/tests/fasta-export-tests.ts +10 -4
- package/src/tests/mm-distance-tests.ts +10 -10
- package/src/tests/msa-tests.ts +8 -15
- package/src/tests/renderers-monomer-placer.ts +3 -3
- package/src/tests/renderers-test.ts +6 -8
- package/src/tests/splitters-test.ts +14 -13
- package/src/tests/to-atomic-level-tests.ts +2 -2
- package/src/tests/units-handler-get-region.ts +4 -4
- package/src/tests/units-handler-splitted-tests.ts +19 -17
- package/src/tests/units-handler-tests.ts +32 -32
- package/src/utils/cell-renderer.ts +40 -34
- package/src/utils/check-input-column.ts +5 -5
- package/src/utils/context-menu.ts +9 -6
- package/src/utils/convert.ts +9 -9
- package/src/utils/get-region-func-editor.ts +11 -11
- package/src/utils/get-region.ts +10 -12
- package/src/utils/macromolecule-column-widget.ts +4 -3
- package/src/utils/monomer-lib/library-file-manager/event-manager.ts +1 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +6 -6
- package/src/utils/pepsea.ts +1 -0
- package/src/utils/poly-tool/transformation.ts +3 -3
- package/src/utils/save-as-fasta.ts +14 -15
- package/src/utils/sequence-to-mol.ts +4 -4
- package/src/viewers/web-logo-viewer.ts +46 -54
- package/src/widgets/bio-substructure-filter.ts +3 -3
- package/src/widgets/composition-analysis-widget.ts +8 -8
package/package.json
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"friendlyName": "Bio",
|
|
4
4
|
"author": {
|
|
5
|
-
"name": "
|
|
6
|
-
"email": "
|
|
5
|
+
"name": "Aleksandr Tanas",
|
|
6
|
+
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.11.42",
|
|
8
|
+
"version": "2.12.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,10 +34,10 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "5.
|
|
38
|
-
"@datagrok-libraries/chem-meta": "^1.2.
|
|
39
|
-
"@datagrok-libraries/ml": "^6.4.
|
|
40
|
-
"@datagrok-libraries/tutorials": "^1.3.
|
|
37
|
+
"@datagrok-libraries/bio": "5.40.0",
|
|
38
|
+
"@datagrok-libraries/chem-meta": "^1.2.3",
|
|
39
|
+
"@datagrok-libraries/ml": "^6.4.12",
|
|
40
|
+
"@datagrok-libraries/tutorials": "^1.3.12",
|
|
41
41
|
"ajv": "^8.12.0",
|
|
42
42
|
"ajv-errors": "^3.0.0",
|
|
43
43
|
"@datagrok-libraries/utils": "^4.1.44",
|
|
@@ -54,9 +54,9 @@
|
|
|
54
54
|
"wu": "latest"
|
|
55
55
|
},
|
|
56
56
|
"devDependencies": {
|
|
57
|
-
"@datagrok/chem": "^1.
|
|
58
|
-
"@datagrok/dendrogram": "^1.2.
|
|
59
|
-
"@datagrok/helm": "^2.1.
|
|
57
|
+
"@datagrok/chem": "^1.9.0",
|
|
58
|
+
"@datagrok/dendrogram": "^1.2.27",
|
|
59
|
+
"@datagrok/helm": "^2.1.30",
|
|
60
60
|
"@types/node": "^17.0.24",
|
|
61
61
|
"@types/wu": "latest",
|
|
62
62
|
"@typescript-eslint/eslint-plugin": "latest",
|
|
@@ -2,13 +2,15 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
5
7
|
import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
6
8
|
import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
7
9
|
import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
10
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
9
11
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
10
12
|
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
|
-
import {
|
|
13
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
12
14
|
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
13
15
|
|
|
14
16
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
@@ -97,19 +99,16 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
97
99
|
|
|
98
100
|
propPanel.append(ui.divText(params.seqCol.name, {style: {fontWeight: 'bold'}}));
|
|
99
101
|
|
|
100
|
-
const sequencesArray = new Array<string>(2);
|
|
101
102
|
const activitiesArray = new Array<number>(2);
|
|
102
103
|
params.points.forEach((molIdx, idx) => {
|
|
103
|
-
sequencesArray[idx] = params.seqCol.get(molIdx);
|
|
104
104
|
activitiesArray[idx] = params.activityCol.get(molIdx);
|
|
105
105
|
});
|
|
106
106
|
|
|
107
107
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
108
|
-
const
|
|
109
|
-
const
|
|
110
|
-
const
|
|
111
|
-
const
|
|
112
|
-
const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
|
|
108
|
+
const sh = SeqHandler.forColumn(params.seqCol);
|
|
109
|
+
const subParts1 = sh.getSplitted(params.points[0]); // splitter(sequencesArray[0], {uh, rowIdx: -1});
|
|
110
|
+
const subParts2 = sh.getSplitted(params.points[1]); // splitter(sequencesArray[1], {uh, rowIdx: -1});
|
|
111
|
+
const canvas = createDifferenceCanvas(subParts1, subParts2, sh.units, molDifferences);
|
|
113
112
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
114
113
|
|
|
115
114
|
propPanel.append(createDifferencesWithPositions(molDifferences));
|
|
@@ -135,7 +134,8 @@ export function createDifferenceCanvas(
|
|
|
135
134
|
const canvas = document.createElement('canvas');
|
|
136
135
|
const context = canvas.getContext('2d');
|
|
137
136
|
canvas.height = 30;
|
|
138
|
-
drawMoleculeDifferenceOnCanvas(context!, 0, 0, 0, 30,
|
|
137
|
+
drawMoleculeDifferenceOnCanvas(context!, 0, 0, 0, 30,
|
|
138
|
+
wu(subParts1.canonicals).toArray(), wu(subParts2.canonicals).toArray(), units, true, molDifferences);
|
|
139
139
|
return canvas;
|
|
140
140
|
}
|
|
141
141
|
|
|
@@ -7,7 +7,7 @@ import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
|
7
7
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
8
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
|
-
import {
|
|
10
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
11
11
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
12
12
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
13
13
|
import {DistanceMatrixService, dmLinearIndex} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
@@ -29,8 +29,8 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
29
29
|
return;
|
|
30
30
|
if (this.dataFrame) {
|
|
31
31
|
if (computeData && this.moleculeColumn) {
|
|
32
|
-
const
|
|
33
|
-
await (
|
|
32
|
+
const sh = SeqHandler.forColumn(this.moleculeColumn);
|
|
33
|
+
await (sh.isFasta() ? this.computeByMM() : this.computeByChem());
|
|
34
34
|
|
|
35
35
|
const diverseColumnName: string = this.diverseColumnLabel != null ? this.diverseColumnLabel :
|
|
36
36
|
`diverse (${this.moleculeColumnName})`;
|
|
@@ -14,7 +14,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
|
14
14
|
fingerprint: string;
|
|
15
15
|
metricsProperties = ['distanceMetric', 'fingerprint'];
|
|
16
16
|
fingerprintChoices = ['Morgan', 'Pattern'];
|
|
17
|
-
moleculeColumn?: DG.Column
|
|
17
|
+
moleculeColumn?: DG.Column<string>;
|
|
18
18
|
moleculeColumnName: string;
|
|
19
19
|
initialized: boolean = false;
|
|
20
20
|
tags = [DG.TAGS.UNITS, bioTAGS.aligned, bioTAGS.separator, bioTAGS.alphabet];
|
|
@@ -51,7 +51,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
|
51
51
|
.subscribe((_: any) => this.render(false)));
|
|
52
52
|
this.subs.push(DG.debounce(ui.onSizeChanged(this.root), 50)
|
|
53
53
|
.subscribe((_: any) => this.render(false)));
|
|
54
|
-
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE)
|
|
54
|
+
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE) as DG.Column<string>;
|
|
55
55
|
this.moleculeColumnName = this.moleculeColumn?.name!;
|
|
56
56
|
this.getProperty('limit')!.fromOptions({min: 1, max: this.dataFrame.rowCount});
|
|
57
57
|
}
|
|
@@ -7,7 +7,7 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
|
7
7
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
8
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
|
-
import {
|
|
10
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
11
11
|
import {alignSequencePair} from '@datagrok-libraries/bio/src/utils/macromolecule/alignment';
|
|
12
12
|
import {KnnResult, SparseMatrixService} from '@datagrok-libraries/ml/src/distance-matrix/sparse-matrix-service';
|
|
13
13
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
@@ -50,9 +50,9 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
50
50
|
this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
51
51
|
if (computeData && !this.gridSelect) {
|
|
52
52
|
this.targetMoleculeIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
53
|
-
const
|
|
53
|
+
const sh = SeqHandler.forColumn(this.moleculeColumn!);
|
|
54
54
|
|
|
55
|
-
await (!
|
|
55
|
+
await (!sh.isHelm() ? this.computeByMM() : this.computeByChem());
|
|
56
56
|
const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
|
|
57
57
|
`similar (${this.moleculeColumnName})`;
|
|
58
58
|
this.molCol = DG.Column.string(similarColumnName,
|
|
@@ -121,13 +121,13 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
121
121
|
const propPanel = ui.div();
|
|
122
122
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
123
123
|
const molColName = this.molCol?.name!;
|
|
124
|
-
const
|
|
125
|
-
const
|
|
126
|
-
const
|
|
127
|
-
const subParts1 =
|
|
128
|
-
const subParts2 =
|
|
129
|
-
const alignment = alignSequencePair(
|
|
130
|
-
const canvas = createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted,
|
|
124
|
+
const resCol: DG.Column<string> = resDf.col(molColName)!;
|
|
125
|
+
const molColSh = SeqHandler.forColumn(this.moleculeColumn!);
|
|
126
|
+
const resSh = SeqHandler.forColumn(resCol);
|
|
127
|
+
const subParts1 = molColSh.getSplitted(this.targetMoleculeIdx);
|
|
128
|
+
const subParts2 = resSh.getSplitted(resDf.currentRowIdx);
|
|
129
|
+
const alignment = alignSequencePair(subParts1, subParts2);
|
|
130
|
+
const canvas = createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted, resSh.units, molDifferences);
|
|
131
131
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
132
132
|
if (subParts1.length !== subParts2.length) {
|
|
133
133
|
propPanel.append(ui.divV([
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
import * as ui from 'datagrok-api/ui';
|
|
1
2
|
import * as DG from 'datagrok-api/dg';
|
|
2
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
3
5
|
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
4
6
|
import {mmDistanceFunctionArgs} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
5
|
-
import {
|
|
7
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
6
8
|
import {getMonomerSubstitutionMatrix} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
7
9
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
8
10
|
|
|
@@ -13,32 +15,35 @@ export interface ISequenceSpaceResult {
|
|
|
13
15
|
|
|
14
16
|
export async function getEncodedSeqSpaceCol(
|
|
15
17
|
seqCol: DG.Column, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames, fingerprintType: string = 'Morgan'
|
|
16
|
-
): Promise<{seqList:string[], options: {[_:string]: any}}> {
|
|
17
|
-
// encodes sequences using utf
|
|
18
|
-
const
|
|
19
|
-
const
|
|
20
|
-
const
|
|
21
|
-
const seqColLength = seqList.length;
|
|
18
|
+
): Promise<{ seqList: string[], options: { [_: string]: any } }> {
|
|
19
|
+
// encodes sequences using utf characters to also support multichar and non fasta sequences
|
|
20
|
+
const rowCount = seqCol.length;
|
|
21
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
22
|
+
const encList = Array<string>(rowCount);
|
|
22
23
|
let charCodeCounter = 36;
|
|
23
24
|
const charCodeMap = new Map<string, string>();
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
const seqColCats = seqCol.categories;
|
|
26
|
+
const seqColRawData = seqCol.getRawData();
|
|
27
|
+
for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
|
|
28
|
+
const catI = seqColRawData[rowIdx];
|
|
29
|
+
const seq = seqColCats[catI];
|
|
30
|
+
if (seq === null || seqCol.isNone(rowIdx)) {
|
|
31
|
+
// @ts-ignore
|
|
32
|
+
encList[rowIdx] = null;
|
|
28
33
|
continue;
|
|
29
34
|
}
|
|
30
|
-
|
|
31
|
-
const splittedSeq =
|
|
35
|
+
encList[rowIdx] = '';
|
|
36
|
+
const splittedSeq = sh.getSplitted(rowIdx);
|
|
32
37
|
for (let j = 0; j < splittedSeq.length; j++) {
|
|
33
|
-
const char = splittedSeq
|
|
38
|
+
const char = splittedSeq.getCanonical(j);
|
|
34
39
|
if (!charCodeMap.has(char)) {
|
|
35
40
|
charCodeMap.set(char, String.fromCharCode(charCodeCounter));
|
|
36
41
|
charCodeCounter++;
|
|
37
42
|
}
|
|
38
|
-
|
|
43
|
+
encList[rowIdx] += charCodeMap.get(char)!;
|
|
39
44
|
}
|
|
40
45
|
}
|
|
41
|
-
let options = {};
|
|
46
|
+
let options = {} as mmDistanceFunctionArgs;
|
|
42
47
|
if (similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
|
|
43
48
|
const monomers = Array.from(charCodeMap.keys());
|
|
44
49
|
const monomerRes = await getMonomerSubstitutionMatrix(monomers, fingerprintType);
|
|
@@ -48,13 +53,12 @@ export async function getEncodedSeqSpaceCol(
|
|
|
48
53
|
monomerRes.scoringMatrix[i][j] = 1 - val;
|
|
49
54
|
});
|
|
50
55
|
});
|
|
51
|
-
const monomerHashToMatrixMap: {[_: string]: number} = {};
|
|
56
|
+
const monomerHashToMatrixMap: { [_: string]: number } = {};
|
|
52
57
|
Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
|
|
53
58
|
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
54
59
|
});
|
|
55
60
|
// sets distance function args in place.
|
|
56
|
-
options = {scoringMatrix: monomerRes.scoringMatrix,
|
|
57
|
-
alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
|
|
61
|
+
options = {scoringMatrix: monomerRes.scoringMatrix, alphabetIndexes: monomerHashToMatrixMap};
|
|
58
62
|
} else if (similarityMetric === MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH) {
|
|
59
63
|
const monomers = Array.from(charCodeMap.keys());
|
|
60
64
|
const monomerRes = await getMonomerSubstitutionMatrix(monomers, fingerprintType);
|
|
@@ -64,13 +68,12 @@ export async function getEncodedSeqSpaceCol(
|
|
|
64
68
|
// monomerRes.scoringMatrix[i][j] = 1 - val;
|
|
65
69
|
// });
|
|
66
70
|
// });
|
|
67
|
-
const monomerHashToMatrixMap: {[_: string]: number} = {};
|
|
71
|
+
const monomerHashToMatrixMap: { [_: string]: number } = {};
|
|
68
72
|
Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
|
|
69
73
|
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
70
74
|
});
|
|
71
75
|
// sets distance function args in place.
|
|
72
|
-
options = {scoringMatrix: monomerRes.scoringMatrix,
|
|
73
|
-
alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
|
|
76
|
+
options = {scoringMatrix: monomerRes.scoringMatrix, alphabetIndexes: monomerHashToMatrixMap};
|
|
74
77
|
}
|
|
75
|
-
return {seqList, options};
|
|
78
|
+
return {seqList: encList, options};
|
|
76
79
|
}
|
|
@@ -5,33 +5,32 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import wu from 'wu';
|
|
6
6
|
|
|
7
7
|
import {getHelmMonomers} from '../package';
|
|
8
|
-
import {
|
|
9
|
-
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
|
+
import {GAP_SYMBOL, ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
10
10
|
|
|
11
11
|
const V2000_ATOM_NAME_POS = 31;
|
|
12
12
|
|
|
13
13
|
export async function getMonomericMols(
|
|
14
14
|
mcol: DG.Column<string>, pattern: boolean = false, monomersDict?: Map<string, string>
|
|
15
15
|
): Promise<DG.Column> {
|
|
16
|
-
const
|
|
16
|
+
const sh = SeqHandler.forColumn(mcol);
|
|
17
17
|
let molV3000Array;
|
|
18
18
|
monomersDict ??= new Map();
|
|
19
|
-
const monomers =
|
|
20
|
-
getHelmMonomers(mcol) : Object.keys(
|
|
19
|
+
const monomers = sh.isHelm() ?
|
|
20
|
+
getHelmMonomers(mcol) : Object.keys(sh.stats.freq).filter((it) => it !== '');
|
|
21
21
|
|
|
22
22
|
for (let i = 0; i < monomers.length; i++) {
|
|
23
23
|
if (!monomersDict.has(monomers[i]))
|
|
24
24
|
monomersDict.set(monomers[i], `${monomersDict.size + 1}`);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
if (
|
|
27
|
+
if (sh.isHelm()) {
|
|
28
28
|
molV3000Array = await grok.functions.call('HELM:getMolFiles', {col: mcol});
|
|
29
29
|
molV3000Array = changeV2000ToV3000(molV3000Array, monomersDict, pattern);
|
|
30
30
|
} else {
|
|
31
31
|
molV3000Array = new Array<string>(mcol.length);
|
|
32
32
|
for (let i = 0; i < mcol.length; i++) {
|
|
33
|
-
const
|
|
34
|
-
const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
|
|
33
|
+
const molV3000 = molV3000FromNonHelmSequence(sh.getSplitted(i), monomersDict, pattern);
|
|
35
34
|
molV3000Array[i] = molV3000;
|
|
36
35
|
}
|
|
37
36
|
}
|
|
@@ -51,9 +50,12 @@ M V30 BEGIN CTAB
|
|
|
51
50
|
molV3000 += 'M V30 BEGIN ATOM\n';
|
|
52
51
|
|
|
53
52
|
for (let atomRowI = 0; atomRowI < monomers.length; atomRowI++) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
const cm: string = monomers.getCanonical(atomRowI);
|
|
54
|
+
if (cm !== GAP_SYMBOL) {
|
|
55
|
+
molV3000 += pattern ?
|
|
56
|
+
`M V30 ${atomRowI + 1} R${monomersDict.get(cm)} 0.000 0.000 0 0\n` :
|
|
57
|
+
`M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${monomersDict.get(cm)}\n`;
|
|
58
|
+
}
|
|
57
59
|
}
|
|
58
60
|
|
|
59
61
|
molV3000 += 'M V30 END ATOM\n';
|
package/src/package.ts
CHANGED
|
@@ -12,7 +12,7 @@ import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-me
|
|
|
12
12
|
import {
|
|
13
13
|
TAGS as bioTAGS,
|
|
14
14
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
15
|
-
import {
|
|
15
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
16
16
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
17
17
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
18
18
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
@@ -165,11 +165,11 @@ export function getBioLib(): IMonomerLib {
|
|
|
165
165
|
return MonomerLibManager.instance.getBioLib();
|
|
166
166
|
}
|
|
167
167
|
|
|
168
|
-
//name:
|
|
168
|
+
//name: getSeqHandler
|
|
169
169
|
//input: column sequence { semType: Macromolecule }
|
|
170
170
|
//output: object result
|
|
171
|
-
export function
|
|
172
|
-
return
|
|
171
|
+
export function getSeqHandler(sequence: DG.Column<string>): SeqHandler {
|
|
172
|
+
return SeqHandler.forColumn(sequence);
|
|
173
173
|
}
|
|
174
174
|
|
|
175
175
|
// -- Panels --
|
|
@@ -611,7 +611,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
611
611
|
if (col.semType != DG.SEMTYPE.MACROMOLECULE)
|
|
612
612
|
return false;
|
|
613
613
|
|
|
614
|
-
const
|
|
614
|
+
const _colSh = SeqHandler.forColumn(col);
|
|
615
615
|
// TODO: prevent for cyclic, branched or multiple chains in Helm
|
|
616
616
|
return true;
|
|
617
617
|
});
|
|
@@ -630,7 +630,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
630
630
|
return;
|
|
631
631
|
} else if (colList.length > 1) {
|
|
632
632
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
633
|
-
const selectedCol = colList.find((c) => { return
|
|
633
|
+
const selectedCol = colList.find((c) => { return SeqHandler.forColumn(c).isMsa(); });
|
|
634
634
|
const colInput: DG.InputBase = ui.choiceInput(
|
|
635
635
|
'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
|
|
636
636
|
ui.dialog({
|
|
@@ -773,8 +773,8 @@ export async function splitToMonomersTopMenu(table: DG.DataFrame, sequence: DG.C
|
|
|
773
773
|
//name: Bio: getHelmMonomers
|
|
774
774
|
//input: column sequence {semType: Macromolecule}
|
|
775
775
|
export function getHelmMonomers(sequence: DG.Column<string>): string[] {
|
|
776
|
-
const
|
|
777
|
-
const stats =
|
|
776
|
+
const sh = SeqHandler.forColumn(sequence);
|
|
777
|
+
const stats = sh.stats;
|
|
778
778
|
return Object.keys(stats.freq);
|
|
779
779
|
}
|
|
780
780
|
|
|
@@ -4,10 +4,10 @@ import * as ui from 'datagrok-api/ui';
|
|
|
4
4
|
|
|
5
5
|
import wu from 'wu';
|
|
6
6
|
|
|
7
|
-
import {category, expect, test
|
|
7
|
+
import {category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
8
8
|
|
|
9
9
|
import {awaitGrid} from './utils';
|
|
10
|
-
import {WebLogoViewer} from '../viewers/web-logo-viewer';
|
|
10
|
+
import {Debounces, WebLogoViewer} from '../viewers/web-logo-viewer';
|
|
11
11
|
|
|
12
12
|
import {_package} from '../package-test';
|
|
13
13
|
|
|
@@ -20,12 +20,15 @@ category('WebLogo-layout', () => {
|
|
|
20
20
|
const wlViewer = await df.plot.fromType('WebLogo',
|
|
21
21
|
{sequenceColumnName: col.name}) as unknown as WebLogoViewer;
|
|
22
22
|
view.dockManager.dock(wlViewer);
|
|
23
|
+
|
|
24
|
+
await delay(Debounces.render * 2);
|
|
23
25
|
await wlViewer.awaitRendered();
|
|
24
26
|
await awaitGrid(view.grid);
|
|
25
27
|
|
|
26
28
|
const viewLayout = view.saveLayout();
|
|
27
29
|
const viewLayoutJsonStr = viewLayout.toJson();
|
|
28
30
|
view.loadLayout(viewLayout);
|
|
31
|
+
await delay(Debounces.render * 2);
|
|
29
32
|
await wlViewer.awaitRendered();
|
|
30
33
|
await awaitGrid(view.grid);
|
|
31
34
|
|
|
@@ -3,15 +3,15 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
|
|
4
4
|
import {category, expect, expectArray, test, testEvent} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
|
+
import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
|
+
|
|
6
9
|
import {
|
|
7
10
|
countForMonomerAtPosition,
|
|
8
11
|
PositionInfo as PI,
|
|
9
12
|
PositionMonomerInfo as PMI,
|
|
10
13
|
WebLogoViewer,
|
|
11
14
|
} from '../viewers/web-logo-viewer';
|
|
12
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
13
|
-
|
|
14
|
-
import {GAP_SYMBOL} from '../const';
|
|
15
15
|
|
|
16
16
|
const g: string = GAP_SYMBOL;
|
|
17
17
|
|
|
@@ -182,8 +182,8 @@ ATC-G-TTGC--
|
|
|
182
182
|
}
|
|
183
183
|
|
|
184
184
|
const atPI1: PI = resPosList[1];
|
|
185
|
-
const
|
|
186
|
-
const countAt1 = countForMonomerAtPosition(df,
|
|
185
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
186
|
+
const countAt1 = countForMonomerAtPosition(df, sh, df.filter, 'G', atPI1);
|
|
187
187
|
expect(countAt1, 5);
|
|
188
188
|
await wlViewer.awaitRendered();
|
|
189
189
|
});
|
package/src/tests/bio-tests.ts
CHANGED
|
@@ -4,16 +4,11 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
|
|
5
5
|
import {category, test, expect, expectObject, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {
|
|
7
|
-
getAlphabetSimilarity,
|
|
8
|
-
monomerToShort,
|
|
9
|
-
pickUpPalette,
|
|
10
|
-
splitterAsFasta,
|
|
11
|
-
splitterAsHelm,
|
|
7
|
+
NOTATION, getAlphabetSimilarity, monomerToShort, pickUpPalette, splitterAsFasta, splitterAsHelm,
|
|
12
8
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
13
9
|
import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
14
10
|
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
15
11
|
import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
|
|
16
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
17
12
|
import {getStatsForCol} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
18
13
|
|
|
19
14
|
import {GAP_SYMBOL} from '../const';
|
|
@@ -63,6 +58,8 @@ XZJ{}2
|
|
|
63
58
|
PEPTIDE1{meI}$$$$`;
|
|
64
59
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
65
60
|
const seqCol: DG.Column = df.getCol('seq')!;
|
|
61
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
62
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
66
63
|
const stats = getStatsForCol(seqCol, 1, splitterAsHelm);
|
|
67
64
|
|
|
68
65
|
expectObject(stats.freq, {
|
|
@@ -132,6 +129,8 @@ category('WebLogo.monomerToShort', () => {
|
|
|
132
129
|
export async function _testGetStats(csvDfN1: string) {
|
|
133
130
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
134
131
|
const seqCol: DG.Column = dfN1.col('seq')!;
|
|
132
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
133
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
135
134
|
const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
|
|
136
135
|
|
|
137
136
|
expectObject(stats.freq, {
|
|
@@ -160,6 +159,8 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
160
159
|
export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
161
160
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
162
161
|
const col: DG.Column = df.col('seq')!;
|
|
162
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
163
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
163
164
|
const cp = pickUpPalette(col);
|
|
164
165
|
|
|
165
166
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -168,6 +169,8 @@ export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
|
168
169
|
export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
169
170
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
|
|
170
171
|
const col: DG.Column = df.col('seq')!;
|
|
172
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
173
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
171
174
|
const cp = pickUpPalette(col);
|
|
172
175
|
|
|
173
176
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -176,6 +179,8 @@ export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
|
176
179
|
export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
177
180
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
178
181
|
const col: DG.Column = df.col('seq')!;
|
|
182
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
183
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
179
184
|
const cp = pickUpPalette(col);
|
|
180
185
|
|
|
181
186
|
expect(cp instanceof AminoacidsPalettes, true);
|
|
@@ -184,6 +189,8 @@ export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
|
184
189
|
export async function _testPickupPaletteX(csvDfX: string) {
|
|
185
190
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
|
|
186
191
|
const col: DG.Column = df.col('seq')!;
|
|
192
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
193
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
187
194
|
const cp = pickUpPalette(col);
|
|
188
195
|
|
|
189
196
|
expect(cp instanceof UnknownSeqPalette, true);
|
|
@@ -5,7 +5,7 @@ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src
|
|
|
5
5
|
|
|
6
6
|
import {ConverterFunc} from './types';
|
|
7
7
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
-
import {
|
|
8
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
9
|
|
|
10
10
|
// import {mmSemType} from '../const';
|
|
11
11
|
// import {importFasta} from '../package';
|
|
@@ -133,8 +133,8 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
133
133
|
throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
|
|
134
134
|
|
|
135
135
|
return function(srcCol: DG.Column): DG.Column {
|
|
136
|
-
const
|
|
137
|
-
const resCol =
|
|
136
|
+
const converterSh = SeqHandler.forColumn(srcCol);
|
|
137
|
+
const resCol = converterSh.convert(tgtNotation, tgtSeparator);
|
|
138
138
|
expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
139
139
|
return resCol;
|
|
140
140
|
};
|
|
@@ -152,7 +152,7 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
152
152
|
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
153
153
|
|
|
154
154
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
155
|
-
const
|
|
155
|
+
const _sh: SeqHandler = SeqHandler.forColumn(resCol);
|
|
156
156
|
}
|
|
157
157
|
|
|
158
158
|
// FASTA tests
|
|
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
|
|
5
5
|
import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {ALPHABET, getAlphabet, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
-
import {
|
|
7
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
8
8
|
|
|
9
9
|
category('detectorsBenchmark', () => {
|
|
10
10
|
let detectFunc: DG.Func;
|
|
@@ -124,11 +124,11 @@ category('detectorsBenchmark', () => {
|
|
|
124
124
|
}
|
|
125
125
|
|
|
126
126
|
function checkDetectorRes(col: DG.Column, tgt: TgtType): void {
|
|
127
|
-
const
|
|
127
|
+
const sh = SeqHandler.forColumn(col);
|
|
128
128
|
expect(col.semType === tgt.semType, true);
|
|
129
|
-
expect(
|
|
130
|
-
expect(
|
|
131
|
-
expect(
|
|
129
|
+
expect(sh.notation === tgt.notation, true);
|
|
130
|
+
expect(sh.alphabet === tgt.alphabet, true);
|
|
131
|
+
expect(sh.separator === tgt.separator, true);
|
|
132
132
|
}
|
|
133
133
|
});
|
|
134
134
|
|
|
@@ -6,7 +6,7 @@ import {category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
|
6
6
|
|
|
7
7
|
import {importFasta} from '../package';
|
|
8
8
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
|
-
import {
|
|
9
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
10
10
|
|
|
11
11
|
/*
|
|
12
12
|
// snippet to list df columns of semType='Macromolecule' (false positive)
|
|
@@ -433,12 +433,12 @@ export async function _testPosList(list: string[], units: NOTATION,
|
|
|
433
433
|
if (separator)
|
|
434
434
|
expect(col.getTag(bioTAGS.separator), separator);
|
|
435
435
|
|
|
436
|
-
const
|
|
437
|
-
expect(
|
|
438
|
-
expect(
|
|
439
|
-
if (!
|
|
440
|
-
expect(
|
|
441
|
-
expect(
|
|
436
|
+
const sh = SeqHandler.forColumn(col);
|
|
437
|
+
expect(sh.getAlphabetSize(), alphabetSize);
|
|
438
|
+
expect(sh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
439
|
+
if (!sh.isHelm()) {
|
|
440
|
+
expect(sh.aligned, aligned);
|
|
441
|
+
expect(sh.alphabet, alphabet);
|
|
442
442
|
}
|
|
443
443
|
}
|
|
444
444
|
|
|
@@ -461,12 +461,12 @@ export async function _testPos(
|
|
|
461
461
|
if (separator)
|
|
462
462
|
expect(col.getTag(bioTAGS.separator), separator);
|
|
463
463
|
|
|
464
|
-
const
|
|
465
|
-
expect(
|
|
466
|
-
expect(
|
|
467
|
-
if (!
|
|
468
|
-
expect(
|
|
469
|
-
expect(
|
|
464
|
+
const sh = SeqHandler.forColumn(col);
|
|
465
|
+
expect(sh.getAlphabetSize(), alphabetSize);
|
|
466
|
+
expect(sh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
467
|
+
if (!sh.isHelm()) {
|
|
468
|
+
expect(sh.aligned, aligned);
|
|
469
|
+
expect(sh.alphabet, alphabet);
|
|
470
470
|
}
|
|
471
471
|
}
|
|
472
472
|
|