@datagrok/bio 2.8.4 → 2.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -2
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/dockerfiles/Dockerfile +5 -4
- package/package.json +2 -2
- package/src/analysis/sequence-activity-cliffs.ts +8 -7
- package/src/analysis/sequence-similarity-viewer.ts +8 -8
- package/src/apps/web-logo-app.ts +26 -6
- package/src/calculations/monomerLevelMols.ts +6 -3
- package/src/package.ts +2 -1
- package/src/tests/converters-test.ts +1 -1
- package/src/tests/msa-tests.ts +2 -3
- package/src/tests/renderers-test.ts +37 -3
- package/src/tests/splitters-test.ts +27 -1
- package/src/tests/units-handler-splitted-tests.ts +19 -12
- package/src/tests/units-handler-tests.ts +15 -15
- package/src/utils/cell-renderer.ts +31 -22
- package/src/utils/monomer-cell-renderer.ts +14 -14
- package/src/utils/save-as-fasta.ts +1 -1
- package/src/utils/split-to-monomers.ts +40 -6
- package/src/viewers/vd-regions-viewer.ts +88 -51
- package/src/viewers/web-logo-viewer.ts +307 -310
- package/src/widgets/composition-analysis-widget.ts +6 -2
package/dockerfiles/Dockerfile
CHANGED
|
@@ -24,12 +24,12 @@ RUN savedAptMark="$(apt-mark showmanual)" ; \
|
|
|
24
24
|
; \
|
|
25
25
|
apt-mark auto '.*' > /dev/null ; \
|
|
26
26
|
[ -z "$savedAptMark" ] || apt-mark manual $savedAptMark ; \
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
wget https://mafft.cbrc.jp/alignment/software/mafft_7.520-1_amd64.deb -O mafft.deb; \
|
|
28
|
+
apt install -y ./mafft.deb; \
|
|
29
|
+
rm -rf mafft.deb; \
|
|
30
30
|
wget https://github.com/Merck/PepSeA/archive/refs/heads/main.zip -O PepSeA.zip; \
|
|
31
31
|
unzip -q PepSeA.zip -d /opt ; \
|
|
32
|
-
|
|
32
|
+
rm -rf PepSeA.zip; \
|
|
33
33
|
chown -R grok:grok /opt/PepSeA-main ; \
|
|
34
34
|
apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false ; \
|
|
35
35
|
apt-get clean ; \
|
|
@@ -57,6 +57,7 @@ def distout():\n\
|
|
|
57
57
|
\n\
|
|
58
58
|
return distout_output\n' >> /opt/PepSeA-main/alignment/api.py;
|
|
59
59
|
|
|
60
|
+
# It is important to run docker container as user and not as root
|
|
60
61
|
USER grok:grok
|
|
61
62
|
|
|
62
63
|
# Command source does not work for Docker, cause it will apply only to one layer
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.8.
|
|
8
|
+
"version": "2.8.6",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.36.0",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
39
|
"@datagrok-libraries/ml": "^6.3.39",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
@@ -7,7 +7,9 @@ import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-met
|
|
|
7
7
|
import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
8
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
9
9
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
10
|
-
import {
|
|
10
|
+
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
12
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
11
13
|
|
|
12
14
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
13
15
|
const stringArray = col.toList();
|
|
@@ -103,12 +105,11 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
103
105
|
});
|
|
104
106
|
|
|
105
107
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
106
|
-
const
|
|
107
|
-
const
|
|
108
|
-
const splitter = getSplitter(units, separator);
|
|
108
|
+
const uh = UnitsHandler.getOrCreate(params.seqCol);
|
|
109
|
+
const splitter = uh.getSplitter();
|
|
109
110
|
const subParts1 = splitter(sequencesArray[0]);
|
|
110
111
|
const subParts2 = splitter(sequencesArray[1]);
|
|
111
|
-
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
112
|
+
const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
|
|
112
113
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
113
114
|
|
|
114
115
|
propPanel.append(createDifferencesWithPositions(molDifferences));
|
|
@@ -127,8 +128,8 @@ function createPropPanelField(name: string, value: number): HTMLDivElement {
|
|
|
127
128
|
}
|
|
128
129
|
|
|
129
130
|
export function createDifferenceCanvas(
|
|
130
|
-
subParts1:
|
|
131
|
-
subParts2:
|
|
131
|
+
subParts1: ISeqSplitted,
|
|
132
|
+
subParts2: ISeqSplitted,
|
|
132
133
|
units: string,
|
|
133
134
|
molDifferences: { [key: number]: HTMLCanvasElement }): HTMLCanvasElement {
|
|
134
135
|
const canvas = document.createElement('canvas');
|
|
@@ -7,7 +7,6 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
|
7
7
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
8
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
|
-
import {TAGS as bioTAGS, getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
10
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
12
11
|
import {calcMmDistanceMatrix, dmLinearIndex} from './workers/mm-distance-worker-creator';
|
|
13
12
|
import {calculateMMDistancesArray} from './workers/mm-distance-array-service';
|
|
@@ -106,9 +105,10 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
106
105
|
const linearizeFunc = dmLinearIndex(len);
|
|
107
106
|
// array that keeps track of the indexes and scores together
|
|
108
107
|
const indexWScore = Array(len).fill(0)
|
|
109
|
-
.map((_, i) => ({
|
|
110
|
-
|
|
111
|
-
1 -
|
|
108
|
+
.map((_, i) => ({
|
|
109
|
+
idx: i, score: i === this.targetMoleculeIdx ? 1 :
|
|
110
|
+
this.preComputeDistanceMatrix ? 1 - this.mmDistanceMatrix[linearizeFunc(this.targetMoleculeIdx, i)] :
|
|
111
|
+
1 - distanceArray[i]
|
|
112
112
|
}));
|
|
113
113
|
indexWScore.sort((a, b) => b.score - a.score);
|
|
114
114
|
// get the most similar molecules
|
|
@@ -122,12 +122,12 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
122
122
|
const propPanel = ui.div();
|
|
123
123
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
124
124
|
const molColName = this.molCol?.name!;
|
|
125
|
-
const
|
|
126
|
-
const
|
|
127
|
-
const splitter = getSplitter(
|
|
125
|
+
const col = resDf.col(molColName)!;
|
|
126
|
+
const uh = UnitsHandler.getOrCreate(col);
|
|
127
|
+
const splitter = uh.getSplitter();
|
|
128
128
|
const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
|
|
129
129
|
const subParts2 = splitter(resDf.get(molColName, resDf.currentRowIdx));
|
|
130
|
-
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
130
|
+
const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
|
|
131
131
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
132
132
|
if (subParts1.length !== subParts2.length) {
|
|
133
133
|
propPanel.append(ui.divV([
|
package/src/apps/web-logo-app.ts
CHANGED
|
@@ -2,7 +2,12 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
5
7
|
import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
8
|
+
|
|
9
|
+
import {PROPS as wlPROPS} from '../viewers/web-logo-viewer';
|
|
10
|
+
|
|
6
11
|
import {_package} from '../package';
|
|
7
12
|
|
|
8
13
|
export class WebLogoApp {
|
|
@@ -11,7 +16,7 @@ export class WebLogoApp {
|
|
|
11
16
|
df: DG.DataFrame;
|
|
12
17
|
view: DG.TableView;
|
|
13
18
|
|
|
14
|
-
constructor() {}
|
|
19
|
+
constructor(private readonly urlParams: URLSearchParams) {}
|
|
15
20
|
|
|
16
21
|
async init(df: DG.DataFrame, funcName: string): Promise<void> {
|
|
17
22
|
this._funcName = funcName;
|
|
@@ -23,12 +28,27 @@ export class WebLogoApp {
|
|
|
23
28
|
// -- View --
|
|
24
29
|
|
|
25
30
|
async buildView(): Promise<void> {
|
|
26
|
-
|
|
27
|
-
|
|
31
|
+
const urlParamsTxt = wu(this.urlParams.entries())
|
|
32
|
+
.map(([key, value]) => `${key}=${encodeURIComponent(value)}`)
|
|
33
|
+
.toArray().join('&');
|
|
28
34
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
35
|
+
this.view = grok.shell.addTableView(this.df);
|
|
36
|
+
this.view.path = this.view.basePath = `func/${_package.name}.${this._funcName}?${urlParamsTxt}`;
|
|
37
|
+
|
|
38
|
+
const options: { [p: string]: any } = {sequenceColumnName: 'sequence'};
|
|
39
|
+
for (const [optName, optValue] of this.urlParams.entries()) {
|
|
40
|
+
switch (optName) {
|
|
41
|
+
// boolean
|
|
42
|
+
case wlPROPS.fixWidth:
|
|
43
|
+
case wlPROPS.fitArea:
|
|
44
|
+
options[optName] = ((v) => { return ['1', 'on', 'true'].includes(v.toLowerCase()); })(optValue);
|
|
45
|
+
break;
|
|
46
|
+
default:
|
|
47
|
+
options[optName] = optValue;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot
|
|
51
|
+
.fromType('WebLogo', options)) as DG.Viewer & IWebLogoViewer;
|
|
32
52
|
this.view.dockManager.dock(viewer, DG.DOCK_TYPE.DOWN, null, 'WebLogo', 0.35);
|
|
33
53
|
}
|
|
34
54
|
}
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
4
7
|
import {getHelmMonomers} from '../package';
|
|
5
|
-
import {TAGS as bioTAGS, getSplitter, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
8
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
7
10
|
|
|
8
11
|
const V2000_ATOM_NAME_POS = 31;
|
|
9
12
|
|
|
@@ -27,7 +30,7 @@ export async function getMonomericMols(
|
|
|
27
30
|
} else {
|
|
28
31
|
molV3000Array = new Array<string>(mcol.length);
|
|
29
32
|
for (let i = 0; i < mcol.length; i++) {
|
|
30
|
-
const sequenceMonomers = uh.splitted[i].filter((it) => it !== '');
|
|
33
|
+
const sequenceMonomers = wu(uh.splitted[i]).filter((it) => it !== '').toArray();
|
|
31
34
|
const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
|
|
32
35
|
molV3000Array[i] = molV3000;
|
|
33
36
|
}
|
|
@@ -36,7 +39,7 @@ export async function getMonomericMols(
|
|
|
36
39
|
}
|
|
37
40
|
|
|
38
41
|
function molV3000FromNonHelmSequence(
|
|
39
|
-
monomers:
|
|
42
|
+
monomers: ISeqSplitted, monomersDict: Map<string, string>, pattern: boolean = false) {
|
|
40
43
|
let molV3000 = `
|
|
41
44
|
Datagrok macromolecule handler
|
|
42
45
|
|
package/src/package.ts
CHANGED
|
@@ -764,7 +764,8 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
|
764
764
|
export async function webLogoLargeApp(): Promise<void> {
|
|
765
765
|
const pi = DG.TaskBarProgressIndicator.create('WebLogo');
|
|
766
766
|
try {
|
|
767
|
-
const
|
|
767
|
+
const urlParams = new URLSearchParams(window.location.search);
|
|
768
|
+
const app = new WebLogoApp(urlParams);
|
|
768
769
|
const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
|
|
769
770
|
await grok.data.detectSemanticTypes(df);
|
|
770
771
|
await app.init(df, 'webLogoLargeApp');
|
|
@@ -136,7 +136,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$`,
|
|
|
136
136
|
return function(srcCol: DG.Column): DG.Column {
|
|
137
137
|
const converter = new NotationConverter(srcCol);
|
|
138
138
|
const resCol = converter.convert(tgtNotation, tgtSeparator);
|
|
139
|
-
expect(resCol.getTag(
|
|
139
|
+
expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
140
140
|
return resCol;
|
|
141
141
|
};
|
|
142
142
|
}
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -7,7 +7,6 @@ import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-librarie
|
|
|
7
7
|
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
8
8
|
import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
|
|
9
9
|
import {awaitContainerStart} from './utils';
|
|
10
|
-
//import * as grok from 'datagrok-api/grok';
|
|
11
10
|
|
|
12
11
|
category('MSA', async () => {
|
|
13
12
|
//table = await grok.data.files.openTable('Demo:Files/bio/peptides.csv');
|
|
@@ -113,8 +112,8 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
|
|
|
113
112
|
srcCol.semType = semType;
|
|
114
113
|
|
|
115
114
|
const tgtCol: DG.Column = tgtDf.getCol('seq')!;
|
|
116
|
-
const
|
|
117
|
-
expectArray(
|
|
115
|
+
const resCol: DG.Column = await runKalign(srcCol, true);
|
|
116
|
+
expectArray(resCol.toList(), tgtCol.toList());
|
|
118
117
|
}
|
|
119
118
|
|
|
120
119
|
async function _testMSAOnColumn(
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
|
+
import $ from 'cash-dom';
|
|
5
|
+
|
|
4
6
|
import {category, expect, test, awaitCheck, delay} from '@datagrok-libraries/utils/src/test';
|
|
5
7
|
|
|
6
8
|
import {importFasta} from '../package';
|
|
@@ -47,8 +49,12 @@ category('renderers', () => {
|
|
|
47
49
|
await _selectRendererBySemType();
|
|
48
50
|
});
|
|
49
51
|
|
|
50
|
-
test('
|
|
51
|
-
await
|
|
52
|
+
test('setRendererManuallyBeforeAddColumn', async () => {
|
|
53
|
+
await _setRendererManuallyBeforeAddColumn();
|
|
54
|
+
}, {skipReason: 'GROK-11212'});
|
|
55
|
+
|
|
56
|
+
test('setRendererManuallyAfterAddColumn', async () => {
|
|
57
|
+
await _setRendererManuallyAfterAddColumn();
|
|
52
58
|
}, {skipReason: 'GROK-11212'});
|
|
53
59
|
|
|
54
60
|
async function _rendererMacromoleculeFasta() {
|
|
@@ -204,7 +210,7 @@ category('renderers', () => {
|
|
|
204
210
|
|
|
205
211
|
/** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
|
|
206
212
|
* https://reddata.atlassian.net/browse/GROK-11212 */
|
|
207
|
-
async function
|
|
213
|
+
async function _setRendererManuallyBeforeAddColumn() {
|
|
208
214
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
209
215
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
210
216
|
seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
@@ -227,4 +233,32 @@ category('renderers', () => {
|
|
|
227
233
|
`instead of manual '${tgtCellRenderer}'.`);
|
|
228
234
|
}
|
|
229
235
|
}
|
|
236
|
+
|
|
237
|
+
/** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
|
|
238
|
+
* https://reddata.atlassian.net/browse/GROK-11212 */
|
|
239
|
+
async function _setRendererManuallyAfterAddColumn() {
|
|
240
|
+
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
241
|
+
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
242
|
+
seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
243
|
+
seqDiffCol.setTag(bioTAGS.separator, '/');
|
|
244
|
+
seqDiffCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
245
|
+
seqDiffCol.setTag(bioTAGS.alphabet, 'UN');
|
|
246
|
+
seqDiffCol.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
247
|
+
seqDiffCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
248
|
+
const tgtCellRenderer = 'MacromoleculeDifference';
|
|
249
|
+
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
250
|
+
await grok.data.detectSemanticTypes(df);
|
|
251
|
+
const tv = grok.shell.addTableView(df);
|
|
252
|
+
await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
|
|
253
|
+
|
|
254
|
+
seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
|
|
255
|
+
await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
|
|
256
|
+
|
|
257
|
+
const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
258
|
+
if (resCellRenderer !== tgtCellRenderer) { // this is value of MacromoleculeDifferenceCR.cellType
|
|
259
|
+
throw new Error(`Tag 'cell.renderer' has been manually set to '${tgtCellRenderer}' for column ` +
|
|
260
|
+
`but after df was added as table, tag 'cell.renderer' has reset to '${resCellRenderer}' ` +
|
|
261
|
+
`instead of manual '${tgtCellRenderer}'.`);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
230
264
|
});
|
|
@@ -14,8 +14,16 @@ import {
|
|
|
14
14
|
} from '@datagrok-libraries/utils/src/test';
|
|
15
15
|
import * as C from '../utils/constants';
|
|
16
16
|
import {_package, getHelmMonomers} from '../package';
|
|
17
|
-
import {
|
|
17
|
+
import {
|
|
18
|
+
TAGS as bioTAGS,
|
|
19
|
+
splitterAsFasta,
|
|
20
|
+
splitterAsHelm,
|
|
21
|
+
NOTATION
|
|
22
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
18
23
|
import {splitToMonomersUI} from '../utils/split-to-monomers';
|
|
24
|
+
import {SEMTYPE} from 'datagrok-api/dg';
|
|
25
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
26
|
+
import {TAGS} from '../utils/constants';
|
|
19
27
|
|
|
20
28
|
|
|
21
29
|
category('splitters', async () => {
|
|
@@ -35,6 +43,12 @@ category('splitters', async () => {
|
|
|
35
43
|
['M', 'MeI', 'Y', 'K', 'E', 'T', 'L', 'L', 'MeF', 'P',
|
|
36
44
|
'K', 'T', 'D', 'F', 'P', 'M', 'R', 'G', 'G', 'L', 'MeA'],
|
|
37
45
|
],
|
|
46
|
+
fastaFromHelm: [
|
|
47
|
+
'[meI][Pip][dK][Thr_PO3H2][L-hArg(Et,Et)][D-Tyr_Et][Tyr_ab-dehydroMe][dV]EN[D-Orn][D-aThr][Phe_4Me]',
|
|
48
|
+
['meI', 'Pip', 'dK', 'Thr_PO3H2', 'L-hArg(Et,Et)', 'D-Tyr_Et', 'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn',
|
|
49
|
+
'D-aThr', 'Phe_4Me'],
|
|
50
|
+
],
|
|
51
|
+
|
|
38
52
|
helm1: [
|
|
39
53
|
'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$',
|
|
40
54
|
['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et',
|
|
@@ -68,6 +82,7 @@ category('splitters', async () => {
|
|
|
68
82
|
};
|
|
69
83
|
|
|
70
84
|
test('fastaMulti', async () => { await _testFastaSplitter(data.fastaMulti[0], data.fastaMulti[1]); });
|
|
85
|
+
test('fastaFromHelm', async () => { await _testFastaSplitter(data.fastaFromHelm[0], data.fastaFromHelm[1]); });
|
|
71
86
|
|
|
72
87
|
test('helm1', async () => { await _testHelmSplitter(data.helm1[0], data.helm1[1]); });
|
|
73
88
|
test('helm2', async () => { await _testHelmSplitter(data.helm2[0], data.helm2[1]); });
|
|
@@ -78,6 +93,7 @@ category('splitters', async () => {
|
|
|
78
93
|
test('testHelm2', async () => { await _testHelmSplitter(data.testHelm2[0], data.testHelm2[1]); });
|
|
79
94
|
test('testHelm3', async () => { await _testHelmSplitter(data.testHelm3[0], data.testHelm3[1]); });
|
|
80
95
|
|
|
96
|
+
|
|
81
97
|
test('splitToMonomers', async () => {
|
|
82
98
|
const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/MSA.csv');
|
|
83
99
|
|
|
@@ -122,6 +138,16 @@ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
|
|
|
122
138
|
throw new Error(msgs.join(' '));
|
|
123
139
|
}
|
|
124
140
|
});
|
|
141
|
+
|
|
142
|
+
// test('helmAsFasta', async () => {
|
|
143
|
+
// // The columns can't be empty for UnitsHandler
|
|
144
|
+
// /* eslint-disable max-len */
|
|
145
|
+
// const srcSeq = '[meI][Pip][dK][Thr_PO3H2][L-hArg(Et,Et)][D-Tyr_Et][Tyr_ab-dehydroMe][dV]EN[D-Orn][D-aThr][Phe_4Me]';
|
|
146
|
+
// const tgtSeqA = ['meI', 'Pip', 'dK', 'Thr_PO3H2', 'L-hArg(Et,Et)', 'D-Tyr_Et', 'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn', 'D-aThr', 'Phe_4Me'];
|
|
147
|
+
// /* eslint-enable max-len */
|
|
148
|
+
// const resSeqA = splitterAsFasta(srcSeq);
|
|
149
|
+
// expectArray(resSeqA, tgtSeqA);
|
|
150
|
+
// });
|
|
125
151
|
});
|
|
126
152
|
|
|
127
153
|
export async function _testFastaSplitter(src: string, tgt: string[]) {
|
|
@@ -2,11 +2,18 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
5
7
|
import {category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
6
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
+
import {GapSymbols, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
7
11
|
|
|
8
12
|
category('UnitsHandler', () => {
|
|
9
|
-
const
|
|
13
|
+
const fG = GapSymbols[NOTATION.FASTA];
|
|
14
|
+
const hG = GapSymbols[NOTATION.HELM];
|
|
15
|
+
const sG = GapSymbols[NOTATION.SEPARATOR];
|
|
16
|
+
const data: { [testName: string]: { src: { csv: string }, tgt: { splitted: (string[] | string)[] } } } = {
|
|
10
17
|
fasta: {
|
|
11
18
|
src: {
|
|
12
19
|
csv: `seq
|
|
@@ -16,9 +23,9 @@ TTCAAC`
|
|
|
16
23
|
},
|
|
17
24
|
tgt: {
|
|
18
25
|
splitted: [
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
26
|
+
'ACGTC',
|
|
27
|
+
'CAGTGT',
|
|
28
|
+
'TTCAAC',
|
|
22
29
|
]
|
|
23
30
|
}
|
|
24
31
|
},
|
|
@@ -32,9 +39,9 @@ ACCGTACT`,
|
|
|
32
39
|
tgt: {
|
|
33
40
|
splitted: [
|
|
34
41
|
//@formatter:off
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
42
|
+
'AC-GT-CT',
|
|
43
|
+
'CAC-T-GT',
|
|
44
|
+
'ACCGTACT',
|
|
38
45
|
//@formatter:on
|
|
39
46
|
]
|
|
40
47
|
}
|
|
@@ -65,8 +72,8 @@ rut12-rty-her2---wert`
|
|
|
65
72
|
tgt: {
|
|
66
73
|
splitted: [
|
|
67
74
|
['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
|
|
68
|
-
['rut12', 'her2', 'rty',
|
|
69
|
-
['rut12', 'rty', 'her2',
|
|
75
|
+
['rut12', 'her2', 'rty', sG, 'abc1', 'dfgg'],
|
|
76
|
+
['rut12', 'rty', 'her2', sG, sG, 'wert'],
|
|
70
77
|
]
|
|
71
78
|
}
|
|
72
79
|
},
|
|
@@ -99,8 +106,8 @@ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2}$$$$`
|
|
|
99
106
|
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
100
107
|
|
|
101
108
|
const uh = UnitsHandler.getOrCreate(col);
|
|
102
|
-
const
|
|
103
|
-
expectArray(
|
|
109
|
+
const resSplitted: ISeqSplitted[] = uh.splitted;
|
|
110
|
+
expectArray(resSplitted, testData.tgt.splitted);
|
|
104
111
|
});
|
|
105
112
|
}
|
|
106
113
|
});
|
|
@@ -41,25 +41,25 @@ category('UnitsHandler', () => {
|
|
|
41
41
|
});
|
|
42
42
|
|
|
43
43
|
test('Seq-Fasta-units', async () => {
|
|
44
|
-
const [_df, uh] = await
|
|
44
|
+
const [_df, uh] = await loadCsvWithDetection(seqDna);
|
|
45
45
|
expect(uh.notation, NOTATION.FASTA);
|
|
46
46
|
expect(uh.isMsa(), false);
|
|
47
47
|
});
|
|
48
48
|
|
|
49
49
|
test('Seq-Fasta-MSA-units', async () => {
|
|
50
|
-
const [_df, uh] = await
|
|
50
|
+
const [_df, uh] = await loadCsvWithDetection(seqDnaMsa);
|
|
51
51
|
expect(uh.notation, NOTATION.FASTA);
|
|
52
52
|
expect(uh.isMsa(), true);
|
|
53
53
|
});
|
|
54
54
|
|
|
55
55
|
test('Seq-Helm', async () => {
|
|
56
|
-
const [_df, uh] = await
|
|
56
|
+
const [_df, uh] = await loadCsvWithDetection(seqHelm);
|
|
57
57
|
expect(uh.notation, NOTATION.HELM);
|
|
58
58
|
expect(uh.isHelm(), true);
|
|
59
59
|
});
|
|
60
60
|
|
|
61
61
|
test('Seq-UN', async () => {
|
|
62
|
-
const [_df, uh] = await
|
|
62
|
+
const [_df, uh] = await loadCsvWithDetection(seqUn);
|
|
63
63
|
expect(uh.notation, NOTATION.SEPARATOR);
|
|
64
64
|
expect(uh.separator, '-');
|
|
65
65
|
expect(uh.alphabet, ALPHABET.UN);
|
|
@@ -79,15 +79,15 @@ category('UnitsHandler', () => {
|
|
|
79
79
|
return [df, uh];
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
-
async function loadCsvWithTag(csv: string, tag: string, value: string):
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
}
|
|
82
|
+
// async function loadCsvWithTag(csv: string, tag: string, value: string):
|
|
83
|
+
// Promise<[df: DG.DataFrame, uh: UnitsHandler]> {
|
|
84
|
+
// const df = DG.DataFrame.fromCsv(csv);
|
|
85
|
+
// const col = df.getCol('seq');
|
|
86
|
+
// col.setTag(tag, value);
|
|
87
|
+
// col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
88
|
+
// if (value === NOTATION.SEPARATOR)
|
|
89
|
+
// col.setTag(TAGS.separator, '-');
|
|
90
|
+
// const uh = UnitsHandler.getOrCreate(df.getCol('seq'));
|
|
91
|
+
// return [df, uh];
|
|
92
|
+
// }
|
|
93
93
|
});
|
|
@@ -2,13 +2,12 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
|
|
5
|
-
import
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
6
7
|
import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
|
|
7
|
-
import * as C from './constants';
|
|
8
8
|
import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
|
|
9
9
|
import {
|
|
10
10
|
getPaletteByType,
|
|
11
|
-
getSplitter,
|
|
12
11
|
monomerToShort,
|
|
13
12
|
MonomerToShortFunc,
|
|
14
13
|
NOTATION,
|
|
@@ -18,8 +17,16 @@ import {
|
|
|
18
17
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
19
18
|
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
20
19
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
|
|
21
|
+
import {
|
|
22
|
+
Temps as mmcrTemps, Tags as mmcrTags,
|
|
23
|
+
tempTAGS, rendererSettingsChangedState
|
|
24
|
+
} from '../utils/cell-renderer-consts';
|
|
25
|
+
import * as C from './constants';
|
|
26
|
+
|
|
27
|
+
import {_package, getBioLib} from '../package';
|
|
28
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
29
|
+
|
|
23
30
|
|
|
24
31
|
type TempType = { [tagName: string]: any };
|
|
25
32
|
|
|
@@ -30,20 +37,21 @@ function getUpdatedWidth(grid: DG.Grid | null, g: CanvasRenderingContext2D, x: n
|
|
|
30
37
|
return grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
31
38
|
}
|
|
32
39
|
|
|
33
|
-
export function processSequence(subParts:
|
|
34
|
-
const simplified = !subParts.some((amino, index) =>
|
|
40
|
+
export function processSequence(subParts: ISeqSplitted): [string[], boolean] {
|
|
41
|
+
const simplified = !wu.enumerate(subParts).some(([amino, index]) =>
|
|
35
42
|
amino.length > 1 &&
|
|
36
43
|
index != 0 &&
|
|
37
44
|
index != subParts.length - 1);
|
|
38
45
|
|
|
39
46
|
const text: string[] = [];
|
|
40
47
|
const gap = simplified ? '' : ' ';
|
|
41
|
-
|
|
48
|
+
for (const [amino, index] of wu.enumerate(subParts)) {
|
|
49
|
+
let aminoRes = amino;
|
|
42
50
|
if (index < subParts.length)
|
|
43
|
-
|
|
51
|
+
aminoRes += `${amino ? '' : '-'}${gap}`;
|
|
44
52
|
|
|
45
|
-
text.push(
|
|
46
|
-
}
|
|
53
|
+
text.push(aminoRes);
|
|
54
|
+
}
|
|
47
55
|
return [text, simplified];
|
|
48
56
|
}
|
|
49
57
|
|
|
@@ -188,11 +196,12 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
188
196
|
|
|
189
197
|
const separator = tableCol.getTag(bioTAGS.separator) ?? '';
|
|
190
198
|
const splitLimit = w / 5;
|
|
191
|
-
const
|
|
199
|
+
const uh = UnitsHandler.getOrCreate(tableCol);
|
|
200
|
+
const splitterFunc: SplitterFunc = uh.getSplitter(splitLimit);
|
|
192
201
|
|
|
193
202
|
const tempReferenceSequence: string | null = tableColTemp[tempTAGS.referenceSequence];
|
|
194
203
|
const tempCurrentWord: string | null = tableColTemp[tempTAGS.currentWord];
|
|
195
|
-
const referenceSequence:
|
|
204
|
+
const referenceSequence: ISeqSplitted = splitterFunc(
|
|
196
205
|
((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
|
|
197
206
|
tempReferenceSequence : tempCurrentWord ?? '');
|
|
198
207
|
|
|
@@ -226,7 +235,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
226
235
|
// maxLengthWords = colTemp[tempTAGS.bioMaxLengthWords];
|
|
227
236
|
// }
|
|
228
237
|
|
|
229
|
-
const subParts:
|
|
238
|
+
const subParts: ISeqSplitted = splitterFunc(value);
|
|
230
239
|
/* let x1 = x; */
|
|
231
240
|
let color = undefinedColor;
|
|
232
241
|
let drawStyle = DrawStyle.classic;
|
|
@@ -234,7 +243,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
234
243
|
if (aligned && aligned.includes('MSA') && units == NOTATION.SEPARATOR)
|
|
235
244
|
drawStyle = DrawStyle.MSA;
|
|
236
245
|
|
|
237
|
-
for (const [
|
|
246
|
+
for (const [amino, index] of wu.enumerate(subParts)) {
|
|
238
247
|
color = palette.get(amino);
|
|
239
248
|
g.fillStyle = undefinedColor;
|
|
240
249
|
const last = index === subParts.length - 1;
|
|
@@ -281,16 +290,16 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
281
290
|
_cellStyle: DG.GridCellStyle): void {
|
|
282
291
|
const grid = gridCell.grid;
|
|
283
292
|
const cell = gridCell.cell;
|
|
293
|
+
const tableCol = gridCell.tableColumn as DG.Column<string>;
|
|
284
294
|
const s: string = cell.value ?? '';
|
|
285
|
-
const separator = gridCell.tableColumn!.tags[bioTAGS.separator];
|
|
286
|
-
const units: string = gridCell.tableColumn!.tags[DG.TAGS.UNITS];
|
|
287
295
|
w = getUpdatedWidth(grid, g, x, w);
|
|
288
296
|
//TODO: can this be replaced/merged with splitSequence?
|
|
289
297
|
const [s1, s2] = s.split('#');
|
|
290
|
-
const
|
|
298
|
+
const uh = UnitsHandler.getOrCreate(tableCol);
|
|
299
|
+
const splitter = uh.getSplitter();
|
|
291
300
|
const subParts1 = splitter(s1);
|
|
292
301
|
const subParts2 = splitter(s2);
|
|
293
|
-
drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units);
|
|
302
|
+
drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, uh.units);
|
|
294
303
|
}
|
|
295
304
|
}
|
|
296
305
|
|
|
@@ -300,14 +309,14 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
300
309
|
y: number,
|
|
301
310
|
w: number,
|
|
302
311
|
h: number,
|
|
303
|
-
subParts1:
|
|
304
|
-
subParts2:
|
|
312
|
+
subParts1: ISeqSplitted,
|
|
313
|
+
subParts2: ISeqSplitted,
|
|
305
314
|
units: string,
|
|
306
315
|
fullStringLength?: boolean,
|
|
307
316
|
molDifferences?: { [key: number]: HTMLCanvasElement },
|
|
308
317
|
): void {
|
|
309
318
|
if (subParts1.length !== subParts2.length) {
|
|
310
|
-
const sequences: IComparedSequences = fillShorterSequence(subParts1, subParts2);
|
|
319
|
+
const sequences: IComparedSequences = fillShorterSequence(wu(subParts1).toArray(), wu(subParts2).toArray());
|
|
311
320
|
subParts1 = sequences.subParts1;
|
|
312
321
|
subParts2 = sequences.subParts2;
|
|
313
322
|
}
|