@datagrok/bio 2.0.16 → 2.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +1058 -718
- package/dist/package.js +741 -535
- package/package.json +14 -14
- package/src/analysis/sequence-activity-cliffs.ts +10 -10
- package/src/analysis/sequence-similarity-viewer.ts +4 -3
- package/src/calculations/monomerLevelMols.ts +5 -3
- package/src/package-test.ts +1 -0
- package/src/package.ts +16 -14
- package/src/tests/WebLogo-positions-test.ts +47 -46
- package/src/tests/WebLogo-test.ts +14 -14
- package/src/tests/convert-test.ts +5 -3
- package/src/tests/fasta-export-tests.ts +110 -0
- package/src/tests/splitters-test.ts +19 -5
- package/src/utils/cell-renderer.ts +24 -29
- package/src/utils/convert.ts +11 -12
- package/src/utils/multiple-sequence-alignment.ts +4 -5
- package/src/utils/save-as-fasta.ts +109 -0
- package/src/utils/utils.ts +7 -5
- package/src/viewers/vd-regions-viewer.ts +16 -17
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
import {after, before, category, test, expect, expectArray, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
4
2
|
import * as ui from 'datagrok-api/ui';
|
|
5
3
|
import * as DG from 'datagrok-api/dg';
|
|
6
|
-
import
|
|
7
|
-
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
import {after, before, category, test, expect, expectArray, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
8
7
|
import * as C from '../utils/constants';
|
|
8
|
+
import {splitToMonomers, _package, getHelmMonomers} from '../package';
|
|
9
|
+
|
|
9
10
|
|
|
10
11
|
category('splitters', () => {
|
|
11
12
|
let tvList: DG.TableView[];
|
|
@@ -27,6 +28,11 @@ category('splitters', () => {
|
|
|
27
28
|
const helm2 = 'PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.N.meK}$$$';
|
|
28
29
|
|
|
29
30
|
const data: { [key: string]: [string, string[]] } = {
|
|
31
|
+
fastaMulti: [
|
|
32
|
+
'M[MeI]YKETLL[MeF]PKTDFPMRGGL[MeA]',
|
|
33
|
+
['M', 'MeI', 'Y', 'K', 'E', 'T', 'L', 'L', 'MeF', 'P',
|
|
34
|
+
'K', 'T', 'D', 'F', 'P', 'M', 'R', 'G', 'G', 'L', 'MeA']
|
|
35
|
+
],
|
|
30
36
|
helm1: [
|
|
31
37
|
'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$',
|
|
32
38
|
['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et',
|
|
@@ -59,6 +65,8 @@ category('splitters', () => {
|
|
|
59
65
|
],
|
|
60
66
|
};
|
|
61
67
|
|
|
68
|
+
test('fastaMulti', async () => { await _testFastaSplitter(data.fastaMulti[0], data.fastaMulti[1]); });
|
|
69
|
+
|
|
62
70
|
test('helm1', async () => { await _testHelmSplitter(data.helm1[0], data.helm1[1]); });
|
|
63
71
|
test('helm2', async () => { await _testHelmSplitter(data.helm2[0], data.helm2[1]); });
|
|
64
72
|
test('helm3-multichar', async () => { await _testHelmSplitter(data.helm3[0], data.helm3[1]); });
|
|
@@ -113,8 +121,14 @@ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
|
|
|
113
121
|
});
|
|
114
122
|
});
|
|
115
123
|
|
|
124
|
+
/** Splits `src` with the FASTA splitter and checks the resulting monomers against `tgt`.
 * @param src  Sequence text in FASTA notation (multi-character monomers in square brackets)
 * @param tgt  Expected list of monomers
 */
export async function _testFastaSplitter(src: string, tgt: string[]) {
  const monomers: string[] = bio.splitterAsFasta(src);

  // Log both the input and the splitter output to simplify diagnosing a failed expectation
  const srcJson = JSON.stringify(src);
  const resJson = JSON.stringify(monomers);
  console.debug(`Bio: tests: splitters: src=${srcJson}, res=${resJson} .`);

  expectArray(monomers, tgt);
}
|
|
129
|
+
|
|
116
130
|
/** Splits `src` with the HELM splitter and checks the resulting monomers against `tgt`.
 * @param src  Sequence text in HELM notation
 * @param tgt  Expected list of monomers
 */
export async function _testHelmSplitter(src: string, tgt: string[]) {
  const monomers: string[] = bio.splitterAsHelm(src);

  // Log both the input and the splitter output to simplify diagnosing a failed expectation
  const srcJson = JSON.stringify(src);
  const resJson = JSON.stringify(monomers);
  console.debug(`Bio: tests: splitters: src=${srcJson}, res=${resJson} .`);

  expectArray(monomers, tgt);
}
|
|
@@ -1,31 +1,26 @@
|
|
|
1
|
-
import * as
|
|
2
|
-
import * as DG from 'datagrok-api/dg';
|
|
3
|
-
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
4
|
-
import {NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
5
|
-
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
6
|
-
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
-
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
8
2
|
import * as ui from 'datagrok-api/ui';
|
|
9
|
-
import
|
|
10
|
-
import
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
import * as C from './constants';
|
|
11
6
|
|
|
12
7
|
const undefinedColor = 'rgb(100,100,100)';
|
|
13
|
-
const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string =
|
|
8
|
+
const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = bio.monomerToShort;
|
|
14
9
|
|
|
15
10
|
|
|
16
|
-
function getPaletteByType(paletteType: string): SeqPalette {
|
|
11
|
+
/** Picks the monomer palette for a palette type code.
 * @param paletteType  'PT' selects the amino acids palette; 'NT', 'DNA' and 'RNA' select
 *                     the nucleotides palette; any other value selects the palette for unknown monomers
 * @returns The palette used to color monomers
 */
function getPaletteByType(paletteType: string): bio.SeqPalette {
  if (paletteType === 'PT')
    return bio.AminoacidsPalettes.GrokGroups;

  // All nucleotide-like types share the same palette
  if (paletteType === 'NT' || paletteType === 'DNA' || paletteType === 'RNA')
    return bio.NucleotidesPalettes.Chromatogram;

  // other
  return bio.UnknownSeqPalettes.Color;
}
|
|
31
26
|
|
|
@@ -66,7 +61,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
66
61
|
}
|
|
67
62
|
|
|
68
63
|
onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
69
|
-
if (gridCell.cell.column.getTag(UnitsHandler.TAGS.aligned) !== 'SEQ.MSA')
|
|
64
|
+
if (gridCell.cell.column.getTag(bio.UnitsHandler.TAGS.aligned) !== 'SEQ.MSA')
|
|
70
65
|
return;
|
|
71
66
|
|
|
72
67
|
const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
|
|
@@ -94,7 +89,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
94
89
|
}
|
|
95
90
|
left = (argsX >= maxLengthWordsSum[left]) ? left + 1 : left;
|
|
96
91
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
97
|
-
const splitterFunc: SplitterFunc =
|
|
92
|
+
const splitterFunc: bio.SplitterFunc = bio.getSplitter('separator', separator);
|
|
98
93
|
const subParts: string[] = splitterFunc(gridCell.cell.value);
|
|
99
94
|
(((subParts[left]?.length ?? 0) > 0)) ?
|
|
100
95
|
ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16) : ui.tooltip.hide();
|
|
@@ -135,7 +130,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
135
130
|
|
|
136
131
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
137
132
|
const splitLimit = gridCell.bounds.width / 5;
|
|
138
|
-
const splitterFunc: SplitterFunc =
|
|
133
|
+
const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, separator, splitLimit);
|
|
139
134
|
const referenceSequence: string[] = splitterFunc(((gridCell.cell.column?.temp['reference-sequence'] != null) && (gridCell.cell.column?.temp['reference-sequence'] != ''))
|
|
140
135
|
? gridCell.cell.column.temp['reference-sequence'] : gridCell.cell.column.temp['current-word'] ?? '');
|
|
141
136
|
const monomerWidth = (gridCell.cell.column?.temp['monomer-width'] != null) ? gridCell.cell.column.temp['monomer-width'] : 'short';
|
|
@@ -188,15 +183,15 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
188
183
|
const subParts: string[] = splitterFunc(cell.value);
|
|
189
184
|
let x1 = x;
|
|
190
185
|
let color = undefinedColor;
|
|
191
|
-
let drawStyle = DrawStyle.classic;
|
|
186
|
+
let drawStyle = bio.DrawStyle.classic;
|
|
192
187
|
if (gridCell.cell.column.getTag('aligned').includes('MSA') && gridCell.cell.column.getTag('units') === 'separator')
|
|
193
|
-
drawStyle = DrawStyle.MSA;
|
|
188
|
+
drawStyle = bio.DrawStyle.MSA;
|
|
194
189
|
|
|
195
190
|
subParts.every((amino, index) => {
|
|
196
191
|
color = palette.get(amino);
|
|
197
192
|
g.fillStyle = undefinedColor;
|
|
198
193
|
let last = index === subParts.length - 1;
|
|
199
|
-
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell, referenceSequence, maxLengthOfMonomer);
|
|
194
|
+
x1 = bio.printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell, referenceSequence, maxLengthOfMonomer);
|
|
200
195
|
return x1 - minDistanceRenderer - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x) <= gridCell.bounds.width;
|
|
201
196
|
});
|
|
202
197
|
|
|
@@ -273,7 +268,7 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
273
268
|
w = getUpdatedWidth(grid, g, x, w);
|
|
274
269
|
//TODO: can this be replaced/merged with splitSequence?
|
|
275
270
|
const [s1, s2] = s.split('#');
|
|
276
|
-
const splitter =
|
|
271
|
+
const splitter = bio.getSplitter(units, separator);
|
|
277
272
|
const subParts1 = splitter(s1);
|
|
278
273
|
const subParts2 = splitter(s2);
|
|
279
274
|
drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units, separator);
|
|
@@ -315,7 +310,7 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
315
310
|
g.font = '12px monospace';
|
|
316
311
|
g.textBaseline = 'top';
|
|
317
312
|
|
|
318
|
-
let palette: SeqPalette = UnknownSeqPalettes.Color;
|
|
313
|
+
let palette: bio.SeqPalette = bio.UnknownSeqPalettes.Color;
|
|
319
314
|
if (units != 'HELM')
|
|
320
315
|
palette = getPaletteByType(units.substring(units.length - 2));
|
|
321
316
|
|
|
@@ -327,12 +322,12 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
327
322
|
|
|
328
323
|
if (amino1 != amino2) {
|
|
329
324
|
const color2 = palette.get(amino2);
|
|
330
|
-
const subX0 = printLeftOrCentered(updatedX, updatedY - vShift, w, h, g, amino1, color1, 0, true);
|
|
331
|
-
const subX1 = printLeftOrCentered(updatedX, updatedY + vShift, w, h, g, amino2, color2, 0, true);
|
|
325
|
+
const subX0 = bio.printLeftOrCentered(updatedX, updatedY - vShift, w, h, g, amino1, color1, 0, true);
|
|
326
|
+
const subX1 = bio.printLeftOrCentered(updatedX, updatedY + vShift, w, h, g, amino2, color2, 0, true);
|
|
332
327
|
updatedX = Math.max(subX1, subX0);
|
|
333
328
|
if (molDifferences)
|
|
334
329
|
molDifferences[i] = createDifferenceCanvas(amino1, amino2, color1, color2, updatedY, vShift, h);
|
|
335
|
-
} else { updatedX = printLeftOrCentered(updatedX, updatedY, w, h, g, amino1, color1, 0, true, 0.5); }
|
|
330
|
+
} else { updatedX = bio.printLeftOrCentered(updatedX, updatedY, w, h, g, amino1, color1, 0, true, 0.5); }
|
|
336
331
|
updatedX += 4;
|
|
337
332
|
}
|
|
338
333
|
g.restore();
|
|
@@ -356,7 +351,7 @@ function createDifferenceCanvas(
|
|
|
356
351
|
canvas.width = width + 4;
|
|
357
352
|
context.font = '12px monospace';
|
|
358
353
|
context.textBaseline = 'top';
|
|
359
|
-
printLeftOrCentered(0, y - shift, width, h, context, amino1, color1, 0, true);
|
|
360
|
-
printLeftOrCentered(0, y + shift, width, h, context, amino2, color2, 0, true);
|
|
354
|
+
bio.printLeftOrCentered(0, y - shift, width, h, context, amino1, color1, 0, true);
|
|
355
|
+
bio.printLeftOrCentered(0, y + shift, width, h, context, amino2, color2, 0, true);
|
|
361
356
|
return canvas;
|
|
362
357
|
}
|
package/src/utils/convert.ts
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
import
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
5
|
|
|
6
|
+
import $ from 'cash-dom';
|
|
6
7
|
import {Subscription} from 'rxjs';
|
|
7
|
-
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
8
|
-
import {NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
let convertDialog: DG.Dialog | null = null;
|
|
@@ -17,13 +16,13 @@ let convertDialogSubs: Subscription[] = [];
|
|
|
17
16
|
* @param {DG.column} col Column with 'Macromolecule' semantic type
|
|
18
17
|
*/
|
|
19
18
|
export function convert(col: DG.Column): void {
|
|
20
|
-
const converter = new NotationConverter(col);
|
|
21
|
-
const currentNotation: NOTATION = converter.notation;
|
|
19
|
+
const converter = new bio.NotationConverter(col);
|
|
20
|
+
const currentNotation: bio.NOTATION = converter.notation;
|
|
22
21
|
//TODO: read all notations
|
|
23
22
|
const notations = [
|
|
24
|
-
NOTATION.FASTA,
|
|
25
|
-
NOTATION.SEPARATOR,
|
|
26
|
-
NOTATION.HELM
|
|
23
|
+
bio.NOTATION.FASTA,
|
|
24
|
+
bio.NOTATION.SEPARATOR,
|
|
25
|
+
bio.NOTATION.HELM
|
|
27
26
|
];
|
|
28
27
|
const separatorArray = ['-', '.', '/'];
|
|
29
28
|
const filteredNotations = notations.filter((e) => e !== currentNotation);
|
|
@@ -33,7 +32,7 @@ export function convert(col: DG.Column): void {
|
|
|
33
32
|
|
|
34
33
|
// hide the separator input for non-SEPARATOR target notations
|
|
35
34
|
const toggleSeparator = () => {
|
|
36
|
-
if (targetNotationInput.value !== NOTATION.SEPARATOR)
|
|
35
|
+
if (targetNotationInput.value !== bio.NOTATION.SEPARATOR)
|
|
37
36
|
$(separatorInput.root).hide();
|
|
38
37
|
else
|
|
39
38
|
$(separatorInput.root).show();
|
|
@@ -64,7 +63,7 @@ export function convert(col: DG.Column): void {
|
|
|
64
63
|
separatorInput.root
|
|
65
64
|
]))
|
|
66
65
|
.onOK(async () => {
|
|
67
|
-
const targetNotation = targetNotationInput.value as NOTATION;
|
|
66
|
+
const targetNotation = targetNotationInput.value as bio.NOTATION;
|
|
68
67
|
const separator: string | null = separatorInput.value;
|
|
69
68
|
|
|
70
69
|
await convertDo(col, targetNotation, separator);
|
|
@@ -81,9 +80,9 @@ export function convert(col: DG.Column): void {
|
|
|
81
80
|
|
|
82
81
|
/** Creates a new column with converted sequences and detects its semantic type */
|
|
83
82
|
export async function convertDo(
|
|
84
|
-
srcCol: DG.Column, targetNotation: NOTATION, separator: string | null
|
|
83
|
+
srcCol: DG.Column, targetNotation: bio.NOTATION, separator: string | null
|
|
85
84
|
): Promise<DG.Column> {
|
|
86
|
-
const converter = new NotationConverter(srcCol);
|
|
85
|
+
const converter = new bio.NotationConverter(srcCol);
|
|
87
86
|
const newColumn = converter.convert(targetNotation, separator);
|
|
88
87
|
srcCol.dataFrame.columns.add(newColumn);
|
|
89
88
|
|
|
@@ -36,11 +36,10 @@ export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName
|
|
|
36
36
|
sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
|
|
37
37
|
|
|
38
38
|
const fasta = _stringsToFasta(sequences);
|
|
39
|
-
const CLI = await new Aioli(
|
|
40
|
-
|
|
41
|
-
version: '3.3.1',
|
|
42
|
-
|
|
43
|
-
});
|
|
39
|
+
const CLI = await new Aioli([
|
|
40
|
+
'base/1.0.0',
|
|
41
|
+
{tool: 'kalign', version: '3.3.1', reinit: true,}
|
|
42
|
+
]);
|
|
44
43
|
|
|
45
44
|
console.log(['fasta.length =', fasta.length]);
|
|
46
45
|
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
import wu from 'wu';
|
|
7
|
+
|
|
8
|
+
const FASTA_LINE_WIDTH = 60;
|
|
9
|
+
|
|
10
|
+
/** Shows a dialog to select the id columns, the sequence column and the FASTA line width,
 * then builds the FASTA content and triggers its download as `<table name>.fasta`.
 *
 * Id column candidates are the grid columns that are not macromolecules; the column whose
 * name contains 'id' (case-insensitive) is preselected. Sequence column candidates are
 * the macromolecule columns in FASTA notation; the first of them is preselected.
 */
export function saveAsFastaUI() {
  // Use grid for column order adjusted by user
  const grid: DG.Grid = grok.shell.tv.grid;

  // Grid columns in their displayed order (range from 0 to grid.columns.length)
  const gridColList: DG.GridColumn[] = wu.count(0).take(grid.columns.length)
    .map((colI: number) => grid.columns.byIndex(colI)!)
    .toArray();

  const idGColList: DG.GridColumn[] = gridColList
    .filter((gcol: DG.GridColumn) => gcol.column ? gcol.column.semType !== DG.SEMTYPE.MACROMOLECULE : false);
  const defaultIdGCol: DG.GridColumn | undefined = idGColList
    .find((gcol: DG.GridColumn) => gcol.name.toLowerCase().indexOf('id') !== -1);
  const idDefaultValue = defaultIdGCol ? [defaultIdGCol.name] : [];

  const idGColListInput = ui.multiChoiceInput('Seq id columns', idDefaultValue,
    idGColList.map((gcol: DG.GridColumn) => gcol.name));

  const seqGColList: DG.GridColumn[] = gridColList
    .filter((gc: DG.GridColumn) => {
      const col: DG.Column | null = gc.column;
      if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
        const uh = new bio.UnitsHandler(col);
        return uh.isFasta();
      }
      return false;
    });

  // A single-choice input takes a single value; use null (not an empty array) when there is nothing to preselect
  const seqDefaultValue: string | null = seqGColList.length > 0 ? seqGColList[0].name : null;
  const seqColInput = ui.choiceInput('Seq column', seqDefaultValue,
    seqGColList.map((gCol: DG.GridColumn) => gCol.name));

  const lineWidthInput = ui.intInput('FASTA line width', FASTA_LINE_WIDTH);

  ui.dialog({title: 'Save as FASTA'})
    .add(ui.inputs([
      idGColListInput,
      seqColInput,
      lineWidthInput
    ]))
    .onOK(() => {
      const valueIdColList: DG.Column[] = idGColListInput.value ?
        idGColListInput.value.map((colName: string) => grid.columns.byName(colName)!.column!) : [];
      const valueSeqCol: DG.Column | null = seqColInput.value ?
        grid.columns.byName(seqColInput.value as string)!.column : null;
      const valueLineWidth = lineWidthInput.value ?? FASTA_LINE_WIDTH;

      if (!valueSeqCol) {
        grok.shell.warning(`Seq column is mandatory to save as FASTA.`);
        // Nothing to export without a sequence column; do not fall through with a null column
        return;
      }

      const resFastaTxt: string = saveAsFastaDo(valueIdColList, valueSeqCol, valueLineWidth);

      // Offer the text to the browser as a file download through a temporary anchor element
      const aEl: HTMLAnchorElement = document.createElement('a');
      aEl.setAttribute('href', `data:text/plain;charset=utf-8,${encodeURIComponent(resFastaTxt)}`);
      aEl.setAttribute('download', `${grid.dataFrame.name}.fasta`);
      aEl.click();
    })
    .show();
}
|
|
67
|
+
|
|
68
|
+
/** Builds FASTA text for all rows of the sequence column.
 *
 * Every row produces a header line `>id1|id2|...` (values of the id columns joined with
 * vertical bars) followed by the sequence wrapped into lines of at most `lineWidth` monomers.
 * Every line, including the last one, is terminated with `lineSeparator`.
 *
 * @param idColList      Columns whose values form the sequence identifier
 * @param seqCol         Column with sequences in FASTA notation
 * @param lineWidth      Maximum number of monomers per sequence line
 * @param lineSeparator  Text appended to every output line
 * @returns The FASTA content; an empty string for a column without rows
 */
export function saveAsFastaDo(
  idColList: DG.Column[], seqCol: DG.Column, lineWidth: number = FASTA_LINE_WIDTH, lineSeparator: string = '\n'
): string {
  const splitter: bio.SplitterFunc = bio.splitterAsFasta;

  const fastaLines: string[] = [];

  for (let rowI: number = 0; rowI < seqCol.length; rowI++) {
    // multiple identifiers separated by vertical bars
    // https://en.wikipedia.org/wiki/FASTA_format

    // An empty id cell may come back as null (assumption, confirm against DG.Column.get);
    // render it as an empty identifier part instead of throwing on .toString()
    const seqId: string = idColList.map((col) => String(col.get(rowI) ?? '')).join('|');
    // Likewise an empty sequence cell is exported as a header with no sequence lines
    const seq: string = seqCol.get(rowI) ?? '';
    const seqLineList: string[] = wrapSequence(seq, splitter, lineWidth);

    fastaLines.push(`>${seqId}${lineSeparator}`);
    for (const line of seqLineList)
      fastaLines.push(`${line}${lineSeparator}`);
  }

  // Every element already ends with lineSeparator. join('') is used rather than
  // ''.concat(...fastaLines): spreading a large array as call arguments can exceed
  // the engine's argument limit and throw a RangeError on big tables.
  return fastaLines.join('');
}
|
|
92
|
+
|
|
93
|
+
/** Wraps a sequence into lines of at most `lineWidth` monomers.
 *
 * The sequence is split into monomers first, so a multi-character monomer is never
 * broken across lines. Monomers longer than one character are written in square brackets.
 *
 * @param seq        Sequence text
 * @param splitter   Function splitting the sequence text into monomers
 * @param lineWidth  Maximum number of monomers per line; a fractional value is rounded down.
 *                   A value below 1 (or NaN) disables wrapping: the whole sequence goes on one line.
 * @returns The list of lines (without line separators); empty when the sequence has no monomers
 */
export function wrapSequence(seq: string, splitter: bio.SplitterFunc, lineWidth: number = FASTA_LINE_WIDTH): string[] {
  const seqMonomerList = splitter(seq);
  const seqLength: number = seqMonomerList.length;

  // A chunk size below 1 would never advance the position and loop forever,
  // so such widths (0, negative, NaN) fall back to a single unwrapped line.
  const chunkSize: number = lineWidth >= 1 ? Math.floor(lineWidth) : seqLength;

  const seqLineList: string[] = [];
  for (let seqPos = 0; seqPos < seqLength; seqPos += chunkSize) {
    /* join sliced monomers into a line */
    const seqLineTxt: string = seqMonomerList
      .slice(seqPos, seqPos + chunkSize)
      .map((m) => m.length > 1 ? `[${m}]` : m)
      .join('');
    seqLineList.push(seqLineTxt);
  }

  return seqLineList;
}
|
package/src/utils/utils.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
4
6
|
import {
|
|
5
7
|
CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, MONOMER_SYMBOL,
|
|
6
8
|
RGROUP_ALTER_ID, RGROUP_FIELD, RGROUP_LABEL, SDF_MONOMER_NAME
|
|
@@ -18,7 +20,7 @@ export function encodeMonomers(col: DG.Column): DG.Column | null {
|
|
|
18
20
|
const monomerSymbolDict: { [key: string]: number } = {};
|
|
19
21
|
const units = col.tags[DG.TAGS.UNITS];
|
|
20
22
|
const sep = col.getTag(UnitsHandler.TAGS.separator);
|
|
21
|
-
const splitterFunc: SplitterFunc =
|
|
23
|
+
const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);
|
|
22
24
|
const encodedStringArray = [];
|
|
23
25
|
for (let i = 0; i < col.length; ++i) {
|
|
24
26
|
let encodedMonomerStr = '';
|
|
@@ -42,7 +44,7 @@ export function encodeMonomers(col: DG.Column): DG.Column | null {
|
|
|
42
44
|
export function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): any[][] | null {
|
|
43
45
|
const units = col.tags[DG.TAGS.UNITS];
|
|
44
46
|
const sep = col.getTag('separator');
|
|
45
|
-
const splitterFunc: SplitterFunc =
|
|
47
|
+
const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);
|
|
46
48
|
const monomersDict = createMomomersMolDict(monomersLibObject);
|
|
47
49
|
const molFiles = [];
|
|
48
50
|
for (let i = 0; i < col.length; ++i) {
|
|
@@ -66,7 +68,7 @@ export function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): an
|
|
|
66
68
|
export function getMolfilesFromSingleSeq(cell: DG.Cell, monomersLibObject: any[]): any[][] | null {
|
|
67
69
|
const units = cell.column.tags[DG.TAGS.UNITS];
|
|
68
70
|
const sep = cell.column!.getTag('separator');
|
|
69
|
-
const splitterFunc: SplitterFunc =
|
|
71
|
+
const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);
|
|
70
72
|
const monomersDict = createMomomersMolDict(monomersLibObject);
|
|
71
73
|
const molFiles = [];
|
|
72
74
|
const macroMolecule = cell.value;
|
|
@@ -1,12 +1,9 @@
|
|
|
1
1
|
import * as ui from 'datagrok-api/ui';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
import {IVdRegionsViewer} from '@datagrok-libraries/bio/src/viewers/vd-regions-viewer';
|
|
7
|
-
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
8
|
-
|
|
9
|
-
const vrt = VdRegionType;
|
|
6
|
+
const vrt = bio.VdRegionType;
|
|
10
7
|
|
|
11
8
|
// Positions of regions for numbering schemes
|
|
12
9
|
// http://www.bioinf.org.uk/abs/info.html
|
|
@@ -37,7 +34,7 @@ const vrt = VdRegionType;
|
|
|
37
34
|
/** Viewer with tabs based on description of chain regions.
|
|
38
35
|
* Used to define regions of an immunoglobulin LC.
|
|
39
36
|
*/
|
|
40
|
-
export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
|
|
37
|
+
export class VdRegionsViewer extends DG.JsViewer implements bio.IVdRegionsViewer {
|
|
41
38
|
// private regionsDf: DG.DataFrame;
|
|
42
39
|
private regionsFg: DG.FilterGroup | null = null;
|
|
43
40
|
// private regionsTV: DG.TableView;
|
|
@@ -46,7 +43,7 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
|
|
|
46
43
|
private isOpened: boolean = false;
|
|
47
44
|
private panelNode: DG.DockNode | null = null;
|
|
48
45
|
|
|
49
|
-
public regions: VdRegion[] = [];
|
|
46
|
+
public regions: bio.VdRegion[] = [];
|
|
50
47
|
public regionTypes: string[];
|
|
51
48
|
public chains: string[];
|
|
52
49
|
public sequenceColumnNamePostfix: string;
|
|
@@ -60,7 +57,7 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
|
|
|
60
57
|
}
|
|
61
58
|
|
|
62
59
|
// TODO: .onTableAttached is not calling on dataFrame set, onPropertyChanged also not calling
|
|
63
|
-
public async setDf(value: DG.DataFrame, regions: VdRegion[]) {
|
|
60
|
+
public async setDf(value: DG.DataFrame, regions: bio.VdRegion[]) {
|
|
64
61
|
console.debug('VdRegionsViewer.setDf()');
|
|
65
62
|
await this.destroyView();
|
|
66
63
|
this.regions = regions;
|
|
@@ -113,8 +110,10 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
|
|
|
113
110
|
await this.buildView();
|
|
114
111
|
}
|
|
115
112
|
|
|
116
|
-
public override
|
|
117
|
-
|
|
113
|
+
/** Handles attaching of a table: schedules the viewer initialization instead of running it inline. */
public override onTableAttached() {
  const deferredInit = async (): Promise<void> => {
    await this.init();
  };
  // Zero delay: init() runs on the next event cycle, after the current call stack completes
  window.setTimeout(deferredInit, 0 /* next event cycle */);
}
|
|
119
118
|
|
|
120
119
|
public override async onPropertyChanged(property: DG.Property | null) {
|
|
@@ -175,7 +174,7 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
|
|
|
175
174
|
//#region -- View --
|
|
176
175
|
private host: HTMLElement | null = null;
|
|
177
176
|
private mainLayout: HTMLTableElement | null = null;
|
|
178
|
-
private logos: { [chain: string]: WebLogo }[] = [];
|
|
177
|
+
private logos: { [chain: string]: bio.WebLogo }[] = [];
|
|
179
178
|
|
|
180
179
|
private async destroyView(): Promise<void> {
|
|
181
180
|
// TODO: Unsubscribe from and remove all view elements
|
|
@@ -195,16 +194,16 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
|
|
|
195
194
|
const colNames: { [chain: string]: string } = Object.assign({},
|
|
196
195
|
...this.chains.map((chain) => ({[chain]: `${chain} ${this.sequenceColumnNamePostfix}`})));
|
|
197
196
|
|
|
198
|
-
const regionsFiltered: VdRegion[] = this.regions.filter((r: VdRegion) => this.regionTypes.includes(r.type));
|
|
197
|
+
const regionsFiltered: bio.VdRegion[] = this.regions.filter((r: bio.VdRegion) => this.regionTypes.includes(r.type));
|
|
199
198
|
|
|
200
199
|
const orderList: number[] = Array.from(new Set(regionsFiltered.map((r) => r.order))).sort();
|
|
201
200
|
|
|
202
201
|
this.logos = [];
|
|
203
202
|
|
|
204
203
|
for (let orderI = 0; orderI < orderList.length; orderI++) {
|
|
205
|
-
const regionChains: { [chain: string]: WebLogo } = {};
|
|
204
|
+
const regionChains: { [chain: string]: bio.WebLogo } = {};
|
|
206
205
|
for (const chain of this.chains) {
|
|
207
|
-
const region: VdRegion | undefined = regionsFiltered
|
|
206
|
+
const region: bio.VdRegion | undefined = regionsFiltered
|
|
208
207
|
.find((r) => r.order == orderList[orderI] && r.chain == chain);
|
|
209
208
|
regionChains[chain] = (await this.dataFrame.plot.fromType('WebLogo', {
|
|
210
209
|
sequenceColumnName: colNames[chain],
|
|
@@ -213,7 +212,7 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
|
|
|
213
212
|
fixWidth: true,
|
|
214
213
|
skipEmptyPositions: this.skipEmptyPositions,
|
|
215
214
|
positionWidth: this.positionWidth,
|
|
216
|
-
})) as unknown as WebLogo;
|
|
215
|
+
})) as unknown as bio.WebLogo;
|
|
217
216
|
}
|
|
218
217
|
// WebLogo creation fires onRootSizeChanged event even before control being added to this.logos
|
|
219
218
|
this.logos[orderI] = regionChains;
|
|
@@ -238,7 +237,7 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
|
|
|
238
237
|
})] : []),
|
|
239
238
|
// List with controls for regions
|
|
240
239
|
...[...Array(orderList.length).keys()].map((orderI) => {
|
|
241
|
-
const wl: WebLogo = this.logos[orderI][chain];
|
|
240
|
+
const wl: bio.WebLogo = this.logos[orderI][chain];
|
|
242
241
|
wl.root.style.height = '100%';
|
|
243
242
|
|
|
244
243
|
const resDiv = ui.div([wl.root]/*`${chain} ${regionsFiltered[rI]}`*/, {
|
|
@@ -255,7 +254,7 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
|
|
|
255
254
|
},
|
|
256
255
|
['', ...[...Array(orderList.length).keys()].map(
|
|
257
256
|
(orderI: number) => regionsFiltered.find(
|
|
258
|
-
(r: VdRegion) => r.order == orderList[orderI] && r.chain == this.chains[0]
|
|
257
|
+
(r: bio.VdRegion) => r.order == orderList[orderI] && r.chain == this.chains[0]
|
|
259
258
|
)!.name || 'Name')]
|
|
260
259
|
);
|
|
261
260
|
this.mainLayout.className = 'mlb-vd-regions-viewer-table2';
|