@datagrok/bio 1.5.9 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +17 -5
- package/dist/package-test.js +616 -496
- package/dist/package.js +404 -473
- package/files/samples/sample_FASTA.csv +0 -1
- package/files/samples/sample_FASTA.fasta +0 -3
- package/files/samples/sample_FASTA_DNA.csv +101 -0
- package/files/samples/sample_FASTA_PT.csv +101 -0
- package/files/samples/sample_FASTA_RNA.csv +101 -0
- package/files/samples/testSmiles2.csv +12248 -0
- package/package.json +2 -2
- package/scripts/generate_fasta_csv_for_alphabets.R +70 -0
- package/src/package-test.ts +1 -0
- package/src/package.ts +105 -20
- package/src/tests/convert-test.ts +8 -8
- package/src/tests/detectors-test.ts +15 -3
- package/src/tests/renderer-test.ts +40 -18
- package/src/utils/cell-renderer.ts +47 -75
- package/src/utils/convert.ts +10 -14
- package/src/utils/multiple-sequence-alignment.ts +4 -2
- package/src/utils/notation-converter.ts +215 -55
- package/{test-Bio-34f75e5127b8-b47d4664.html → test-Bio-34f75e5127b8-7af21e5d.html} +17 -21
- package/src/utils/chem-palette.ts +0 -280
- package/src/utils/misc.ts +0 -29
|
@@ -1,33 +1,36 @@
|
|
|
1
|
-
import * as C from
|
|
2
|
-
import {getSeparator} from "./misc";
|
|
3
|
-
import {ChemPalette} from "./chem-palette";
|
|
1
|
+
import * as C from './constants';
|
|
4
2
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
-
import {AminoacidsPalettes} from
|
|
6
|
-
import {NucleotidesPalettes} from
|
|
7
|
-
import {UnknownSeqPalettes} from
|
|
8
|
-
import {SplitterFunc, WebLogo} from
|
|
9
|
-
import {SeqPalette} from
|
|
3
|
+
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
4
|
+
import {NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
5
|
+
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
6
|
+
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
10
8
|
import * as ui from 'datagrok-api/ui';
|
|
11
9
|
|
|
12
10
|
const lru = new DG.LruCache<any, any>();
|
|
11
|
+
const undefinedColor = 'rgb(100,100,100)';
|
|
13
12
|
|
|
14
|
-
function getPalleteByType(paletteType: string): SeqPalette
|
|
13
|
+
function getPalleteByType(paletteType: string): SeqPalette {
|
|
15
14
|
switch (paletteType) {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
15
|
+
case 'PT':
|
|
16
|
+
return AminoacidsPalettes.GrokGroups;
|
|
17
|
+
case 'NT':
|
|
18
|
+
return NucleotidesPalettes.Chromatogram;
|
|
19
|
+
case 'DNA':
|
|
20
|
+
return NucleotidesPalettes.Chromatogram;
|
|
21
|
+
case 'RNA':
|
|
22
|
+
return NucleotidesPalettes.Chromatogram;
|
|
23
|
+
// other
|
|
24
|
+
default:
|
|
25
|
+
return UnknownSeqPalettes.Color;
|
|
23
26
|
}
|
|
24
27
|
}
|
|
25
28
|
|
|
26
29
|
export function processSequence(subParts: string[]): [string[], boolean] {
|
|
27
30
|
const simplified = !subParts.some((amino, index) =>
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
+
amino.length > 1 &&
|
|
32
|
+
index != 0 &&
|
|
33
|
+
index != subParts.length - 1);
|
|
31
34
|
|
|
32
35
|
const text: string[] = [];
|
|
33
36
|
const gap = simplified ? '' : ' ';
|
|
@@ -39,6 +42,7 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
39
42
|
});
|
|
40
43
|
return [text, simplified];
|
|
41
44
|
}
|
|
45
|
+
|
|
42
46
|
/**
|
|
43
47
|
* A function that prints a string aligned to left or centered.
|
|
44
48
|
*
|
|
@@ -48,7 +52,7 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
48
52
|
* @param {number} h Height.
|
|
49
53
|
* @param {CanvasRenderingContext2D} g Canvas rendering context.
|
|
50
54
|
* @param {string} s String to print.
|
|
51
|
-
* @param {string} [color=
|
|
55
|
+
* @param {string} [color=undefinedColor] String color.
|
|
52
56
|
* @param {number} [pivot=0] Pivot.
|
|
53
57
|
* @param {boolean} [left=false] Is left aligned.
|
|
54
58
|
* @param {boolean} [hideMod=false] Hide amino acid residue modifications.
|
|
@@ -56,13 +60,16 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
56
60
|
* @return {number} x coordinate to start printing at.
|
|
57
61
|
*/
|
|
58
62
|
function printLeftOrCentered(
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
+
x: number, y: number, w: number, h: number,
|
|
64
|
+
g: CanvasRenderingContext2D, s: string, color = undefinedColor,
|
|
65
|
+
pivot: number = 0, left = false, hideMod = false, transparencyRate: number = 1.0,
|
|
66
|
+
separator: string = '', last: boolean = false): number {
|
|
63
67
|
g.textAlign = 'start';
|
|
64
68
|
let colorPart = s.substring(0);
|
|
65
69
|
let grayPart = separator;
|
|
70
|
+
if (last) {
|
|
71
|
+
grayPart = '';
|
|
72
|
+
}
|
|
66
73
|
const textSize = g.measureText(colorPart + grayPart);
|
|
67
74
|
const indent = 5;
|
|
68
75
|
|
|
@@ -87,44 +94,6 @@ function printLeftOrCentered(
|
|
|
87
94
|
return x + dx + colorTextSize.width;
|
|
88
95
|
}
|
|
89
96
|
}
|
|
90
|
-
function renderSequense(
|
|
91
|
-
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
92
|
-
cellStyle: DG.GridCellStyle,
|
|
93
|
-
): void {
|
|
94
|
-
const grid = gridCell.grid;
|
|
95
|
-
const cell = gridCell.cell;
|
|
96
|
-
const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(":");
|
|
97
|
-
w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
98
|
-
g.save();
|
|
99
|
-
g.beginPath();
|
|
100
|
-
g.rect(x, y, w, h);
|
|
101
|
-
g.clip();
|
|
102
|
-
g.font = '12px monospace';
|
|
103
|
-
g.textBaseline = 'top';
|
|
104
|
-
const s: string = cell.value ?? '';
|
|
105
|
-
|
|
106
|
-
//TODO: can this be replaced/merged with splitSequence?
|
|
107
|
-
const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
108
|
-
|
|
109
|
-
const palette = getPalleteByType(paletteType);
|
|
110
|
-
|
|
111
|
-
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
112
|
-
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator') );// splitter,
|
|
113
|
-
|
|
114
|
-
const subParts:string[] = splitterFunc(cell.value);
|
|
115
|
-
|
|
116
|
-
const textSize = g.measureText(subParts.join(''));
|
|
117
|
-
let x1 = Math.max(x, x + (w - textSize.width) / 2);
|
|
118
|
-
|
|
119
|
-
subParts.forEach((amino, index) => {
|
|
120
|
-
let [color, outerAmino,, pivot] = ChemPalette.getColorAAPivot(amino);
|
|
121
|
-
color = palette.get(amino);
|
|
122
|
-
g.fillStyle = ChemPalette.undefinedColor;
|
|
123
|
-
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, pivot, true, false, 1.0, separator);
|
|
124
|
-
});
|
|
125
|
-
|
|
126
|
-
g.restore();
|
|
127
|
-
}
|
|
128
97
|
|
|
129
98
|
export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
130
99
|
|
|
@@ -149,19 +118,19 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
149
118
|
* @memberof MacromoleculeSequenceCellRenderer
|
|
150
119
|
*/
|
|
151
120
|
render(
|
|
152
|
-
|
|
153
|
-
|
|
121
|
+
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
122
|
+
cellStyle: DG.GridCellStyle,
|
|
154
123
|
): void {
|
|
155
124
|
const grid = gridCell.grid;
|
|
156
125
|
const cell = gridCell.cell;
|
|
157
126
|
const tag = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
158
127
|
if (tag === 'HELM') {
|
|
159
|
-
let host = ui.div([], {
|
|
128
|
+
let host = ui.div([], {style: {width: `${w}px`, height: `${h}px`}});
|
|
160
129
|
host.setAttribute('dataformat', 'helm');
|
|
161
130
|
host.setAttribute('data', gridCell.cell.value);
|
|
162
131
|
gridCell.element = host;
|
|
163
132
|
//@ts-ignore
|
|
164
|
-
var canvas = new JSDraw2.Editor(host, {
|
|
133
|
+
var canvas = new JSDraw2.Editor(host, {width: w, height: h, skin: 'w8', viewonly: true});
|
|
165
134
|
var formula = canvas.getFormula(true);
|
|
166
135
|
if (!formula) {
|
|
167
136
|
gridCell.element = ui.divText(gridCell.cell.value, {style: {color: 'red'}});
|
|
@@ -174,7 +143,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
174
143
|
lru.set(gridCell.cell.value, result);
|
|
175
144
|
}
|
|
176
145
|
} else {
|
|
177
|
-
const [type, subtype, paletteType] =
|
|
146
|
+
const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(':');
|
|
178
147
|
w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
179
148
|
g.save();
|
|
180
149
|
g.beginPath();
|
|
@@ -190,19 +159,22 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
190
159
|
const palette = getPalleteByType(paletteType);
|
|
191
160
|
|
|
192
161
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
193
|
-
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator')
|
|
162
|
+
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator'));// splitter,
|
|
194
163
|
|
|
195
|
-
const subParts:string[] =
|
|
196
|
-
console.log(subParts);
|
|
164
|
+
const subParts: string[] = splitterFunc(cell.value);
|
|
165
|
+
// console.log(subParts);
|
|
197
166
|
|
|
198
167
|
const textSize = g.measureText(subParts.join(''));
|
|
199
|
-
let x1 =
|
|
200
|
-
|
|
168
|
+
let x1 = x;
|
|
169
|
+
let color = undefinedColor;
|
|
201
170
|
subParts.forEach((amino, index) => {
|
|
202
|
-
let [color, outerAmino,, pivot] = ChemPalette.getColorAAPivot(amino);
|
|
203
171
|
color = palette.get(amino);
|
|
204
|
-
g.fillStyle =
|
|
205
|
-
|
|
172
|
+
g.fillStyle = undefinedColor;
|
|
173
|
+
let last = false;
|
|
174
|
+
if (index === subParts.length - 1) {
|
|
175
|
+
last = true;
|
|
176
|
+
}
|
|
177
|
+
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, false, 1.0, separator, last);
|
|
206
178
|
});
|
|
207
179
|
|
|
208
180
|
g.restore();
|
package/src/utils/convert.ts
CHANGED
|
@@ -16,26 +16,22 @@ export function convert(col: DG.Column): void {
|
|
|
16
16
|
NOTATION.SEPARATOR,
|
|
17
17
|
NOTATION.HELM
|
|
18
18
|
];
|
|
19
|
-
const
|
|
20
|
-
const
|
|
19
|
+
const separatorArray = ['-', '.', '/'];
|
|
20
|
+
const filteredNotations = notations.filter((e) => e !== current);
|
|
21
|
+
const targetNotationInput = ui.choiceInput('Convert to', filteredNotations[0], filteredNotations);
|
|
21
22
|
|
|
22
|
-
const separatorInput = ui.choiceInput('separator',
|
|
23
|
+
const separatorInput = ui.choiceInput('Choose separator', separatorArray[0], separatorArray);
|
|
23
24
|
|
|
24
|
-
ui.dialog('Convert sequence')
|
|
25
|
+
ui.dialog('Convert sequence notation')
|
|
25
26
|
.add(ui.div([
|
|
26
|
-
ui.h1('
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
.add(ui.div([
|
|
31
|
-
ui.h1('Separator'),
|
|
32
|
-
separatorInput,
|
|
33
|
-
|
|
27
|
+
ui.h1('Current notation: ' + current),
|
|
28
|
+
targetNotationInput.root,
|
|
29
|
+
// TODO: conditional separator input
|
|
30
|
+
separatorInput.root
|
|
34
31
|
]))
|
|
35
32
|
.onOK(() => {
|
|
36
33
|
//TODO: create new converted column
|
|
37
|
-
|
|
38
|
-
const targetNotation: NOTATION = targetNotationInput.value as NOTATION;
|
|
34
|
+
const targetNotation = targetNotationInput.value as NOTATION;
|
|
39
35
|
const separator = separatorInput.value!;
|
|
40
36
|
const newColumn = converter.convert(targetNotation, separator);
|
|
41
37
|
col.dataFrame.columns.add(newColumn);
|
|
@@ -50,10 +50,12 @@ export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<D
|
|
|
50
50
|
|
|
51
51
|
await CLI.fs.writeFile('input.fa', fasta);
|
|
52
52
|
const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
|
|
53
|
-
const buf = await CLI.cat('result.fasta');
|
|
54
|
-
|
|
55
53
|
console.warn(output);
|
|
56
54
|
|
|
55
|
+
const buf = await CLI.cat('result.fasta');
|
|
56
|
+
if (!buf)
|
|
57
|
+
throw new Error(`kalign output no result`);
|
|
58
|
+
|
|
57
59
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
58
60
|
const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
|
|
59
61
|
|
|
@@ -3,9 +3,9 @@ import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
|
3
3
|
|
|
4
4
|
/** enum type to simplify setting "user-friendly" notation if necessary */
|
|
5
5
|
export const enum NOTATION {
|
|
6
|
-
FASTA = '
|
|
7
|
-
SEPARATOR = '
|
|
8
|
-
HELM = '
|
|
6
|
+
FASTA = 'FASTA',
|
|
7
|
+
SEPARATOR = 'SEPARATOR',
|
|
8
|
+
HELM = 'HELM'
|
|
9
9
|
}
|
|
10
10
|
|
|
11
11
|
/** Class for handling conversion of notation systems in Macromolecule columns */
|
|
@@ -32,14 +32,15 @@ export class NotationConverter {
|
|
|
32
32
|
|
|
33
33
|
public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
public isRna(): boolean { return this.sourceUnits.toLowerCase().endsWith('rna'); }
|
|
36
|
+
|
|
37
|
+
public isDna(): boolean { return this.sourceUnits.toLowerCase().endsWith('dna'); }
|
|
37
38
|
|
|
38
39
|
public isPeptide(): boolean { return this.sourceUnits.toLowerCase().endsWith('pt'); }
|
|
39
40
|
|
|
40
41
|
/** Associate notation types with the corresponding units */
|
|
41
42
|
/**
|
|
42
|
-
* @return {NOTATION}
|
|
43
|
+
* @return {NOTATION} Notation associated with the units type
|
|
43
44
|
*/
|
|
44
45
|
private determineSourceNotation(): NOTATION {
|
|
45
46
|
if (this.sourceUnits.toLowerCase().startsWith('fasta'))
|
|
@@ -51,7 +52,33 @@ export class NotationConverter {
|
|
|
51
52
|
return NOTATION.HELM;
|
|
52
53
|
}
|
|
53
54
|
|
|
54
|
-
|
|
55
|
+
/**
|
|
56
|
+
* Determine the separator used in SEPARATOR column
|
|
57
|
+
*
|
|
58
|
+
* @return {string} The detected separator
|
|
59
|
+
*/
|
|
60
|
+
private determineSeparator(): string {
|
|
61
|
+
// TODO: figure out how to determine the separator efficiently
|
|
62
|
+
const col = this.sourceColumn;
|
|
63
|
+
let i = 0;
|
|
64
|
+
const re = /[^a-z]/;
|
|
65
|
+
while (i < col.length) {
|
|
66
|
+
const molecule = col.get(i);
|
|
67
|
+
const foundSeparator = molecule.toLowerCase().match(re);
|
|
68
|
+
if (foundSeparator)
|
|
69
|
+
return foundSeparator[0];
|
|
70
|
+
i++;
|
|
71
|
+
}
|
|
72
|
+
throw new Error('No separators found');
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Create a new empty column of the specified notation type and the same
|
|
77
|
+
* length as sourceColumn
|
|
78
|
+
*
|
|
79
|
+
* @param {NOTATION} targetNotation
|
|
80
|
+
* @return {DG.Column}
|
|
81
|
+
*/
|
|
55
82
|
private getNewColumn(targetNotation: NOTATION): DG.Column {
|
|
56
83
|
const col = this.sourceColumn;
|
|
57
84
|
const len = col.length;
|
|
@@ -60,74 +87,199 @@ export class NotationConverter {
|
|
|
60
87
|
// dummy code
|
|
61
88
|
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));
|
|
62
89
|
newColumn.semType = 'Macromolecule';
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
90
|
+
newColumn.setTag(
|
|
91
|
+
DG.TAGS.UNITS,
|
|
92
|
+
this.sourceUnits.replace(
|
|
93
|
+
this.sourceNotation.toLowerCase().toString(),
|
|
94
|
+
targetNotation.toLowerCase().toString()
|
|
95
|
+
)
|
|
96
|
+
);
|
|
97
|
+
// TODO: specify cell renderers for all cases
|
|
98
|
+
if (this.toFasta(targetNotation)) {
|
|
99
|
+
newColumn.setTag(
|
|
100
|
+
DG.TAGS.CELL_RENDERER,
|
|
101
|
+
'Macromolecule');
|
|
102
|
+
}
|
|
66
103
|
return newColumn;
|
|
67
104
|
}
|
|
68
105
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
//
|
|
78
|
-
// On splitToMonomers(): /libraries/bio/src/viewers/WebLogo --> getSplitter
|
|
79
|
-
|
|
80
|
-
const gapSymbol = '-'; // to be specified as an argument
|
|
106
|
+
/**
|
|
107
|
+
* Convert a Macromolecule column from FASTA to SEPARATOR notation
|
|
108
|
+
*
|
|
109
|
+
* @param {string} separator A specific separator to be used
|
|
110
|
+
* @param {string} gapSymbol Gap symbol in FASTA, '-' by default
|
|
111
|
+
* @return {DG.Column} A new column in SEPARATOR notation
|
|
112
|
+
*/
|
|
113
|
+
private convertFastaToSeparator(separator: string, gapSymbol: string = '-'): DG.Column {
|
|
114
|
+
// a function splitting FASTA sequence into an array of monomers:
|
|
81
115
|
const splitterAsFasta = WebLogo.splitterAsFasta;
|
|
116
|
+
|
|
82
117
|
const newColumn = this.getNewColumn(NOTATION.SEPARATOR);
|
|
118
|
+
// assign the values to the newly created empty column
|
|
83
119
|
newColumn.init((idx: number) => {
|
|
84
|
-
const
|
|
85
|
-
const
|
|
86
|
-
for (let i = 0; i <
|
|
87
|
-
if (
|
|
88
|
-
|
|
120
|
+
const fastaPolymer = this.sourceColumn.get(idx);
|
|
121
|
+
const fastaMonomersArray = splitterAsFasta(fastaPolymer);
|
|
122
|
+
for (let i = 0; i < fastaMonomersArray.length; i++) {
|
|
123
|
+
if (fastaMonomersArray[i] === gapSymbol)
|
|
124
|
+
fastaMonomersArray[i] = '';
|
|
89
125
|
}
|
|
90
|
-
return
|
|
126
|
+
return fastaMonomersArray.join(separator);
|
|
91
127
|
});
|
|
92
128
|
return newColumn;
|
|
93
129
|
}
|
|
94
130
|
|
|
95
|
-
|
|
131
|
+
/**
|
|
132
|
+
* Convert a Macromolecule column from FASTA to HELM
|
|
133
|
+
*
|
|
134
|
+
* @param {string} fastaGapSymbol Optional fasta gap symbol
|
|
135
|
+
* @param {string} helmGapSymbol Optional helm gap symbol
|
|
136
|
+
* @return {DG.Column} A new column in HELM notation
|
|
137
|
+
*/
|
|
138
|
+
private convertFastaToHelm(
|
|
139
|
+
fastaGapSymbol: string = '-',
|
|
140
|
+
helmGapSymbol: string = '*'
|
|
141
|
+
): DG.Column {
|
|
142
|
+
// a function splitting FASTA sequence into an array of monomers
|
|
143
|
+
const splitterAsFasta = WebLogo.splitterAsFasta;
|
|
96
144
|
|
|
97
|
-
|
|
145
|
+
const prefix = (this.isDna()) ? 'DNA1{' :
|
|
146
|
+
(this.isRna()) ? 'RNA1{' :
|
|
147
|
+
(this.isPeptide()) ? 'PEPTIDE1{' :
|
|
148
|
+
'Unknown'; // this case should be handled as exceptional
|
|
149
|
+
|
|
150
|
+
if (prefix === 'Unknown')
|
|
151
|
+
throw new Error('Neither peptide, nor nucleotide');
|
|
152
|
+
|
|
153
|
+
const postfix = '}$$$';
|
|
154
|
+
const leftWrapper = (this.isDna()) ? 'D(' :
|
|
155
|
+
(this.isRna()) ? 'R(' : ''; // no wrapper for peptides
|
|
156
|
+
const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides
|
|
98
157
|
|
|
99
|
-
private convertFastaToHelm(): DG.Column {
|
|
100
|
-
const gapSymbol = '-'; // to be specified as an argument
|
|
101
|
-
const splitterAsFasta = WebLogo.splitterAsFasta;
|
|
102
158
|
const newColumn = this.getNewColumn(NOTATION.HELM);
|
|
159
|
+
// assign the values to the empty column
|
|
103
160
|
newColumn.init((idx: number) => {
|
|
104
|
-
const
|
|
105
|
-
const
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
161
|
+
const fastaPolymer = this.sourceColumn.get(idx);
|
|
162
|
+
const fastaMonomersArray = splitterAsFasta(fastaPolymer);
|
|
163
|
+
const helmArray = [prefix];
|
|
164
|
+
let firstIteration = true;
|
|
165
|
+
for (let i = 0; i < fastaMonomersArray.length; i++) {
|
|
166
|
+
if (fastaMonomersArray[i] === fastaGapSymbol) {
|
|
167
|
+
// TODO: verify the correctness of gap symbols handling
|
|
168
|
+
helmArray.push(helmGapSymbol);
|
|
169
|
+
} else {
|
|
170
|
+
const dot = firstIteration ? '' : '.';
|
|
171
|
+
const item = [dot, leftWrapper, fastaMonomersArray[i], rightWrapper];
|
|
172
|
+
helmArray.push(item.join(''));
|
|
173
|
+
}
|
|
174
|
+
firstIteration = false;
|
|
111
175
|
}
|
|
112
|
-
|
|
113
|
-
return
|
|
176
|
+
helmArray.push(postfix);
|
|
177
|
+
return helmArray.join('');
|
|
114
178
|
});
|
|
115
179
|
return newColumn;
|
|
116
180
|
}
|
|
117
181
|
|
|
118
|
-
private
|
|
182
|
+
private handleSeparatorItemForFasta(
|
|
183
|
+
idx: number,
|
|
184
|
+
separatorItemsArray: string[],
|
|
185
|
+
separator: string,
|
|
186
|
+
gapSymbol: string,
|
|
187
|
+
fastaMonomersArray: string[]
|
|
188
|
+
): void {
|
|
189
|
+
const item = separatorItemsArray[idx];
|
|
190
|
+
if (item.length > 1) {
|
|
191
|
+
// the case of a multi-character monomer
|
|
192
|
+
const monomer = '[' + item + ']';
|
|
193
|
+
fastaMonomersArray.push(monomer);
|
|
194
|
+
}
|
|
195
|
+
if (item === separator) {
|
|
196
|
+
if (idx !== 0 && separatorItemsArray[idx - 1] === separator)
|
|
197
|
+
fastaMonomersArray.push(gapSymbol);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
private convertSeparatorToFasta(
|
|
202
|
+
separator: string | null = null,
|
|
203
|
+
gapSymbol: string = '-'
|
|
204
|
+
): DG.Column {
|
|
119
205
|
// TODO: implementation
|
|
120
206
|
// * similarly to fasta2separator, divide string into monomers
|
|
121
207
|
// * adjacent separators is a gap (symbol to be specified)
|
|
122
208
|
// * the monomers MUST be single-character ones, otherwise forbid
|
|
209
|
+
// * NO, they can be multi-characters
|
|
123
210
|
// conversion
|
|
124
|
-
//
|
|
125
|
-
|
|
211
|
+
// * consider automatic determining the separator
|
|
212
|
+
|
|
213
|
+
// if (separator === null)
|
|
214
|
+
// separator = this.determineSeparator();
|
|
215
|
+
|
|
216
|
+
// a function splitting a source-column sequence into an array of monomers
|
|
217
|
+
//const splitterAsSeparator = WebLogo.getSplitterWithSeparator(separator);
|
|
218
|
+
const splitter = WebLogo.getSplitterForColumn(this._sourceColumn);
|
|
219
|
+
|
|
220
|
+
const newColumn = this.getNewColumn(NOTATION.FASTA);
|
|
221
|
+
// assign the values to the empty column
|
|
222
|
+
newColumn.init((idx: number) => {
|
|
223
|
+
const separatorPolymer = this.sourceColumn.get(idx);
|
|
224
|
+
// items can be monomers or separators
|
|
225
|
+
const separatorItemsArray = splitter(separatorPolymer);
|
|
226
|
+
const fastaMonomersArray: string[] = [];
|
|
227
|
+
for (let i = 0; i < separatorItemsArray.length; i++) {
|
|
228
|
+
const item = separatorItemsArray[i];
|
|
229
|
+
if (item.length === 0) {
|
|
230
|
+
fastaMonomersArray.push(gapSymbol);
|
|
231
|
+
} else if (item.length > 1) {
|
|
232
|
+
// the case of a multi-character monomer
|
|
233
|
+
const monomer = '[' + item + ']';
|
|
234
|
+
fastaMonomersArray.push(monomer);
|
|
235
|
+
} else {
|
|
236
|
+
fastaMonomersArray.push(item);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
return fastaMonomersArray.join('');
|
|
240
|
+
});
|
|
241
|
+
return newColumn;
|
|
126
242
|
}
|
|
127
243
|
|
|
128
|
-
private convertSeparatorToHelm(): DG.Column {
|
|
129
|
-
//
|
|
130
|
-
|
|
244
|
+
private convertSeparatorToHelm(fastaGapSymbol: string = '-', helmGapSymbol: string = '*'): DG.Column {
|
|
245
|
+
// a function splitting a source-column sequence into an array of monomers
|
|
246
|
+
const splitter = WebLogo.getSplitterForColumn(this._sourceColumn);
|
|
247
|
+
|
|
248
|
+
const prefix = (this.isDna()) ? 'DNA1{' :
|
|
249
|
+
(this.isRna()) ? 'RNA1{' :
|
|
250
|
+
(this.isPeptide()) ? 'PEPTIDE1{' :
|
|
251
|
+
'Unknown'; // this case should be handled as exceptional
|
|
252
|
+
|
|
253
|
+
if (prefix === 'Unknown')
|
|
254
|
+
throw new Error('Neither peptide, nor nucleotide');
|
|
255
|
+
|
|
256
|
+
const postfix = '}$$$';
|
|
257
|
+
const leftWrapper = (this.isDna()) ? 'D(' :
|
|
258
|
+
(this.isRna()) ? 'R(' : ''; // no wrapper for peptides
|
|
259
|
+
const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides
|
|
260
|
+
|
|
261
|
+
const newColumn = this.getNewColumn(NOTATION.HELM);
|
|
262
|
+
// assign the values to the empty column
|
|
263
|
+
newColumn.init((idx: number) => {
|
|
264
|
+
const fastaPolymer = this.sourceColumn.get(idx);
|
|
265
|
+
const fastaMonomersArray = splitter(fastaPolymer);
|
|
266
|
+
const helmArray = [prefix];
|
|
267
|
+
let firstIteration = true;
|
|
268
|
+
for (let i = 0; i < fastaMonomersArray.length; i++) {
|
|
269
|
+
if (fastaMonomersArray[i] === fastaGapSymbol) {
|
|
270
|
+
// TODO: verify the correctness of gap symbols handling
|
|
271
|
+
helmArray.push(helmGapSymbol);
|
|
272
|
+
} else {
|
|
273
|
+
const dot = firstIteration ? '' : '.';
|
|
274
|
+
const item = [dot, leftWrapper, fastaMonomersArray[i], rightWrapper];
|
|
275
|
+
helmArray.push(item.join(''));
|
|
276
|
+
}
|
|
277
|
+
firstIteration = false;
|
|
278
|
+
}
|
|
279
|
+
helmArray.push(postfix);
|
|
280
|
+
return helmArray.join('');
|
|
281
|
+
});
|
|
282
|
+
return newColumn;
|
|
131
283
|
}
|
|
132
284
|
|
|
133
285
|
private convertHelmToFasta(): DG.Column {
|
|
@@ -140,17 +292,25 @@ export class NotationConverter {
|
|
|
140
292
|
return this.getNewColumn(NOTATION.SEPARATOR);
|
|
141
293
|
}
|
|
142
294
|
|
|
143
|
-
/** Dispatcher method for notation conversion
|
|
144
|
-
|
|
145
|
-
|
|
295
|
+
/** Dispatcher method for notation conversion
|
|
296
|
+
*
|
|
297
|
+
* @param {NOTATION} targetNotation Notation we want to convert to
|
|
298
|
+
* @param {string | null} tgtSeparator Possible separator
|
|
299
|
+
* @return {DG.Column} Converted column
|
|
300
|
+
*/
|
|
301
|
+
public convert(targetNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {
|
|
302
|
+
// possible exceptions
|
|
146
303
|
if (this.sourceNotation === targetNotation)
|
|
147
|
-
throw new Error('Target notation is
|
|
148
|
-
if (this.
|
|
149
|
-
|
|
304
|
+
throw new Error('Target notation is invalid');
|
|
305
|
+
if (this.toSeparator(targetNotation) && tgtSeparator === null)
|
|
306
|
+
throw new Error('Target separator is not specified');
|
|
307
|
+
|
|
308
|
+
if (this.isFasta() && this.toSeparator(targetNotation) && tgtSeparator !== null)
|
|
309
|
+
return this.convertFastaToSeparator(tgtSeparator);
|
|
150
310
|
else if (this.isFasta() && this.toHelm(targetNotation))
|
|
151
311
|
return this.convertFastaToHelm();
|
|
152
312
|
else if (this.isSeparator() && this.toFasta(targetNotation))
|
|
153
|
-
return this.convertSeparatorToFasta();
|
|
313
|
+
return this.convertSeparatorToFasta(tgtSeparator!);
|
|
154
314
|
else if (this.isSeparator() && this.toHelm(targetNotation))
|
|
155
315
|
return this.convertSeparatorToHelm();
|
|
156
316
|
else if (this.isHelm() && this.toFasta(targetNotation))
|