@datagrok-libraries/bio 2.8.3 → 2.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/utils/notation-converter.d.ts +43 -40
- package/src/utils/notation-converter.d.ts.map +1 -1
- package/src/utils/notation-converter.js +125 -150
- package/src/utils/units-handler.d.ts +70 -0
- package/src/utils/units-handler.d.ts.map +1 -0
- package/src/utils/units-handler.js +117 -0
- package/src/viewers/web-logo.d.ts +7 -2
- package/src/viewers/web-logo.d.ts.map +1 -1
- package/src/viewers/web-logo.js +40 -17
package/package.json
CHANGED
|
@@ -1,48 +1,13 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import { SplitterFunc } from '../viewers/web-logo';
|
|
3
|
-
|
|
4
|
-
export declare const enum NOTATION {
|
|
5
|
-
FASTA = "FASTA",
|
|
6
|
-
SEPARATOR = "SEPARATOR",
|
|
7
|
-
HELM = "HELM"
|
|
8
|
-
}
|
|
3
|
+
import { UnitsHandler, NOTATION } from './units-handler';
|
|
9
4
|
/** Class for handling conversion of notation systems in Macromolecule columns */
|
|
10
|
-
export declare class NotationConverter {
|
|
11
|
-
private readonly _sourceColumn;
|
|
12
|
-
private _sourceUnits;
|
|
13
|
-
private _sourceNotation;
|
|
14
|
-
private _defaultGapSymbol;
|
|
15
|
-
private _defaultGapSymbolsDict;
|
|
5
|
+
export declare class NotationConverter extends UnitsHandler {
|
|
16
6
|
private _splitter;
|
|
17
7
|
protected get splitter(): SplitterFunc;
|
|
18
|
-
private get sourceUnits();
|
|
19
|
-
private get sourceColumn();
|
|
20
|
-
get sourceNotation(): NOTATION;
|
|
21
|
-
get defaultGapSymbol(): string;
|
|
22
|
-
get separator(): string;
|
|
23
|
-
isFasta(): boolean;
|
|
24
|
-
isSeparator(): boolean;
|
|
25
|
-
isHelm(): boolean;
|
|
26
8
|
toFasta(targetNotation: NOTATION): boolean;
|
|
27
9
|
toSeparator(targetNotation: NOTATION): boolean;
|
|
28
10
|
toHelm(targetNotation: NOTATION): boolean;
|
|
29
|
-
isRna(): boolean;
|
|
30
|
-
isDna(): boolean;
|
|
31
|
-
isPeptide(): boolean;
|
|
32
|
-
convertStringToHelm(src: string, fastaGapSymbol?: string, helmGapSymbol?: string): string;
|
|
33
|
-
/** Associate notation types with the corresponding units */
|
|
34
|
-
/**
|
|
35
|
-
* @return {NOTATION} Notation associated with the units type
|
|
36
|
-
*/
|
|
37
|
-
private getSourceNotation;
|
|
38
|
-
/**
|
|
39
|
-
* Create a new empty column of the specified notation type and the same
|
|
40
|
-
* length as sourceColumn
|
|
41
|
-
*
|
|
42
|
-
* @param {NOTATION} targetNotation
|
|
43
|
-
* @return {DG.Column}
|
|
44
|
-
*/
|
|
45
|
-
private getNewColumn;
|
|
46
11
|
/**
|
|
47
12
|
* Convert a Macromolecule column from FASTA to SEPARATOR notation
|
|
48
13
|
*
|
|
@@ -51,17 +16,55 @@ export declare class NotationConverter {
|
|
|
51
16
|
* @return {DG.Column} A new column in SEPARATOR notation
|
|
52
17
|
*/
|
|
53
18
|
private convertFastaToSeparator;
|
|
19
|
+
/**
|
|
20
|
+
* Get the wrapper strings for HELM, depending on the type of the
|
|
21
|
+
* macromolecule (peptide, DNA, RNA)
|
|
22
|
+
*
|
|
23
|
+
* @return {string[]} Array of wrappers
|
|
24
|
+
*/
|
|
25
|
+
private getHelmWrappers;
|
|
26
|
+
private convertToHelmHelper;
|
|
27
|
+
/**
|
|
28
|
+
* Convert a string with SEPARATOR/FASTA notation to HELM
|
|
29
|
+
*
|
|
30
|
+
* @param {string} sourcePolymer A string to be converted
|
|
31
|
+
* @param {string | null} sourceGapSymbol An optional gap symbol, set to
|
|
32
|
+
* default values ('-' for FASTA and '' for SEPARATOR) unless specified
|
|
33
|
+
* @return {string} The target HELM string
|
|
34
|
+
*/
|
|
35
|
+
convertStringToHelm(sourcePolymer: string, sourceGapSymbol?: string | null): string;
|
|
36
|
+
/**
|
|
37
|
+
* Convert a column to HELM
|
|
38
|
+
*
|
|
39
|
+
* @param {string | null} sourceGapSymbol
|
|
40
|
+
* @return {DG.Column}
|
|
41
|
+
*/
|
|
54
42
|
private convertToHelm;
|
|
43
|
+
/**
|
|
44
|
+
* Convert SEPARATOR column to FASTA notation
|
|
45
|
+
*
|
|
46
|
+
* @param {string | null} fastaGapSymbol Optional gap symbol for FASTA
|
|
47
|
+
* @return {DG.Column} Converted column
|
|
48
|
+
*/
|
|
55
49
|
private convertSeparatorToFasta;
|
|
56
|
-
|
|
50
|
+
/**
|
|
51
|
+
* Convert HELM column to FASTA/SEPARATOR
|
|
52
|
+
*
|
|
53
|
+
* @param {string} tgtNotation Target notation: FASTA or SEPARATOR
|
|
54
|
+
* @param {string} tgtSeparator Optional target separator (for HELM ->
|
|
55
|
+
* SEPARATOR)
|
|
56
|
+
* @param {string | null} tgtGapSymbol Optional target gap symbol
|
|
57
|
+
* @return {DG.Column} Converted column
|
|
58
|
+
*/
|
|
59
|
+
private convertHelm;
|
|
57
60
|
private convertHelmToSeparator;
|
|
58
61
|
/** Dispatcher method for notation conversion
|
|
59
62
|
*
|
|
60
|
-
* @param {NOTATION}
|
|
63
|
+
* @param {NOTATION} tgtNotation Notation we want to convert to
|
|
61
64
|
* @param {string | null} tgtSeparator Possible separator
|
|
62
65
|
* @return {DG.Column} Converted column
|
|
63
66
|
*/
|
|
64
|
-
convert(
|
|
67
|
+
convert(tgtNotation: NOTATION, tgtSeparator?: string | null): DG.Column;
|
|
65
68
|
constructor(col: DG.Column);
|
|
66
69
|
}
|
|
67
70
|
//# sourceMappingURL=notation-converter.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAC,YAAY,EAAU,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAC,YAAY,EAAE,QAAQ,EAAC,MAAM,iBAAiB,CAAC;AAEvD,iFAAiF;AACjF,qBAAa,iBAAkB,SAAQ,YAAY;IACjD,OAAO,CAAC,SAAS,CAA6B;IAC9C,SAAS,KAAK,QAAQ,IAAI,YAAY,CAIrC;IAEM,OAAO,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE1C,WAAW,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE9C,MAAM,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,uBAAuB;IAmB/B;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAiBvB,OAAO,CAAC,mBAAmB;IAkB3B;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAa,EAAE,MAAM,EACrB,eAAe,GAAE,MAAM,GAAG,IAAW,GACnC,MAAM;IAOV;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAerB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA4B/B;;;;;;;;OAQG;IACH,OAAO,CAAC,WAAW;IAiDnB,OAAO,CAAC,sBAAsB;IAK9B;;;;;OAKG;IACI,OAAO,CAAC,WAAW,EAAE,QAAQ,EAAE,YAAY,GAAE,MAAM,GAAG,IAAW,GAAG,EAAE,CAAC,MAAM;gBAmBjE,GAAG,EAAE,EAAE,CAAC,MAAM;CAGlC"}
|
|
@@ -1,110 +1,20 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
1
|
import { WebLogo } from '../viewers/web-logo';
|
|
2
|
+
import { UnitsHandler } from './units-handler';
|
|
3
3
|
/** Class for handling conversion of notation systems in Macromolecule columns */
|
|
4
|
-
export class NotationConverter {
|
|
4
|
+
export class NotationConverter extends UnitsHandler {
|
|
5
5
|
constructor(col) {
|
|
6
|
-
|
|
7
|
-
HELM: '*',
|
|
8
|
-
SEPARATOR: '',
|
|
9
|
-
FASTA: '-',
|
|
10
|
-
};
|
|
6
|
+
super(col);
|
|
11
7
|
this._splitter = null;
|
|
12
|
-
this._sourceColumn = col;
|
|
13
|
-
const units = this._sourceColumn.tags[DG.TAGS.UNITS];
|
|
14
|
-
if (units !== null)
|
|
15
|
-
this._sourceUnits = units;
|
|
16
|
-
else
|
|
17
|
-
throw new Error('Units are not specified in column');
|
|
18
|
-
this._sourceNotation = this.getSourceNotation();
|
|
19
|
-
this._defaultGapSymbol = (this.isFasta()) ? this._defaultGapSymbolsDict.FASTA :
|
|
20
|
-
(this.isHelm()) ? this._defaultGapSymbolsDict.HELM :
|
|
21
|
-
this._defaultGapSymbolsDict.SEPARATOR;
|
|
22
8
|
}
|
|
23
9
|
get splitter() {
|
|
24
|
-
if (this._splitter === null)
|
|
25
|
-
this._splitter = WebLogo.getSplitterForColumn(this.
|
|
26
|
-
}
|
|
10
|
+
if (this._splitter === null)
|
|
11
|
+
this._splitter = WebLogo.getSplitterForColumn(this.column);
|
|
27
12
|
return this._splitter;
|
|
28
13
|
}
|
|
29
14
|
;
|
|
30
|
-
get sourceUnits() { return this._sourceUnits; }
|
|
31
|
-
get sourceColumn() { return this._sourceColumn; }
|
|
32
|
-
get sourceNotation() { return this._sourceNotation; }
|
|
33
|
-
get defaultGapSymbol() { return this._defaultGapSymbol; }
|
|
34
|
-
get separator() {
|
|
35
|
-
const separator = this.sourceColumn.getTag('separator');
|
|
36
|
-
if (separator !== null)
|
|
37
|
-
return separator;
|
|
38
|
-
else
|
|
39
|
-
throw new Error('Separator not set');
|
|
40
|
-
}
|
|
41
|
-
isFasta() { return this.sourceNotation === "FASTA" /* NOTATION.FASTA */; }
|
|
42
|
-
isSeparator() { return this.sourceNotation === "SEPARATOR" /* NOTATION.SEPARATOR */; }
|
|
43
|
-
isHelm() { return this.sourceNotation === "HELM" /* NOTATION.HELM */; }
|
|
44
15
|
toFasta(targetNotation) { return targetNotation === "FASTA" /* NOTATION.FASTA */; }
|
|
45
16
|
toSeparator(targetNotation) { return targetNotation === "SEPARATOR" /* NOTATION.SEPARATOR */; }
|
|
46
17
|
toHelm(targetNotation) { return targetNotation === "HELM" /* NOTATION.HELM */; }
|
|
47
|
-
isRna() { return this.sourceUnits.toLowerCase().endsWith('rna'); }
|
|
48
|
-
isDna() { return this.sourceUnits.toLowerCase().endsWith('dna'); }
|
|
49
|
-
isPeptide() { return this.sourceUnits.toLowerCase().endsWith('pt'); }
|
|
50
|
-
convertStringToHelm(src, fastaGapSymbol = '-', helmGapSymbol = '*') {
|
|
51
|
-
const prefix = (this.isDna()) ? 'DNA1{' :
|
|
52
|
-
(this.isRna()) ? 'RNA1{' :
|
|
53
|
-
(this.isPeptide()) ? 'PEPTIDE1{' :
|
|
54
|
-
'Unknown'; // this case should be handled as exceptional
|
|
55
|
-
if (prefix === 'Unknown')
|
|
56
|
-
throw new Error('Neither peptide, nor nucleotide');
|
|
57
|
-
const postfix = '}$$$';
|
|
58
|
-
const leftWrapper = (this.isDna()) ? 'D(' :
|
|
59
|
-
(this.isRna()) ? 'R(' : ''; // no wrapper for peptides
|
|
60
|
-
const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides
|
|
61
|
-
const monomerArray = this.splitter(src);
|
|
62
|
-
const monomerHelmArray = monomerArray.map((mm) => {
|
|
63
|
-
if (mm === fastaGapSymbol) {
|
|
64
|
-
return helmGapSymbol;
|
|
65
|
-
}
|
|
66
|
-
else {
|
|
67
|
-
return `${leftWrapper}${mm}${rightWrapper}`;
|
|
68
|
-
}
|
|
69
|
-
});
|
|
70
|
-
return `${prefix}${monomerHelmArray.join('.')}${postfix}`;
|
|
71
|
-
}
|
|
72
|
-
/** Associate notation types with the corresponding units */
|
|
73
|
-
/**
|
|
74
|
-
* @return {NOTATION} Notation associated with the units type
|
|
75
|
-
*/
|
|
76
|
-
getSourceNotation() {
|
|
77
|
-
if (this.sourceUnits.toLowerCase().startsWith('fasta'))
|
|
78
|
-
return "FASTA" /* NOTATION.FASTA */;
|
|
79
|
-
else if (this.sourceUnits.toLowerCase().startsWith('separator'))
|
|
80
|
-
return "SEPARATOR" /* NOTATION.SEPARATOR */;
|
|
81
|
-
else if (this.sourceUnits.toLowerCase().startsWith('helm'))
|
|
82
|
-
return "HELM" /* NOTATION.HELM */;
|
|
83
|
-
else
|
|
84
|
-
throw new Error('The column has units that do not correspond to any notation');
|
|
85
|
-
}
|
|
86
|
-
/**
|
|
87
|
-
* Create a new empty column of the specified notation type and the same
|
|
88
|
-
* length as sourceColumn
|
|
89
|
-
*
|
|
90
|
-
* @param {NOTATION} targetNotation
|
|
91
|
-
* @return {DG.Column}
|
|
92
|
-
*/
|
|
93
|
-
getNewColumn(targetNotation) {
|
|
94
|
-
const col = this.sourceColumn;
|
|
95
|
-
const len = col.length;
|
|
96
|
-
const name = targetNotation.toLowerCase() + '(' + col.name + ')';
|
|
97
|
-
const newColName = col.dataFrame.columns.getUnusedName(name);
|
|
98
|
-
// dummy code
|
|
99
|
-
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));
|
|
100
|
-
newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
101
|
-
newColumn.setTag(DG.TAGS.UNITS, this.sourceUnits.replace(this.sourceNotation.toLowerCase().toString(), targetNotation.toLowerCase().toString()));
|
|
102
|
-
// TODO: specify cell renderers for all cases
|
|
103
|
-
if (this.toFasta(targetNotation)) {
|
|
104
|
-
newColumn.setTag(DG.TAGS.CELL_RENDERER, 'Macromolecule');
|
|
105
|
-
}
|
|
106
|
-
return newColumn;
|
|
107
|
-
}
|
|
108
18
|
/**
|
|
109
19
|
* Convert a Macromolecule column from FASTA to SEPARATOR notation
|
|
110
20
|
*
|
|
@@ -115,14 +25,11 @@ export class NotationConverter {
|
|
|
115
25
|
convertFastaToSeparator(separator, fastaGapSymbol = null) {
|
|
116
26
|
if (fastaGapSymbol === null)
|
|
117
27
|
fastaGapSymbol = this.defaultGapSymbol;
|
|
118
|
-
// A function splitting a sequence into an array of monomers according to
|
|
119
|
-
// its notation
|
|
120
|
-
const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);
|
|
121
28
|
const newColumn = this.getNewColumn("SEPARATOR" /* NOTATION.SEPARATOR */);
|
|
122
29
|
// assign the values to the newly created empty column
|
|
123
30
|
newColumn.init((idx) => {
|
|
124
|
-
const fastaPolymer = this.
|
|
125
|
-
const fastaMonomersArray = splitter(fastaPolymer);
|
|
31
|
+
const fastaPolymer = this.column.get(idx);
|
|
32
|
+
const fastaMonomersArray = this.splitter(fastaPolymer);
|
|
126
33
|
for (let i = 0; i < fastaMonomersArray.length; i++) {
|
|
127
34
|
if (fastaMonomersArray[i] === fastaGapSymbol)
|
|
128
35
|
fastaMonomersArray[i] = this._defaultGapSymbolsDict.SEPARATOR;
|
|
@@ -132,12 +39,13 @@ export class NotationConverter {
|
|
|
132
39
|
newColumn.setTag('separator', separator);
|
|
133
40
|
return newColumn;
|
|
134
41
|
}
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
42
|
+
/**
|
|
43
|
+
* Get the wrapper strings for HELM, depending on the type of the
|
|
44
|
+
* macromolecule (peptide, DNA, RNA)
|
|
45
|
+
*
|
|
46
|
+
* @return {string[]} Array of wrappers
|
|
47
|
+
*/
|
|
48
|
+
getHelmWrappers() {
|
|
141
49
|
const prefix = (this.isDna()) ? 'DNA1{' :
|
|
142
50
|
(this.isRna()) ? 'RNA1{' :
|
|
143
51
|
(this.isPeptide()) ? 'PEPTIDE1{' :
|
|
@@ -148,46 +56,66 @@ export class NotationConverter {
|
|
|
148
56
|
const leftWrapper = (this.isDna()) ? 'D(' :
|
|
149
57
|
(this.isRna()) ? 'R(' : ''; // no wrapper for peptides
|
|
150
58
|
const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides
|
|
59
|
+
return [prefix, leftWrapper, rightWrapper, postfix];
|
|
60
|
+
}
|
|
61
|
+
// A helper function for converting strings to HELM
|
|
62
|
+
convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix) {
|
|
63
|
+
const monomerArray = this.splitter(sourcePolymer);
|
|
64
|
+
const monomerHelmArray = monomerArray.map((mm) => {
|
|
65
|
+
if (mm === sourceGapSymbol)
|
|
66
|
+
return this._defaultGapSymbolsDict.HELM;
|
|
67
|
+
else
|
|
68
|
+
return `${leftWrapper}${mm}${rightWrapper}`;
|
|
69
|
+
});
|
|
70
|
+
return `${prefix}${monomerHelmArray.join('.')}${postfix}`;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Convert a string with SEPARATOR/FASTA notation to HELM
|
|
74
|
+
*
|
|
75
|
+
* @param {string} sourcePolymer A string to be converted
|
|
76
|
+
* @param {string | null} sourceGapSymbol An optional gap symbol, set to
|
|
77
|
+
* default values ('-' for FASTA and '' for SEPARATOR) unless specified
|
|
78
|
+
* @return {string} The target HELM string
|
|
79
|
+
*/
|
|
80
|
+
convertStringToHelm(sourcePolymer, sourceGapSymbol = null) {
|
|
81
|
+
if (sourceGapSymbol === null)
|
|
82
|
+
sourceGapSymbol = this.defaultGapSymbol;
|
|
83
|
+
const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();
|
|
84
|
+
return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Convert a column to HELM
|
|
88
|
+
*
|
|
89
|
+
* @param {string | null} sourceGapSymbol
|
|
90
|
+
* @return {DG.Column}
|
|
91
|
+
*/
|
|
92
|
+
convertToHelm(sourceGapSymbol = null) {
|
|
93
|
+
if (sourceGapSymbol === null)
|
|
94
|
+
sourceGapSymbol = this.defaultGapSymbol;
|
|
95
|
+
const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();
|
|
151
96
|
const newColumn = this.getNewColumn("HELM" /* NOTATION.HELM */);
|
|
152
97
|
// assign the values to the empty column
|
|
153
98
|
newColumn.init((idx) => {
|
|
154
|
-
const sourcePolymer = this.
|
|
155
|
-
|
|
156
|
-
const helmArray = [prefix];
|
|
157
|
-
let firstIteration = true;
|
|
158
|
-
for (let i = 0; i < sourceMonomersArray.length; i++) {
|
|
159
|
-
const dot = firstIteration ? '' : '.';
|
|
160
|
-
let token = sourceMonomersArray[i];
|
|
161
|
-
if (token === sourceGapSymbol)
|
|
162
|
-
token = this._defaultGapSymbolsDict.HELM;
|
|
163
|
-
const item = [dot, leftWrapper, token, rightWrapper];
|
|
164
|
-
helmArray.push(item.join(''));
|
|
165
|
-
firstIteration = false;
|
|
166
|
-
}
|
|
167
|
-
helmArray.push(postfix);
|
|
168
|
-
return helmArray.join('');
|
|
99
|
+
const sourcePolymer = this.column.get(idx);
|
|
100
|
+
return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);
|
|
169
101
|
});
|
|
170
102
|
return newColumn;
|
|
171
103
|
}
|
|
104
|
+
/**
|
|
105
|
+
* Convert SEPARATOR column to FASTA notation
|
|
106
|
+
*
|
|
107
|
+
* @param {string | null} fastaGapSymbol Optional gap symbol for FASTA
|
|
108
|
+
* @return {DG.Column} Converted column
|
|
109
|
+
*/
|
|
172
110
|
convertSeparatorToFasta(fastaGapSymbol = null) {
|
|
173
|
-
// TODO: implementation
|
|
174
|
-
// * similarly to fasta2separator, divide string into monomers
|
|
175
|
-
// * adjacent separators is a gap (symbol to be specified)
|
|
176
|
-
// * the monomers MUST be single-character ones, otherwise forbid
|
|
177
|
-
// * NO, they can be multi-characters
|
|
178
|
-
// conversion
|
|
179
|
-
// * consider automatic determining the separator
|
|
180
111
|
if (fastaGapSymbol === null)
|
|
181
112
|
fastaGapSymbol = this._defaultGapSymbolsDict.FASTA;
|
|
182
|
-
// A function splitting a sequence into an array of monomers according to
|
|
183
|
-
// its notation
|
|
184
|
-
const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);
|
|
185
113
|
const newColumn = this.getNewColumn("FASTA" /* NOTATION.FASTA */);
|
|
186
114
|
// assign the values to the empty column
|
|
187
115
|
newColumn.init((idx) => {
|
|
188
|
-
const separatorPolymer = this.
|
|
116
|
+
const separatorPolymer = this.column.get(idx);
|
|
189
117
|
// items can be monomers or separators
|
|
190
|
-
const separatorItemsArray = splitter(separatorPolymer);
|
|
118
|
+
const separatorItemsArray = this.splitter(separatorPolymer);
|
|
191
119
|
const fastaMonomersArray = [];
|
|
192
120
|
for (let i = 0; i < separatorItemsArray.length; i++) {
|
|
193
121
|
const item = separatorItemsArray[i];
|
|
@@ -207,9 +135,56 @@ export class NotationConverter {
|
|
|
207
135
|
});
|
|
208
136
|
return newColumn;
|
|
209
137
|
}
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
138
|
+
/**
|
|
139
|
+
* Convert HELM column to FASTA/SEPARATOR
|
|
140
|
+
*
|
|
141
|
+
* @param {string} tgtNotation Target notation: FASTA or SEPARATOR
|
|
142
|
+
* @param {string} tgtSeparator Optional target separator (for HELM ->
|
|
143
|
+
* SEPARATOR)
|
|
144
|
+
* @param {string | null} tgtGapSymbol Optional target gap symbol
|
|
145
|
+
* @return {DG.Column} Converted column
|
|
146
|
+
*/
|
|
147
|
+
convertHelm(tgtNotation, tgtSeparator = '', tgtGapSymbol = null) {
|
|
148
|
+
// This function must not contain calls of isDna() and isRna(), for
|
|
149
|
+
// source helm columns may contain RNA, DNA and PT across different rows
|
|
150
|
+
if (tgtGapSymbol === null) {
|
|
151
|
+
tgtGapSymbol = (this.toFasta(tgtNotation)) ?
|
|
152
|
+
this._defaultGapSymbolsDict.FASTA :
|
|
153
|
+
this._defaultGapSymbolsDict.SEPARATOR;
|
|
154
|
+
}
|
|
155
|
+
if (this.toSeparator(tgtNotation) && tgtSeparator === '')
|
|
156
|
+
tgtSeparator = this.separator;
|
|
157
|
+
const helmWrappersRe = /(R\(|D\(|\)|P)/g;
|
|
158
|
+
const newColumn = this.getNewColumn(tgtNotation);
|
|
159
|
+
// assign the values to the empty column
|
|
160
|
+
newColumn.init((idx) => {
|
|
161
|
+
const helmPolymer = this.column.get(idx);
|
|
162
|
+
// we cannot use isDna() or isRna() because source helm columns can
|
|
163
|
+
// contain DNA, RNA and PT in different cells, so the corresponding
|
|
164
|
+
// tags cannot be set for the whole column
|
|
165
|
+
const isNucleotide = helmPolymer.startsWith('DNA') || helmPolymer.startsWith('RNA');
|
|
166
|
+
// items can be monomers or helms
|
|
167
|
+
const helmItemsArray = this.splitter(helmPolymer);
|
|
168
|
+
const tgtMonomersArray = [];
|
|
169
|
+
for (let i = 0; i < helmItemsArray.length; i++) {
|
|
170
|
+
let item = helmItemsArray[i];
|
|
171
|
+
if (isNucleotide)
|
|
172
|
+
item = item.replace(helmWrappersRe, '');
|
|
173
|
+
if (item === this._defaultGapSymbolsDict.HELM) {
|
|
174
|
+
tgtMonomersArray.push(tgtGapSymbol);
|
|
175
|
+
}
|
|
176
|
+
else if (this.toFasta(tgtNotation) && item.length > 1) {
|
|
177
|
+
// the case of a multi-character monomer converted to FASTA
|
|
178
|
+
const monomer = '[' + item + ']';
|
|
179
|
+
tgtMonomersArray.push(monomer);
|
|
180
|
+
}
|
|
181
|
+
else {
|
|
182
|
+
tgtMonomersArray.push(item);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
return tgtMonomersArray.join(tgtSeparator);
|
|
186
|
+
});
|
|
187
|
+
return newColumn;
|
|
213
188
|
}
|
|
214
189
|
convertHelmToSeparator() {
|
|
215
190
|
// TODO: implementation; return this.getNewColumn();
|
|
@@ -217,26 +192,26 @@ export class NotationConverter {
|
|
|
217
192
|
}
|
|
218
193
|
/** Dispatcher method for notation conversion
|
|
219
194
|
*
|
|
220
|
-
* @param {NOTATION}
|
|
195
|
+
* @param {NOTATION} tgtNotation Notation we want to convert to
|
|
221
196
|
* @param {string | null} tgtSeparator Possible separator
|
|
222
197
|
* @return {DG.Column} Converted column
|
|
223
198
|
*/
|
|
224
|
-
convert(
|
|
199
|
+
convert(tgtNotation, tgtSeparator = null) {
|
|
225
200
|
// possible exceptions
|
|
226
|
-
if (this.
|
|
227
|
-
throw new Error('
|
|
228
|
-
if (this.toSeparator(
|
|
229
|
-
throw new Error('
|
|
230
|
-
if (this.isFasta() && this.toSeparator(
|
|
201
|
+
if (this.notation === tgtNotation)
|
|
202
|
+
throw new Error('tgt notation is invalid');
|
|
203
|
+
if (this.toSeparator(tgtNotation) && tgtSeparator === null)
|
|
204
|
+
throw new Error('tgt separator is not specified');
|
|
205
|
+
if (this.isFasta() && this.toSeparator(tgtNotation) && tgtSeparator !== null)
|
|
231
206
|
return this.convertFastaToSeparator(tgtSeparator);
|
|
232
|
-
else if ((this.isFasta() || this.isSeparator()) && this.toHelm(
|
|
207
|
+
else if ((this.isFasta() || this.isSeparator()) && this.toHelm(tgtNotation))
|
|
233
208
|
return this.convertToHelm();
|
|
234
|
-
else if (this.isSeparator() && this.toFasta(
|
|
209
|
+
else if (this.isSeparator() && this.toFasta(tgtNotation))
|
|
235
210
|
return this.convertSeparatorToFasta();
|
|
236
|
-
else if (this.isHelm() && this.toFasta(
|
|
237
|
-
return this.
|
|
238
|
-
else
|
|
239
|
-
return this.
|
|
211
|
+
else if (this.isHelm() && this.toFasta(tgtNotation)) // the case of HELM
|
|
212
|
+
return this.convertHelm(tgtNotation);
|
|
213
|
+
else // this.isHelm() && this.toSeparator(tgtNotation)
|
|
214
|
+
return this.convertHelm(tgtNotation, tgtSeparator);
|
|
240
215
|
}
|
|
241
216
|
}
|
|
242
|
-
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"notation-converter.js","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAe,OAAO,EAAC,MAAM,qBAAqB,CAAC;AAS1D,iFAAiF;AACjF,MAAM,OAAO,iBAAiB;IA+Q5B,YAAmB,GAAc;QA1QzB,2BAAsB,GAAG;YAC/B,IAAI,EAAE,GAAG;YACT,SAAS,EAAE,EAAE;YACb,KAAK,EAAE,GAAG;SACX,CAAC;QAEM,cAAS,GAAwB,IAAI,CAAC;QAqQ5C,IAAI,CAAC,aAAa,GAAG,GAAG,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrD,IAAI,KAAK,KAAK,IAAI;YAChB,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;;YAE1B,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACvD,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAChD,IAAI,CAAC,iBAAiB,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;YAC7E,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;gBAClD,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;IAC5C,CAAC;IA9QD,IAAc,QAAQ;QACpB,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI,EAAE;YAC3B,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;SACnE;QACD,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAAA,CAAC;IAGF,IAAY,WAAW,KAAa,OAAO,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;IAE/D,IAAY,YAAY,KAAgB,OAAO,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC;IAEpE,IAAW,cAAc,KAAe,OAAO,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC;IAEtE,IAAW,gBAAgB,KAAa,OAAO,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC;IAExE,IAAW,SAAS;QAClB,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QACxD,IAAI,SAAS,KAAK,IAAI;YACpB,OAAO,SAAS,CAAC;;YAEjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;IACzC,CAAC;IAEM,OAAO,KAAc,OAAO,IAAI,CAAC,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAErE,WAAW,KAAc,OAAO,IAAI,CAAC,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAE7E,MAAM,KAAc,OAAO,IAAI,CAAC,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAEnE,OAAO,CAAC,cAAwB,IAAa,OAAO,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAExF,WAAW,CAAC,cAAwB,IAAa,OAAO,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAEhG,MAAM,CAAC,cAAwB,IAAa,OAAO,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAEtF,KAAK,KAAc,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE3E,KAAK,
KAAc,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE3E,SAAS,KAAc,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE9E,mBAAmB,CAAC,GAAW,EAAE,iBAAyB,GAAG,EAAE,gBAAwB,GAAG;QAC/F,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;oBAChC,SAAS,CAAC,CAAC,6CAA6C;QAE9D,IAAI,MAAM,KAAK,SAAS;YACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,CAAC;QACvB,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QACxD,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QAE3F,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,gBAAgB,GAAa,YAAY,CAAC,GAAG,CAAC,CAAC,EAAU,EAAE,EAAE;YACjE,IAAI,EAAE,KAAK,cAAc,EAAE;gBACzB,OAAO,aAAa,CAAC;aACtB;iBAAM;gBACL,OAAO,GAAG,WAAW,GAAG,EAAE,GAAG,YAAY,EAAE,CAAC;aAC7C;QACH,CAAC,CAAC,CAAC;QACH,OAAO,GAAG,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;IAC5D,CAAC;IAED,4DAA4D;IAC5D;;OAEG;IACK,iBAAiB;QACvB,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;YACpD,oCAAsB;aACnB,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC;YAC7D,4CAA0B;aACvB,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC;YACxD,kCAAqB;;YAErB,MAAM,IAAI,KAAK,CAAC,6DAA6D,CAAC,CAAC;IACnF,CAAC;IAED;;;;;;OAMG;IACK,YAAY,CAAC,cAAwB;QAC3C,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC;QAC9B,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC;QACvB,MAAM,IAAI,GAAG,cAAc,CAAC,WAAW,EAAE,GAAG,GAAG,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC;QACjE,MAAM,UAAU,GAAG,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC7D,aAAa;QACb,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QACpF,SAAS,CAAC,OAAO,GAA
G,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC;QAC7C,SAAS,CAAC,MAAM,CACd,EAAE,CAAC,IAAI,CAAC,KAAK,EACb,IAAI,CAAC,WAAW,CAAC,OAAO,CACtB,IAAI,CAAC,cAAc,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE,EAC5C,cAAc,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE,CACxC,CACF,CAAC;QACF,6CAA6C;QAC7C,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE;YAChC,SAAS,CAAC,MAAM,CACd,EAAE,CAAC,IAAI,CAAC,aAAa,EACrB,eAAe,CAAC,CAAC;SACpB;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;OAMG;IACK,uBAAuB,CAAC,SAAiB,EAAE,iBAAgC,IAAI;QACrF,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC;QACzC,yEAAyE;QACzE,eAAe;QACf,MAAM,QAAQ,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAEjE,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,sCAAoB,CAAC;QACxD,sDAAsD;QACtD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAChD,MAAM,kBAAkB,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;YAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClD,IAAI,kBAAkB,CAAC,CAAC,CAAC,KAAK,cAAc;oBAC1C,kBAAkB,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;aACjE;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QACzC,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,aAAa,CAAC,kBAAiC,IAAI;QACzD,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAC1C,yEAAyE;QACzE,eAAe;QACf,MAAM,QAAQ,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAEjE,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;oBAChC,SAAS,CAAC,CAAC,6CAA6C;QAE9D,IAAI,MAAM,KAAK,SAAS;YACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,CAAC;QACvB,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QACxD,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;
QAE3F,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,4BAAe,CAAC;QACnD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACjD,MAAM,mBAAmB,GAAG,QAAQ,CAAC,aAAa,CAAC,CAAC;YACpD,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;YAC3B,IAAI,cAAc,GAAG,IAAI,CAAC;YAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,GAAG,GAAG,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;gBACtC,IAAI,KAAK,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;gBACnC,IAAI,KAAK,KAAK,eAAe;oBAC3B,KAAK,GAAG,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC;gBAC3C,MAAM,IAAI,GAAG,CAAC,GAAG,EAAE,WAAW,EAAE,KAAK,EAAE,YAAY,CAAC,CAAC;gBACrD,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC9B,cAAc,GAAG,KAAK,CAAC;aACxB;YACD,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,OAAO,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC5B,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,uBAAuB,CAAC,iBAAgC,IAAI;QAClE,uBAAuB;QACvB,8DAA8D;QAC9D,0DAA0D;QAC1D,kEAAkE;QAClE,qCAAqC;QACrC,aAAa;QACb,iDAAiD;QAEjD,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC;QAErD,yEAAyE;QACzE,eAAe;QACf,MAAM,QAAQ,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAEjE,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,8BAAgB,CAAC;QACpD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,gBAAgB,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACpD,sCAAsC;YACtC,MAAM,mBAAmB,GAAG,QAAQ,CAAC,gBAAgB,CAAC,CAAC;YACvD,MAAM,kBAAkB,GAAa,EAAE,CAAC;YACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,IAAI,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;gBACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE;oBACrB,kBAAkB,CAAC,IAAI,CAAC,cAAe,CAAC,CAAC;iBAC1C;qBAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC1B,wCAAwC;oBACxC,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAClC;qBAAM;oBACL,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC/B;aACF;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,kBAAkB;QACxB,uBAA
uB;QACvB,OAAO,IAAI,CAAC,YAAY,8BAAgB,CAAC;IAC3C,CAAC;IAEO,sBAAsB;QAC5B,iDAAiD;QACjD,OAAO,IAAI,CAAC,YAAY,sCAAoB,CAAC;IAC/C,CAAC;IAED;;;;;OAKG;IACI,OAAO,CAAC,cAAwB,EAAE,eAA8B,IAAI;QACzE,sBAAsB;QACtB,IAAI,IAAI,CAAC,cAAc,KAAK,cAAc;YACxC,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,IAAI,IAAI,CAAC,WAAW,CAAC,cAAc,CAAC,IAAI,YAAY,KAAK,IAAI;YAC3D,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEvD,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,CAAC,cAAc,CAAC,IAAI,YAAY,KAAK,IAAI;YAC7E,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;aAC/C,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC;YAC5E,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;aACzB,IAAI,IAAI,CAAC,WAAW,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC;YACzD,OAAO,IAAI,CAAC,uBAAuB,EAAE,CAAC;aACnC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC;YACpD,OAAO,IAAI,CAAC,kBAAkB,EAAE,CAAC;;YAEjC,OAAO,IAAI,CAAC,sBAAsB,EAAE,CAAC;IACzC,CAAC;CAcF","sourcesContent":["import * as DG from 'datagrok-api/dg';\nimport {SplitterFunc, WebLogo} from '../viewers/web-logo';\n\n/** enum type to simplify setting \"user-friendly\" notation if necessary */\nexport const enum NOTATION {\n  FASTA = 'FASTA',\n  SEPARATOR = 'SEPARATOR',\n  HELM = 'HELM'\n}\n\n/** Class for handling conversion of notation systems in Macromolecule columns */\nexport class NotationConverter {\n  private readonly _sourceColumn: DG.Column; // the column to be converted\n  private _sourceUnits: string; // units, of the form fasta:SEQ:NT, etc.\n  private _sourceNotation: NOTATION; // current notation (without :SEQ:NT, etc.)\n  private _defaultGapSymbol: string;\n  private _defaultGapSymbolsDict = {\n    HELM: '*',\n    SEPARATOR: '',\n    FASTA: '-',\n  };\n\n  private _splitter: SplitterFunc | null = null;\n  protected get splitter(): SplitterFunc {\n    if (this._splitter === null) {\n      this._splitter = WebLogo.getSplitterForColumn(this._sourceColumn);\n    }\n    return this._splitter;\n  };\n\n\n  private get sourceUnits(): string { return 
this._sourceUnits; }\n\n  private get sourceColumn(): DG.Column { return this._sourceColumn; }\n\n  public get sourceNotation(): NOTATION { return this._sourceNotation; }\n\n  public get defaultGapSymbol(): string { return this._defaultGapSymbol; }\n\n  public get separator(): string {\n    const separator = this.sourceColumn.getTag('separator');\n    if (separator !== null)\n      return separator;\n    else\n      throw new Error('Separator not set');\n  }\n\n  public isFasta(): boolean { return this.sourceNotation === NOTATION.FASTA; }\n\n  public isSeparator(): boolean { return this.sourceNotation === NOTATION.SEPARATOR; }\n\n  public isHelm(): boolean { return this.sourceNotation === NOTATION.HELM; }\n\n  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }\n\n  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }\n\n  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }\n\n  public isRna(): boolean { return this.sourceUnits.toLowerCase().endsWith('rna'); }\n\n  public isDna(): boolean { return this.sourceUnits.toLowerCase().endsWith('dna'); }\n\n  public isPeptide(): boolean { return this.sourceUnits.toLowerCase().endsWith('pt'); }\n\n  public convertStringToHelm(src: string, fastaGapSymbol: string = '-', helmGapSymbol: string = '*') {\n    const prefix = (this.isDna()) ? 'DNA1{' :\n      (this.isRna()) ? 'RNA1{' :\n        (this.isPeptide()) ? 'PEPTIDE1{' :\n          'Unknown'; // this case should be handled as exceptional\n\n    if (prefix === 'Unknown')\n      throw new Error('Neither peptide, nor nucleotide');\n\n    const postfix = '}$$$';\n    const leftWrapper = (this.isDna()) ? 'D(' :\n      (this.isRna()) ? 'R(' : ''; // no wrapper for peptides\n    const rightWrapper = (this.isDna() || this.isRna()) ? 
')P' : ''; // no wrapper for peptides\n\n    const monomerArray = this.splitter(src);\n    const monomerHelmArray: string[] = monomerArray.map((mm: string) => {\n      if (mm === fastaGapSymbol) {\n        return helmGapSymbol;\n      } else {\n        return `${leftWrapper}${mm}${rightWrapper}`;\n      }\n    });\n    return `${prefix}${monomerHelmArray.join('.')}${postfix}`;\n  }\n\n  /** Associate notation types with the corresponding units */\n  /**\n   * @return {NOTATION}     Notation associated with the units type\n   */\n  private getSourceNotation(): NOTATION {\n    if (this.sourceUnits.toLowerCase().startsWith('fasta'))\n      return NOTATION.FASTA;\n    else if (this.sourceUnits.toLowerCase().startsWith('separator'))\n      return NOTATION.SEPARATOR;\n    else if (this.sourceUnits.toLowerCase().startsWith('helm'))\n      return NOTATION.HELM;\n    else\n      throw new Error('The column has units that do not correspond to any notation');\n  }\n\n  /**\n   * Create a new empty column of the specified notation type and the same\n   * length as sourceColumn\n   *\n   * @param {NOTATION} targetNotation\n   * @return {DG.Column}\n   */\n  private getNewColumn(targetNotation: NOTATION): DG.Column {\n    const col = this.sourceColumn;\n    const len = col.length;\n    const name = targetNotation.toLowerCase() + '(' + col.name + ')';\n    const newColName = col.dataFrame.columns.getUnusedName(name);\n    // dummy code\n    const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));\n    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;\n    newColumn.setTag(\n      DG.TAGS.UNITS,\n      this.sourceUnits.replace(\n        this.sourceNotation.toLowerCase().toString(),\n        targetNotation.toLowerCase().toString()\n      )\n    );\n    // TODO: specify cell renderers for all cases\n    if (this.toFasta(targetNotation)) {\n      newColumn.setTag(\n        DG.TAGS.CELL_RENDERER,\n        'Macromolecule');\n    }\n    return newColumn;\n  }\n\n 
 /**\n   * Convert a Macromolecule column from FASTA to SEPARATOR notation\n   *\n   * @param {string} separator  A specific separator to be used\n   * @param {string} fastaGapSymbol  Gap symbol in FASTA, '-' by default\n   * @return {DG.Column}        A new column in SEPARATOR notation\n   */\n  private convertFastaToSeparator(separator: string, fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this.defaultGapSymbol;\n    // A function splitting a sequence into an array of monomers according to\n    // its notation\n    const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);\n\n    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);\n    // assign the values to the newly created empty column\n    newColumn.init((idx: number) => {\n      const fastaPolymer = this.sourceColumn.get(idx);\n      const fastaMonomersArray = splitter(fastaPolymer);\n      for (let i = 0; i < fastaMonomersArray.length; i++) {\n        if (fastaMonomersArray[i] === fastaGapSymbol)\n          fastaMonomersArray[i] = this._defaultGapSymbolsDict.SEPARATOR;\n      }\n      return fastaMonomersArray.join(separator);\n    });\n    newColumn.setTag('separator', separator);\n    return newColumn;\n  }\n\n  private convertToHelm(sourceGapSymbol: string | null = null) {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n    // A function splitting a sequence into an array of monomers according to\n    // its notation\n    const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);\n\n    const prefix = (this.isDna()) ? 'DNA1{' :\n      (this.isRna()) ? 'RNA1{' :\n        (this.isPeptide()) ? 'PEPTIDE1{' :\n          'Unknown'; // this case should be handled as exceptional\n\n    if (prefix === 'Unknown')\n      throw new Error('Neither peptide, nor nucleotide');\n\n    const postfix = '}$$$';\n    const leftWrapper = (this.isDna()) ? 'D(' :\n      (this.isRna()) ? 
'R(' : ''; // no wrapper for peptides\n    const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides\n\n    const newColumn = this.getNewColumn(NOTATION.HELM);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const sourcePolymer = this.sourceColumn.get(idx);\n      const sourceMonomersArray = splitter(sourcePolymer);\n      const helmArray = [prefix];\n      let firstIteration = true;\n      for (let i = 0; i < sourceMonomersArray.length; i++) {\n        const dot = firstIteration ? '' : '.';\n        let token = sourceMonomersArray[i];\n        if (token === sourceGapSymbol)\n          token = this._defaultGapSymbolsDict.HELM;\n        const item = [dot, leftWrapper, token, rightWrapper];\n        helmArray.push(item.join(''));\n        firstIteration = false;\n      }\n      helmArray.push(postfix);\n      return helmArray.join('');\n    });\n    return newColumn;\n  }\n\n  private convertSeparatorToFasta(fastaGapSymbol: string | null = null): DG.Column {\n    // TODO: implementation\n    // * similarly to fasta2separator, divide string into monomers\n    // * adjacent separators is a gap (symbol to be specified)\n    // * the monomers MUST be single-character onles, otherwise forbid\n    // * NO, they can be multi-characters\n    // conversion\n    // * consider automatic determining the separator\n\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this._defaultGapSymbolsDict.FASTA;\n\n    // A function splitting a sequence into an array of monomers according to\n    // its notation\n    const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);\n\n    const newColumn = this.getNewColumn(NOTATION.FASTA);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const separatorPolymer = this.sourceColumn.get(idx);\n      // items can be monomers or separators\n      const separatorItemsArray = splitter(separatorPolymer);\n      const 
fastaMonomersArray: string[] = [];\n      for (let i = 0; i < separatorItemsArray.length; i++) {\n        const item = separatorItemsArray[i];\n        if (item.length === 0) {\n          fastaMonomersArray.push(fastaGapSymbol!);\n        } else if (item.length > 1) {\n          // the case of a multi-character monomer\n          const monomer = '[' + item + ']';\n          fastaMonomersArray.push(monomer);\n        } else {\n          fastaMonomersArray.push(item);\n        }\n      }\n      return fastaMonomersArray.join('');\n    });\n    return newColumn;\n  }\n\n  private convertHelmToFasta(): DG.Column {\n    // TODO: implementation\n    return this.getNewColumn(NOTATION.FASTA);\n  }\n\n  private convertHelmToSeparator(): DG.Column {\n    // TODO: implementatioreturn this.getNewColumn();\n    return this.getNewColumn(NOTATION.SEPARATOR);\n  }\n\n  /** Dispatcher method for notation conversion\n   *\n   * @param {NOTATION} targetNotation   Notation we want to convert to\n   * @param {string | null} tgtSeparator   Possible separator\n   * @return {DG.Column}                Converted column\n   */\n  public convert(targetNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {\n    // possible exceptions\n    if (this.sourceNotation === targetNotation)\n      throw new Error('Target notation is invalid');\n    if (this.toSeparator(targetNotation) && tgtSeparator === null)\n      throw new Error('Target separator is not specified');\n\n    if (this.isFasta() && this.toSeparator(targetNotation) && tgtSeparator !== null)\n      return this.convertFastaToSeparator(tgtSeparator);\n    else if ((this.isFasta() || this.isSeparator()) && this.toHelm(targetNotation))\n      return this.convertToHelm();\n    else if (this.isSeparator() && this.toFasta(targetNotation))\n      return this.convertSeparatorToFasta();\n    else if (this.isHelm() && this.toFasta(targetNotation))\n      return this.convertHelmToFasta();\n    else\n      return 
this.convertHelmToSeparator();\n  }\n\n  public constructor(col: DG.Column) {\n    this._sourceColumn = col;\n    const units = this._sourceColumn.tags[DG.TAGS.UNITS];\n    if (units !== null)\n      this._sourceUnits = units;\n    else\n      throw new Error('Units are not specified in column');\n    this._sourceNotation = this.getSourceNotation();\n    this._defaultGapSymbol = (this.isFasta()) ? this._defaultGapSymbolsDict.FASTA :\n      (this.isHelm()) ? this._defaultGapSymbolsDict.HELM :\n        this._defaultGapSymbolsDict.SEPARATOR;\n  }\n}\n"]}
|
|
217
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"notation-converter.js","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAIA,OAAO,EAAe,OAAO,EAAC,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAC,YAAY,EAAW,MAAM,iBAAiB,CAAC;AAEvD,iFAAiF;AACjF,MAAM,OAAO,iBAAkB,SAAQ,YAAY;IAkPjD,YAAmB,GAAc;QAC/B,KAAK,CAAC,GAAG,CAAC,CAAC;QAlPL,cAAS,GAAwB,IAAI,CAAC;IAmP9C,CAAC;IAlPD,IAAc,QAAQ;QACpB,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI;YACzB,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7D,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAAA,CAAC;IAEK,OAAO,CAAC,cAAwB,IAAa,OAAO,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAExF,WAAW,CAAC,cAAwB,IAAa,OAAO,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAEhG,MAAM,CAAC,cAAwB,IAAa,OAAO,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAE7F;;;;;;OAMG;IACK,uBAAuB,CAAC,SAAiB,EAAE,iBAAgC,IAAI;QACrF,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAEzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,sCAAoB,CAAC;QACxD,sDAAsD;QACtD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1C,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClD,IAAI,kBAAkB,CAAC,CAAC,CAAC,KAAK,cAAc;oBAC1C,kBAAkB,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;aACjE;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QACzC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,eAAe;QACrB,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;oBAChC,SAAS,CAAC,CAAC,6CAA6C;QAE9D,IAAI,MAAM,KAAK,SAAS;YACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,CAAC;QACvB,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QACxD,MAAM,YAAY,GA
AG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QAC3F,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,mDAAmD;IAC3C,mBAAmB,CACzB,aAAqB,EACrB,eAAuB,EACvB,MAAc,EACd,WAAmB,EACnB,YAAoB,EACpB,OAAe;QAEf,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,gBAAgB,GAAa,YAAY,CAAC,GAAG,CAAC,CAAC,EAAU,EAAE,EAAE;YACjE,IAAI,EAAE,KAAK,eAAe;gBACxB,OAAO,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC;;gBAExC,OAAO,GAAG,WAAW,GAAG,EAAE,GAAG,YAAY,EAAE,CAAC;QAChD,CAAC,CAAC,CAAC;QACH,OAAO,GAAG,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;IAC5D,CAAC;IAED;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAqB,EACrB,kBAAiC,IAAI;QAErC,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAC5E,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IAC9G,CAAC;IAED;;;;;OAKG;IACK,aAAa,CAAC,kBAAiC,IAAI;QACzD,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAE1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAE5E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,4BAAe,CAAC;QACnD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAgB,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAC/G,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,uBAAuB,CAAC,iBAAgC,IAAI;QAClE,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC;QAErD,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,8BAAgB,CAAC;QACpD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC9C,sCAAsC;YACtC,MAAM,mBAAmB,GAAG,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;YAC5D,MAAM,kBAAkB,GAAa,EAAE,CAAC;YACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,IAAI,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;gBACpC,I
AAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE;oBACrB,kBAAkB,CAAC,IAAI,CAAC,cAAe,CAAC,CAAC;iBAC1C;qBAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC1B,wCAAwC;oBACxC,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAClC;qBAAM;oBACL,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC/B;aACF;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;OAQG;IACK,WAAW,CACjB,WAAmB,EACnB,eAAuB,EAAE,EACzB,eAA8B,IAAI;QAElC,mEAAmE;QACnE,wEAAwE;QACxE,IAAI,YAAY,KAAK,IAAI,EAAE;YACzB,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,CAAC,CAAC,CAAC;gBACtD,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;gBACnC,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;SACzC;QAED,IAAI,IAAI,CAAC,WAAW,CAAC,WAAuB,CAAC,IAAI,YAAY,KAAK,EAAE;YAClE,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC;QAEhC,MAAM,cAAc,GAAG,iBAAiB,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,WAAuB,CAAC,CAAC;QAC7D,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEzC,mEAAmE;YACnE,mEAAmE;YACnE,0CAA0C;YAC1C,MAAM,YAAY,GAAG,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEpF,iCAAiC;YACjC,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAClD,MAAM,gBAAgB,GAAa,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC9C,IAAI,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,YAAY;oBACd,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;gBAC1C,IAAI,IAAI,KAAK,IAAI,CAAC,sBAAsB,CAAC,IAAI,EAAE;oBAC7C,gBAAgB,CAAC,IAAI,CAAC,YAAa,CAAC,CAAC;iBACtC;qBAAM,IAAI,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBACnE,2DAA2D;oBAC3D,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAChC;qBAAM;oBACL,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC7B;aACF;YACD,OAAO,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,sBAAsB;QAC5B,iDAAiD;QACjD,OAAO,IAAI,CAAC,YAAY,sCAAoB,CAAC;IAC/C,CAAC;IAED;;;;;OAKG;IACI,OAAO,CAAC
,WAAqB,EAAE,eAA8B,IAAI;QACtE,sBAAsB;QACtB,IAAI,IAAI,CAAC,QAAQ,KAAK,WAAW;YAC/B,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YACxD,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAEpD,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YAC1E,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;aAC/C,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC;YACzE,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;aACzB,IAAI,IAAI,CAAC,WAAW,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;YACtD,OAAO,IAAI,CAAC,uBAAuB,EAAE,CAAC;aACnC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,mBAAmB;YACtE,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;aAClC,iDAAiD;YACpD,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,YAAa,CAAC,CAAC;IACxD,CAAC;CAKF","sourcesContent":["/* Do not change these import lines to match external modules in webpack configuration */\nimport * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\nimport {SplitterFunc, WebLogo} from '../viewers/web-logo';\nimport {UnitsHandler, NOTATION} from './units-handler';\n\n/** Class for handling conversion of notation systems in Macromolecule columns */\nexport class NotationConverter extends UnitsHandler {\n  private _splitter: SplitterFunc | null = null;\n  protected get splitter(): SplitterFunc {\n    if (this._splitter === null)\n      this._splitter = WebLogo.getSplitterForColumn(this.column);\n    return this._splitter;\n  };\n\n  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }\n\n  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }\n\n  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }\n\n  /**\n   * Convert a Macromolecule column from FASTA to SEPARATOR notation\n   *\n   * @param {string} separator  A specific separator to be used\n   * @param 
{string} fastaGapSymbol  Gap symbol in FASTA, '-' by default\n   * @return {DG.Column}        A new column in SEPARATOR notation\n   */\n  private convertFastaToSeparator(separator: string, fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this.defaultGapSymbol;\n\n    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);\n    // assign the values to the newly created empty column\n    newColumn.init((idx: number) => {\n      const fastaPolymer = this.column.get(idx);\n      const fastaMonomersArray = this.splitter(fastaPolymer);\n      for (let i = 0; i < fastaMonomersArray.length; i++) {\n        if (fastaMonomersArray[i] === fastaGapSymbol)\n          fastaMonomersArray[i] = this._defaultGapSymbolsDict.SEPARATOR;\n      }\n      return fastaMonomersArray.join(separator);\n    });\n    newColumn.setTag('separator', separator);\n    return newColumn;\n  }\n\n  /**\n   * Get the wrapper strings for HELM, depending on the type of the\n   * macromolecule (peptide, DNA, RNA)\n   *\n   * @return {string[]} Array of wrappers\n   */\n  private getHelmWrappers(): string[] {\n    const prefix = (this.isDna()) ? 'DNA1{' :\n      (this.isRna()) ? 'RNA1{' :\n        (this.isPeptide()) ? 'PEPTIDE1{' :\n          'Unknown'; // this case should be handled as exceptional\n\n    if (prefix === 'Unknown')\n      throw new Error('Neither peptide, nor nucleotide');\n\n    const postfix = '}$$$';\n    const leftWrapper = (this.isDna()) ? 'D(' :\n      (this.isRna()) ? 'R(' : ''; // no wrapper for peptides\n    const rightWrapper = (this.isDna() || this.isRna()) ? 
')P' : ''; // no wrapper for peptides\n    return [prefix, leftWrapper, rightWrapper, postfix];\n  }\n\n  // A helper function for converting strings to HELM\n  private convertToHelmHelper(\n    sourcePolymer: string,\n    sourceGapSymbol: string,\n    prefix: string,\n    leftWrapper: string,\n    rightWrapper: string,\n    postfix: string\n  ): string {\n    const monomerArray = this.splitter(sourcePolymer);\n    const monomerHelmArray: string[] = monomerArray.map((mm: string) => {\n      if (mm === sourceGapSymbol)\n        return this._defaultGapSymbolsDict.HELM;\n      else\n        return `${leftWrapper}${mm}${rightWrapper}`;\n    });\n    return `${prefix}${monomerHelmArray.join('.')}${postfix}`;\n  }\n\n  /**\n   * Convert a string with SEPARATOR/FASTA notation to HELM\n   *\n   * @param {string} sourcePolymer  A string to be converted\n   * @param {string | null} sourceGapSymbol  An optional gap symbol, set to\n   * default values ('-' for FASTA and '' for SEPARATOR) unless specified\n   * @return {string}  The target HELM string\n   */\n  public convertStringToHelm(\n    sourcePolymer: string,\n    sourceGapSymbol: string | null = null\n  ) : string {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n    return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);\n  }\n\n  /**\n   * Convert a column to HELM\n   *\n   * @param {string | null} sourceGapSymbol\n   * @return {DG.Column}\n   */\n  private convertToHelm(sourceGapSymbol: string | null = null): DG.Column {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n\n    const newColumn = this.getNewColumn(NOTATION.HELM);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const 
sourcePolymer = this.column.get(idx);\n      return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol!, prefix, leftWrapper, rightWrapper, postfix);\n    });\n    return newColumn;\n  }\n\n  /**\n   * Convert SEPARATOR column to FASTA notation\n   *\n   * @param {string | null} fastaGapSymbol Optional gap symbol for FASTA\n   * @return {DG.Column}  Converted column\n   */\n  private convertSeparatorToFasta(fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this._defaultGapSymbolsDict.FASTA;\n\n    const newColumn = this.getNewColumn(NOTATION.FASTA);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const separatorPolymer = this.column.get(idx);\n      // items can be monomers or separators\n      const separatorItemsArray = this.splitter(separatorPolymer);\n      const fastaMonomersArray: string[] = [];\n      for (let i = 0; i < separatorItemsArray.length; i++) {\n        const item = separatorItemsArray[i];\n        if (item.length === 0) {\n          fastaMonomersArray.push(fastaGapSymbol!);\n        } else if (item.length > 1) {\n          // the case of a multi-character monomer\n          const monomer = '[' + item + ']';\n          fastaMonomersArray.push(monomer);\n        } else {\n          fastaMonomersArray.push(item);\n        }\n      }\n      return fastaMonomersArray.join('');\n    });\n    return newColumn;\n  }\n\n  /**\n   *  Convert HELM column to FASTA/SEPARATOR\n   *\n   * @param {string} tgtNotation    Target notation: FASTA or SEPARATOR\n   * @param {string} tgtSeparator   Optional target separator (for HELM ->\n   * @param {string | null} tgtGapSymbol   Optional target gap symbol\n   * SEPARATOR)\n   * @return {DG.Column} Converted column\n   */\n  private convertHelm(\n    tgtNotation: string,\n    tgtSeparator: string = '',\n    tgtGapSymbol: string | null = null\n  ): DG.Column {\n    // This function must not contain calls of 
isDna() and isRna(), for\n    // source helm columns may contain RNA, DNA and PT across different rows\n    if (tgtGapSymbol === null) {\n      tgtGapSymbol = (this.toFasta(tgtNotation as NOTATION)) ?\n        this._defaultGapSymbolsDict.FASTA :\n        this._defaultGapSymbolsDict.SEPARATOR;\n    }\n\n    if (this.toSeparator(tgtNotation as NOTATION) && tgtSeparator === '')\n      tgtSeparator = this.separator;\n\n    const helmWrappersRe = /(R\\(|D\\(|\\)|P)/g;\n    const newColumn = this.getNewColumn(tgtNotation as NOTATION);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const helmPolymer = this.column.get(idx);\n\n      // we cannot use isDna() or isRna() because source helm columns can\n      // contain DNA, RNA and PT in different cells, so the corresponding\n      // tags cannot be set for the whole column\n      const isNucleotide = helmPolymer.startsWith('DNA') || helmPolymer.startsWith('RNA');\n\n      // items can be monomers or helms\n      const helmItemsArray = this.splitter(helmPolymer);\n      const tgtMonomersArray: string[] = [];\n      for (let i = 0; i < helmItemsArray.length; i++) {\n        let item = helmItemsArray[i];\n        if (isNucleotide)\n          item = item.replace(helmWrappersRe, '');\n        if (item === this._defaultGapSymbolsDict.HELM) {\n          tgtMonomersArray.push(tgtGapSymbol!);\n        } else if (this.toFasta(tgtNotation as NOTATION) && item.length > 1) {\n          // the case of a multi-character monomer converted to FASTA\n          const monomer = '[' + item + ']';\n          tgtMonomersArray.push(monomer);\n        } else {\n          tgtMonomersArray.push(item);\n        }\n      }\n      return tgtMonomersArray.join(tgtSeparator);\n    });\n    return newColumn;\n  }\n\n  private convertHelmToSeparator(): DG.Column {\n    // TODO: implementatioreturn this.getNewColumn();\n    return this.getNewColumn(NOTATION.SEPARATOR);\n  }\n\n  /** Dispatcher method for notation 
conversion\n   *\n   * @param {NOTATION} tgtNotation   Notation we want to convert to\n   * @param {string | null} tgtSeparator   Possible separator\n   * @return {DG.Column}                Converted column\n   */\n  public convert(tgtNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {\n    // possible exceptions\n    if (this.notation === tgtNotation)\n      throw new Error('tgt notation is invalid');\n    if (this.toSeparator(tgtNotation) && tgtSeparator === null)\n      throw new Error('tgt separator is not specified');\n\n    if (this.isFasta() && this.toSeparator(tgtNotation) && tgtSeparator !== null)\n      return this.convertFastaToSeparator(tgtSeparator);\n    else if ((this.isFasta() || this.isSeparator()) && this.toHelm(tgtNotation))\n      return this.convertToHelm();\n    else if (this.isSeparator() && this.toFasta(tgtNotation))\n      return this.convertSeparatorToFasta();\n    else if (this.isHelm() && this.toFasta(tgtNotation)) // the case of HELM\n      return this.convertHelm(tgtNotation);\n    else // this.isHelm() && this.toSeparator(tgtNotation)\n      return this.convertHelm(tgtNotation, tgtSeparator!);\n  }\n\n  public constructor(col: DG.Column) {\n    super(col);\n  }\n}\n"]}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
/** enum type to simplify setting "user-friendly" notation if necessary */
|
|
3
|
+
export declare const enum NOTATION {
|
|
4
|
+
FASTA = "FASTA",
|
|
5
|
+
SEPARATOR = "SEPARATOR",
|
|
6
|
+
HELM = "HELM"
|
|
7
|
+
}
|
|
8
|
+
/** Class for handling notation units in Macromolecule columns */
|
|
9
|
+
export declare class UnitsHandler {
|
|
10
|
+
protected readonly _column: DG.Column;
|
|
11
|
+
protected _units: string;
|
|
12
|
+
protected _notation: NOTATION;
|
|
13
|
+
protected _defaultGapSymbol: string;
|
|
14
|
+
protected _defaultGapSymbolsDict: {
|
|
15
|
+
HELM: string;
|
|
16
|
+
SEPARATOR: string;
|
|
17
|
+
FASTA: string;
|
|
18
|
+
};
|
|
19
|
+
protected get units(): string;
|
|
20
|
+
protected get column(): DG.Column;
|
|
21
|
+
get notation(): NOTATION;
|
|
22
|
+
get defaultGapSymbol(): string;
|
|
23
|
+
get separator(): string;
|
|
24
|
+
isFasta(): boolean;
|
|
25
|
+
isSeparator(): boolean;
|
|
26
|
+
isHelm(): boolean;
|
|
27
|
+
isRna(): boolean;
|
|
28
|
+
isDna(): boolean;
|
|
29
|
+
isPeptide(): boolean;
|
|
30
|
+
/** Associate notation types with the corresponding units */
|
|
31
|
+
/**
|
|
32
|
+
* @return {NOTATION} Notation associated with the units type
|
|
33
|
+
*/
|
|
34
|
+
protected getNotation(): NOTATION;
|
|
35
|
+
/**
|
|
36
|
+
* Create a new empty column of the specified notation type and the same
|
|
37
|
+
* length as column
|
|
38
|
+
*
|
|
39
|
+
* @param {NOTATION} targetNotation
|
|
40
|
+
* @return {DG.Column}
|
|
41
|
+
*/
|
|
42
|
+
protected getNewColumn(targetNotation: NOTATION): DG.Column;
|
|
43
|
+
/**
|
|
44
|
+
* Create a new empty column using templateCol as a template
|
|
45
|
+
*
|
|
46
|
+
* @param {DG.Column} templateCol the properties and units of this column are used as a
|
|
47
|
+
* template to build the new one
|
|
48
|
+
* @return {DG.Column}
|
|
49
|
+
*/
|
|
50
|
+
static getNewColumn(templateCol: DG.Column): DG.Column;
|
|
51
|
+
/**
|
|
52
|
+
* A helper function checking the validity of the 'units' string
|
|
53
|
+
*
|
|
54
|
+
* @param {string} units the string to be validated
|
|
55
|
+
* @return {boolean}
|
|
56
|
+
*/
|
|
57
|
+
static unitsStringIsValid(units: string): boolean;
|
|
58
|
+
/**
|
|
59
|
+
* Construct a new column of semantic type MACROMOLECULE from the list of
|
|
60
|
+
* specified parameters
|
|
61
|
+
*
|
|
62
|
+
* @param {number} len the length of the new column
|
|
63
|
+
* @param {string} name the name of the new column
|
|
64
|
+
* @param {string} units the units of the new column
|
|
65
|
+
* @return {DG.Column}
|
|
66
|
+
*/
|
|
67
|
+
static getNewColumnFromParams(len: number, name: string, units: string): DG.Column;
|
|
68
|
+
constructor(col: DG.Column);
|
|
69
|
+
}
|
|
70
|
+
//# sourceMappingURL=units-handler.d.ts.map
|