@datagrok-libraries/bio 5.44.4 → 5.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/package.json +6 -6
- package/src/helm/consts.d.ts +2 -2
- package/src/helm/consts.d.ts.map +1 -1
- package/src/helm/consts.js.map +1 -1
- package/src/helm/helm-helper.d.ts +5 -5
- package/src/helm/helm-helper.d.ts.map +1 -1
- package/src/helm/types.d.ts +4 -3
- package/src/helm/types.d.ts.map +1 -1
- package/src/monomer-works/monomer-hover.d.ts +3 -2
- package/src/monomer-works/monomer-hover.d.ts.map +1 -1
- package/src/monomer-works/monomer-hover.js +4 -3
- package/src/monomer-works/monomer-hover.js.map +1 -1
- package/src/monomer-works/monomer-utils.d.ts +6 -5
- package/src/monomer-works/monomer-utils.d.ts.map +1 -1
- package/src/monomer-works/monomer-utils.js +10 -11
- package/src/monomer-works/monomer-utils.js.map +1 -1
- package/src/monomer-works/seq-to-molfile.d.ts +2 -2
- package/src/monomer-works/seq-to-molfile.d.ts.map +1 -1
- package/src/monomer-works/seq-to-molfile.js +2 -3
- package/src/monomer-works/seq-to-molfile.js.map +1 -1
- package/src/monomer-works/to-atomic-level-utils.d.ts +2 -2
- package/src/monomer-works/to-atomic-level-utils.d.ts.map +1 -1
- package/src/monomer-works/to-atomic-level-utils.js.map +1 -1
- package/src/monomer-works/to-atomic-level.d.ts +5 -5
- package/src/monomer-works/to-atomic-level.d.ts.map +1 -1
- package/src/monomer-works/to-atomic-level.js +17 -14
- package/src/monomer-works/to-atomic-level.js.map +1 -1
- package/src/types/index.d.ts +6 -4
- package/src/types/index.d.ts.map +1 -1
- package/src/unknown.d.ts.map +1 -1
- package/src/unknown.js +2 -2
- package/src/unknown.js.map +1 -1
- package/src/utils/cell-renderer-async-base.d.ts +1 -1
- package/src/utils/cell-renderer-async-base.d.ts.map +1 -1
- package/src/utils/cell-renderer-async-base.js +5 -3
- package/src/utils/cell-renderer-async-base.js.map +1 -1
- package/src/utils/cell-renderer-consts.d.ts +2 -1
- package/src/utils/cell-renderer-consts.d.ts.map +1 -1
- package/src/utils/cell-renderer-monomer-placer.d.ts +6 -4
- package/src/utils/cell-renderer-monomer-placer.d.ts.map +1 -1
- package/src/utils/cell-renderer-monomer-placer.js +59 -47
- package/src/utils/cell-renderer-monomer-placer.js.map +1 -1
- package/src/utils/fasta-handler.d.ts.map +1 -1
- package/src/utils/fasta-handler.js +0 -3
- package/src/utils/fasta-handler.js.map +1 -1
- package/src/utils/generator.d.ts +2 -1
- package/src/utils/generator.d.ts.map +1 -1
- package/src/utils/generator.js +2 -3
- package/src/utils/generator.js.map +1 -1
- package/src/utils/macromolecule/consts.d.ts +1 -0
- package/src/utils/macromolecule/consts.d.ts.map +1 -1
- package/src/utils/macromolecule/consts.js +1 -0
- package/src/utils/macromolecule/consts.js.map +1 -1
- package/src/utils/macromolecule/scoring.d.ts +4 -2
- package/src/utils/macromolecule/scoring.d.ts.map +1 -1
- package/src/utils/macromolecule/scoring.js +8 -8
- package/src/utils/macromolecule/scoring.js.map +1 -1
- package/src/utils/macromolecule/seq-handler.d.ts +47 -0
- package/src/utils/macromolecule/seq-handler.d.ts.map +1 -0
- package/src/utils/macromolecule/seq-handler.js +8 -0
- package/src/utils/macromolecule/seq-handler.js.map +1 -0
- package/src/utils/macromolecule/types.d.ts +2 -1
- package/src/utils/macromolecule/types.d.ts.map +1 -1
- package/src/utils/macromolecule/types.js.map +1 -1
- package/src/utils/macromolecule/utils.d.ts +2 -1
- package/src/utils/macromolecule/utils.d.ts.map +1 -1
- package/src/utils/macromolecule/utils.js +4 -5
- package/src/utils/macromolecule/utils.js.map +1 -1
- package/src/utils/seq-helper.d.ts +2 -0
- package/src/utils/seq-helper.d.ts.map +1 -1
- package/src/utils/seq-helper.js.map +1 -1
- package/src/utils/splitter.d.ts +2 -1
- package/src/utils/splitter.d.ts.map +1 -1
- package/src/utils/splitter.js +2 -3
- package/src/utils/splitter.js.map +1 -1
- package/src/viewers/helm-service.d.ts +3 -1
- package/src/viewers/helm-service.d.ts.map +1 -1
- package/src/viewers/helm-service.js +2 -1
- package/src/viewers/helm-service.js.map +1 -1
- package/src/utils/seq-handler.d.ts +0 -167
- package/src/utils/seq-handler.d.ts.map +0 -1
- package/src/utils/seq-handler.js +0 -748
- package/src/utils/seq-handler.js.map +0 -1
package/src/utils/seq-handler.js
DELETED
|
@@ -1,748 +0,0 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import wu from 'wu';
|
|
3
|
-
/* eslint-disable max-len */
|
|
4
|
-
import { ALPHABET, candidateAlphabets, getSplitterWithSeparator, NOTATION, positionSeparator, splitterAsFasta, splitterAsHelm, TAGS } from './macromolecule';
|
|
5
|
-
import { detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted } from './macromolecule/utils';
|
|
6
|
-
import { mmDistanceFunctions, MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
7
|
-
import { getMonomerLibHelper } from '../monomer-works/monomer-utils';
|
|
8
|
-
import { HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL } from './const';
|
|
9
|
-
import { GAP_SYMBOL, GapOriginals } from './macromolecule/consts';
|
|
10
|
-
import { HelmTypes } from '../helm/consts';
|
|
11
|
-
/* eslint-enable max-len */
|
|
12
|
-
/** Names of the column `temp` slots used by the sequence handler. */
export const SeqTemps = new class {
  constructor() {
    Object.assign(this, {
      /** Column's temp slot name for a SeqHandler object */
      seqHandler: `seq-handler`,
      /** Column's temp slot name from which the notation provider is read */
      notationProvider: `seq-handler.notation-provider`,
    });
  }
}();
|
|
19
|
-
/** Class for handling notation units in Macromolecule columns and
|
|
20
|
-
* conversion of notation systems in Macromolecule columns
|
|
21
|
-
*/
|
|
22
|
-
export class SeqHandler {
|
|
23
|
-
constructor(col) {
|
|
24
|
-
this.notationProvider = null;
|
|
25
|
-
this._splitter = null;
|
|
26
|
-
this.cached = true;
|
|
27
|
-
this._splitted = null;
|
|
28
|
-
this.columnVersion = null;
|
|
29
|
-
this._stats = null;
|
|
30
|
-
this._maxLength = null;
|
|
31
|
-
this._posList = null;
|
|
32
|
-
this._joiner = undefined;
|
|
33
|
-
if (col.type !== DG.TYPE.STRING)
|
|
34
|
-
throw new Error(`Unexpected column type '${col.type}', must be '${DG.TYPE.STRING}'.`);
|
|
35
|
-
this._column = col;
|
|
36
|
-
const units = this._column.meta.units;
|
|
37
|
-
if (units !== null && units !== undefined)
|
|
38
|
-
this._units = units;
|
|
39
|
-
else
|
|
40
|
-
throw new Error('Units are not specified in column');
|
|
41
|
-
this._notation = this.getNotation();
|
|
42
|
-
this._defaultGapOriginal = (this.isFasta()) ? GapOriginals[NOTATION.FASTA] :
|
|
43
|
-
(this.isHelm()) ? GapOriginals[NOTATION.HELM] :
|
|
44
|
-
GapOriginals[NOTATION.SEPARATOR];
|
|
45
|
-
if (!this.column.tags.has(TAGS.aligned) || !this.column.tags.has(TAGS.alphabet) ||
|
|
46
|
-
(!this.column.tags.has(TAGS.alphabetIsMultichar) && !this.isHelm() && this.alphabet === ALPHABET.UN)) {
|
|
47
|
-
// The following detectors and setters are to be called because the column is likely
|
|
48
|
-
// as the UnitsHandler constructor was called on the column.
|
|
49
|
-
if (this.isFasta())
|
|
50
|
-
SeqHandler.setUnitsToFastaColumn(this);
|
|
51
|
-
else if (this.isSeparator()) {
|
|
52
|
-
const separator = col.getTag(TAGS.separator);
|
|
53
|
-
SeqHandler.setUnitsToSeparatorColumn(this, separator);
|
|
54
|
-
}
|
|
55
|
-
else if (this.isHelm())
|
|
56
|
-
SeqHandler.setUnitsToHelmColumn(this);
|
|
57
|
-
else
|
|
58
|
-
throw new Error(`Unexpected units '${this.column.meta.units}'.`);
|
|
59
|
-
}
|
|
60
|
-
// if (!this.column.tags.has(TAGS.alphabetSize)) {
|
|
61
|
-
// if (this.isHelm())
|
|
62
|
-
// throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
|
|
63
|
-
// `tag '${TAGS.alphabetSize}' is mandatory.`);
|
|
64
|
-
// else if (['UN'].includes(this.alphabet))
|
|
65
|
-
// throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
|
|
66
|
-
// `tag '${TAGS.alphabetSize}' is mandatory.`);
|
|
67
|
-
// }
|
|
68
|
-
if (!this.column.tags.has(TAGS.alphabetIsMultichar)) {
|
|
69
|
-
if (this.isHelm())
|
|
70
|
-
this.column.setTag(TAGS.alphabetIsMultichar, 'true');
|
|
71
|
-
else if (['UN'].includes(this.alphabet)) {
|
|
72
|
-
throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
|
|
73
|
-
`tag '${TAGS.alphabetIsMultichar}' is mandatory.`);
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
if (this.column.meta.units === NOTATION.CUSTOM) {
|
|
77
|
-
// this.column.temp[SeqTemps.notationProvider] must be set at detector stage
|
|
78
|
-
this.notationProvider = this.column.temp[SeqTemps.notationProvider] ?? null;
|
|
79
|
-
}
|
|
80
|
-
this.columnVersion = this.column.version;
|
|
81
|
-
}
|
|
82
|
-
static setUnitsToFastaColumn(uh) {
|
|
83
|
-
if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE || uh.column.meta.units !== NOTATION.FASTA)
|
|
84
|
-
throw new Error(`The column of notation '${NOTATION.FASTA}' must be '${DG.SEMTYPE.MACROMOLECULE}'.`);
|
|
85
|
-
uh.column.meta.units = NOTATION.FASTA;
|
|
86
|
-
SeqHandler.setTags(uh);
|
|
87
|
-
}
|
|
88
|
-
static setUnitsToSeparatorColumn(uh, separator) {
|
|
89
|
-
if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE || uh.column.meta.units !== NOTATION.SEPARATOR)
|
|
90
|
-
throw new Error(`The column of notation '${NOTATION.SEPARATOR}' must be '${DG.SEMTYPE.MACROMOLECULE}'.`);
|
|
91
|
-
if (!separator)
|
|
92
|
-
throw new Error(`The column of notation '${NOTATION.SEPARATOR}' must have the separator tag.`);
|
|
93
|
-
uh.column.meta.units = NOTATION.SEPARATOR;
|
|
94
|
-
uh.column.setTag(TAGS.separator, separator);
|
|
95
|
-
SeqHandler.setTags(uh);
|
|
96
|
-
}
|
|
97
|
-
static setUnitsToHelmColumn(uh) {
|
|
98
|
-
if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
99
|
-
throw new Error(`The column of notation '${NOTATION.HELM}' must be '${DG.SEMTYPE.MACROMOLECULE}'`);
|
|
100
|
-
uh.column.meta.units = NOTATION.HELM;
|
|
101
|
-
SeqHandler.setTags(uh);
|
|
102
|
-
}
|
|
103
|
-
/** From detectMacromolecule */
|
|
104
|
-
static setTags(uh) {
|
|
105
|
-
const units = uh.column.meta.units;
|
|
106
|
-
if ([NOTATION.FASTA, NOTATION.SEPARATOR].includes(units)) {
|
|
107
|
-
// Empty monomer alphabet is allowed, only if alphabet tag is annotated
|
|
108
|
-
if (!uh.column.getTag(TAGS.alphabet) && Object.keys(uh.stats.freq).length === 0)
|
|
109
|
-
throw new Error('Alphabet is empty and not annotated.');
|
|
110
|
-
let aligned = uh.column.getTag(TAGS.aligned);
|
|
111
|
-
if (aligned === null) {
|
|
112
|
-
aligned = uh.stats.sameLength ? "SEQ.MSA" /* ALIGNMENT.SEQ_MSA */ : "SEQ" /* ALIGNMENT.SEQ */;
|
|
113
|
-
uh.column.setTag(TAGS.aligned, aligned);
|
|
114
|
-
}
|
|
115
|
-
let alphabet = uh.column.getTag(TAGS.alphabet);
|
|
116
|
-
if (alphabet === null) {
|
|
117
|
-
alphabet = detectAlphabet(uh.stats.freq, candidateAlphabets);
|
|
118
|
-
uh.column.setTag(TAGS.alphabet, alphabet);
|
|
119
|
-
}
|
|
120
|
-
if (alphabet === ALPHABET.UN) {
|
|
121
|
-
const alphabetSize = Object.keys(uh.stats.freq).length;
|
|
122
|
-
const alphabetIsMultichar = Object.keys(uh.stats.freq).some((m) => m.length > 1);
|
|
123
|
-
uh.column.setTag(TAGS.alphabetSize, alphabetSize.toString());
|
|
124
|
-
uh.column.setTag(TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
get column() { return this._column; }
|
|
129
|
-
get length() { return this._column.length; }
|
|
130
|
-
get units() { return this._units; }
|
|
131
|
-
get notation() { return this._notation; }
|
|
132
|
-
get defaultGapOriginal() { return this._defaultGapOriginal; }
|
|
133
|
-
get separator() {
|
|
134
|
-
const separator = this.column.getTag(TAGS.separator) ?? undefined;
|
|
135
|
-
if (this.notation === NOTATION.SEPARATOR && separator === undefined)
|
|
136
|
-
throw new Error(`Separator is mandatory for column '${this.column.name}' of notation '${this.notation}'.`);
|
|
137
|
-
return separator;
|
|
138
|
-
}
|
|
139
|
-
get aligned() {
|
|
140
|
-
const aligned = this.column.getTag(TAGS.aligned);
|
|
141
|
-
// TAGS.aligned is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR
|
|
142
|
-
if (!aligned && (this.isFasta() || this.isSeparator()))
|
|
143
|
-
throw new Error('Tag aligned not set');
|
|
144
|
-
return aligned;
|
|
145
|
-
}
|
|
146
|
-
/** Alphabet name (upper case) */
|
|
147
|
-
get alphabet() {
|
|
148
|
-
const alphabet = this.column.getTag(TAGS.alphabet);
|
|
149
|
-
// TAGS.alphabet is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR
|
|
150
|
-
if (!alphabet && (this.isFasta() || this.isSeparator()))
|
|
151
|
-
throw new Error('Tag alphabet not set');
|
|
152
|
-
return alphabet;
|
|
153
|
-
}
|
|
154
|
-
get defaultBiotype() {
|
|
155
|
-
return this.alphabet === ALPHABET.RNA || this.alphabet === ALPHABET.DNA ? HelmTypes.NUCLEOTIDE : HelmTypes.AA;
|
|
156
|
-
}
|
|
157
|
-
get helmCompatible() {
|
|
158
|
-
return this.column.getTag(TAGS.isHelmCompatible);
|
|
159
|
-
}
|
|
160
|
-
getAlphabetSize() {
|
|
161
|
-
if (this.notation == NOTATION.HELM || this.alphabet == ALPHABET.UN) {
|
|
162
|
-
const alphabetSizeStr = this.column.getTag(TAGS.alphabetSize);
|
|
163
|
-
let alphabetSize;
|
|
164
|
-
if (alphabetSizeStr)
|
|
165
|
-
alphabetSize = parseInt(alphabetSizeStr);
|
|
166
|
-
else {
|
|
167
|
-
// calculate alphabetSize on demand
|
|
168
|
-
const stats = this.stats;
|
|
169
|
-
alphabetSize = Object.keys(stats.freq).length;
|
|
170
|
-
}
|
|
171
|
-
return alphabetSize;
|
|
172
|
-
}
|
|
173
|
-
else {
|
|
174
|
-
switch (this.alphabet) {
|
|
175
|
-
case ALPHABET.PT:
|
|
176
|
-
return 20;
|
|
177
|
-
case ALPHABET.DNA:
|
|
178
|
-
case ALPHABET.RNA:
|
|
179
|
-
return 4;
|
|
180
|
-
case 'NT':
|
|
181
|
-
console.warn(`Unexpected alphabet 'NT'.`);
|
|
182
|
-
return 4;
|
|
183
|
-
default:
|
|
184
|
-
throw new Error(`Unexpected alphabet '${this.alphabet}'.`);
|
|
185
|
-
}
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
getAlphabetIsMultichar() {
|
|
189
|
-
if (this.notation === NOTATION.HELM)
|
|
190
|
-
return true;
|
|
191
|
-
else if (this.alphabet !== ALPHABET.UN)
|
|
192
|
-
return false;
|
|
193
|
-
else
|
|
194
|
-
return this.column.getTag(TAGS.alphabetIsMultichar) === 'true';
|
|
195
|
-
}
|
|
196
|
-
// /** */
|
|
197
|
-
// public get splitted(): ISeqSplitted[] {
|
|
198
|
-
// // TODO: Disable cache or invalidate on changing data
|
|
199
|
-
// if (this._splitted === null) {
|
|
200
|
-
// const splitter = this.splitter;
|
|
201
|
-
// const colLength: number = this._column.length;
|
|
202
|
-
// this._splitted = new Array(colLength);
|
|
203
|
-
// const catIdxList = this._column.getRawData();
|
|
204
|
-
// const catList: string[] = this._column.categories;
|
|
205
|
-
// for (let rowIdx: number = 0; rowIdx < colLength; rowIdx++) {
|
|
206
|
-
// const seq: string = catList[catIdxList[rowIdx]];
|
|
207
|
-
// this._splitted[rowIdx] = splitter(seq);
|
|
208
|
-
// }
|
|
209
|
-
// }
|
|
210
|
-
// return this._splitted;
|
|
211
|
-
// }
|
|
212
|
-
getSplitted(rowIdx, limit) {
|
|
213
|
-
if (!this.cached || limit !== undefined) {
|
|
214
|
-
const seq = this.column.get(rowIdx);
|
|
215
|
-
return this.getSplitter(limit)(seq);
|
|
216
|
-
}
|
|
217
|
-
else {
|
|
218
|
-
if (this.column.version !== this.columnVersion || this._splitted === null) {
|
|
219
|
-
this.columnVersion = this.column.version;
|
|
220
|
-
this._splitted = new Array(this.column.length);
|
|
221
|
-
}
|
|
222
|
-
let resSS = this._splitted[rowIdx] ? this._splitted[rowIdx].deref() : undefined;
|
|
223
|
-
if (!resSS) {
|
|
224
|
-
const seq = this.column.get(rowIdx);
|
|
225
|
-
resSS = this.splitter(seq);
|
|
226
|
-
this._splitted[rowIdx] = new WeakRef(resSS);
|
|
227
|
-
}
|
|
228
|
-
return resSS;
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
/** Any Macromolecule can be represented on Helm format. The reverse is not always possible. */
|
|
232
|
-
async getHelm(rowIdx, options) {
|
|
233
|
-
const seq = this.column.get(rowIdx);
|
|
234
|
-
if (this.notationProvider) {
|
|
235
|
-
const helmCol = await this.notationProvider.getHelm(this.column, options);
|
|
236
|
-
return helmCol.get(rowIdx);
|
|
237
|
-
}
|
|
238
|
-
else
|
|
239
|
-
return this.convertToHelm(seq);
|
|
240
|
-
}
|
|
241
|
-
get stats() {
|
|
242
|
-
if (this._stats === null) {
|
|
243
|
-
const freq = {};
|
|
244
|
-
let sameLength = true;
|
|
245
|
-
let firstLength = null;
|
|
246
|
-
const colLen = this.column.length;
|
|
247
|
-
for (let rowIdx = 0; rowIdx < colLen; ++rowIdx) {
|
|
248
|
-
const mSeq = this.getSplitted(rowIdx);
|
|
249
|
-
if (firstLength == null)
|
|
250
|
-
firstLength = mSeq.length;
|
|
251
|
-
else if (mSeq.length !== firstLength)
|
|
252
|
-
sameLength = false;
|
|
253
|
-
for (let posIdx = 0; posIdx < mSeq.length; ++posIdx) {
|
|
254
|
-
const cm = mSeq.getCanonical(posIdx);
|
|
255
|
-
if (!(cm in freq))
|
|
256
|
-
freq[cm] = 0;
|
|
257
|
-
freq[cm] += 1;
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
this._stats = { freq: freq, sameLength: sameLength };
|
|
261
|
-
}
|
|
262
|
-
return this._stats;
|
|
263
|
-
}
|
|
264
|
-
get maxLength() {
|
|
265
|
-
if (this._maxLength === null) {
|
|
266
|
-
this._maxLength = this.column.length === 0 ? 0 :
|
|
267
|
-
Math.max(...wu.count(0).take(this.column.length).map((rowIdx) => this.getSplitted(rowIdx).length));
|
|
268
|
-
}
|
|
269
|
-
return this._maxLength;
|
|
270
|
-
}
|
|
271
|
-
get posList() {
|
|
272
|
-
if (this._posList === null) {
|
|
273
|
-
const posListTxt = this.column.getTag(TAGS.positionNames);
|
|
274
|
-
this._posList = posListTxt ? posListTxt.split(positionSeparator).map((p) => p.trim()) :
|
|
275
|
-
wu.count(1).take(this.maxLength).map((pos) => pos.toString()).toArray();
|
|
276
|
-
}
|
|
277
|
-
return this._posList;
|
|
278
|
-
}
|
|
279
|
-
isFasta() { return this.notation === NOTATION.FASTA; }
|
|
280
|
-
isSeparator() { return this.notation === NOTATION.SEPARATOR; }
|
|
281
|
-
isHelm() { return this.notation === NOTATION.HELM; }
|
|
282
|
-
isRna() { return this.alphabet === ALPHABET.RNA; }
|
|
283
|
-
isDna() { return this.alphabet === ALPHABET.DNA; }
|
|
284
|
-
isPeptide() { return this.alphabet === ALPHABET.PT; }
|
|
285
|
-
isMsa() { return this.aligned ? this.aligned.toUpperCase().includes('MSA') : false; }
|
|
286
|
-
isHelmCompatible() { return this.helmCompatible === 'true'; }
|
|
287
|
-
/** Checks {@link om} for being a gap
|
|
288
|
-
* @param {string} om Original monomer of sequence symbol
|
|
289
|
-
* @return {boolean}
|
|
290
|
-
*/
|
|
291
|
-
isGap(om) {
|
|
292
|
-
return !om || om === this._defaultGapOriginal;
|
|
293
|
-
}
|
|
294
|
-
/** Associate notation types with the corresponding units */
|
|
295
|
-
/**
|
|
296
|
-
* @return {NOTATION} Notation associated with the units type
|
|
297
|
-
*/
|
|
298
|
-
getNotation() {
|
|
299
|
-
if (this.units.toLowerCase().startsWith(NOTATION.FASTA))
|
|
300
|
-
return NOTATION.FASTA;
|
|
301
|
-
else if (this.units.toLowerCase().startsWith(NOTATION.SEPARATOR))
|
|
302
|
-
return NOTATION.SEPARATOR;
|
|
303
|
-
else if (this.units.toLowerCase().startsWith(NOTATION.HELM))
|
|
304
|
-
return NOTATION.HELM;
|
|
305
|
-
else if (this.units.toLowerCase().startsWith(NOTATION.CUSTOM))
|
|
306
|
-
return NOTATION.CUSTOM;
|
|
307
|
-
else
|
|
308
|
-
throw new Error(`Column '${this.column.name}' has unexpected notation '${this.units}'.`);
|
|
309
|
-
}
|
|
310
|
-
/**
|
|
311
|
-
* Get the wrapper strings for HELM, depending on the type of the
|
|
312
|
-
* macromolecule (peptide, DNA, RNA)
|
|
313
|
-
*
|
|
314
|
-
* @return {string[]} Array of wrappers
|
|
315
|
-
*/
|
|
316
|
-
getHelmWrappers() {
|
|
317
|
-
const prefix = (this.isDna()) ? 'RNA1{' :
|
|
318
|
-
(this.isRna() || this.isHelmCompatible()) ? 'RNA1{' : 'PEPTIDE1{';
|
|
319
|
-
const postfix = '}$$$$';
|
|
320
|
-
const leftWrapper = (this.isDna()) ? 'd(' :
|
|
321
|
-
(this.isRna()) ? 'r(' : '';
|
|
322
|
-
const rightWrapper = (this.isDna() || this.isRna()) ? ')p' : '';
|
|
323
|
-
return [prefix, leftWrapper, rightWrapper, postfix];
|
|
324
|
-
}
|
|
325
|
-
/**
|
|
326
|
-
* Create a new empty column of the specified notation type and the same
|
|
327
|
-
* length as column
|
|
328
|
-
*
|
|
329
|
-
* @param {NOTATION} tgtNotation
|
|
330
|
-
* @return {DG.Column}
|
|
331
|
-
*/
|
|
332
|
-
getNewColumn(tgtNotation, tgtSeparator, colName, data) {
|
|
333
|
-
const col = this.column;
|
|
334
|
-
const name = tgtNotation.toLowerCase() + '(' + col.name + ')';
|
|
335
|
-
const newColName = colName ?? col.dataFrame?.columns.getUnusedName(name) ?? name;
|
|
336
|
-
const newColumn = DG.Column.fromList('string', newColName, data ?? new Array(this.column.length).fill(''));
|
|
337
|
-
newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
338
|
-
newColumn.meta.units = tgtNotation;
|
|
339
|
-
if (tgtNotation === NOTATION.SEPARATOR) {
|
|
340
|
-
if (!tgtSeparator)
|
|
341
|
-
throw new Error(`Notation \'${NOTATION.SEPARATOR}\' requires separator value.`);
|
|
342
|
-
newColumn.setTag(TAGS.separator, tgtSeparator);
|
|
343
|
-
}
|
|
344
|
-
newColumn.setTag(DG.TAGS.CELL_RENDERER, tgtNotation === NOTATION.HELM ? 'helm' : 'sequence'); // cell.renderer
|
|
345
|
-
const srcAligned = col.getTag(TAGS.aligned);
|
|
346
|
-
if (srcAligned)
|
|
347
|
-
newColumn.setTag(TAGS.aligned, srcAligned);
|
|
348
|
-
let srcAlphabet = col.getTag(TAGS.alphabet);
|
|
349
|
-
if (!srcAlphabet && this.notation === NOTATION.HELM && tgtNotation !== NOTATION.HELM)
|
|
350
|
-
srcAlphabet = ALPHABET.UN;
|
|
351
|
-
if (srcAlphabet != null)
|
|
352
|
-
newColumn.setTag(TAGS.alphabet, srcAlphabet);
|
|
353
|
-
let srcAlphabetSize = col.getTag(TAGS.alphabetSize);
|
|
354
|
-
if (srcAlphabet != null && srcAlphabetSize)
|
|
355
|
-
newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);
|
|
356
|
-
const srcAlphabetIsMultichar = col.getTag(TAGS.alphabetIsMultichar);
|
|
357
|
-
if (srcAlphabet != null && srcAlphabetIsMultichar !== undefined)
|
|
358
|
-
newColumn.setTag(TAGS.alphabetIsMultichar, srcAlphabetIsMultichar);
|
|
359
|
-
if (tgtNotation == NOTATION.HELM) {
|
|
360
|
-
srcAlphabetSize = this.getAlphabetSize().toString();
|
|
361
|
-
newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);
|
|
362
|
-
}
|
|
363
|
-
return newColumn;
|
|
364
|
-
}
|
|
365
|
-
/** Creates a new column on data of {@link seqList} with the same tags */
|
|
366
|
-
getNewColumnFromList(name, seqList) {
|
|
367
|
-
return this.getNewColumn(this.notation, this.separator, name, seqList);
|
|
368
|
-
}
|
|
369
|
-
/**
|
|
370
|
-
* Create a new empty column using templateCol as a template
|
|
371
|
-
*
|
|
372
|
-
* @param {DG.Column} templateCol the properties and units of this column are used as a
|
|
373
|
-
* template to build the new one
|
|
374
|
-
* @return {DG.Column}
|
|
375
|
-
*/
|
|
376
|
-
static getNewColumn(templateCol) {
|
|
377
|
-
const col = SeqHandler.forColumn(templateCol);
|
|
378
|
-
const targetNotation = col.notation;
|
|
379
|
-
return col.getNewColumn(targetNotation);
|
|
380
|
-
}
|
|
381
|
-
/**
|
|
382
|
-
* A helper function checking the validity of the 'units' string
|
|
383
|
-
*
|
|
384
|
-
* @param {string} units the string to be validated
|
|
385
|
-
* @return {boolean}
|
|
386
|
-
*/
|
|
387
|
-
static unitsStringIsValid(units) {
|
|
388
|
-
units = units.toLowerCase();
|
|
389
|
-
const prefixes = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];
|
|
390
|
-
const postfixes = ['rna', 'dna', 'pt'];
|
|
391
|
-
const prefixCriterion = prefixes.some((p) => units.startsWith(p.toLowerCase()));
|
|
392
|
-
return prefixCriterion;
|
|
393
|
-
}
|
|
394
|
-
/**
|
|
395
|
-
* Construct a new column of semantic type MACROMOLECULE from the list of
|
|
396
|
-
* specified parameters
|
|
397
|
-
*
|
|
398
|
-
* @param {number} len the length of the new column
|
|
399
|
-
* @param {string} name the name of the new column
|
|
400
|
-
* @param {string} units the units of the new column
|
|
401
|
-
* @return {DG.Column}
|
|
402
|
-
*/
|
|
403
|
-
static getNewColumnFromParams(len, name, units) {
|
|
404
|
-
// WARNING: in this implementation is is impossible to verify the uniqueness
|
|
405
|
-
// of the new column's name
|
|
406
|
-
// TODO: verify the validity of units parameter
|
|
407
|
-
if (!SeqHandler.unitsStringIsValid(units))
|
|
408
|
-
throw new Error('Invalid format of \'units\' parameter');
|
|
409
|
-
const newColumn = DG.Column.fromList('string', name, new Array(len).fill(''));
|
|
410
|
-
newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
411
|
-
newColumn.meta.units = units;
|
|
412
|
-
return newColumn;
|
|
413
|
-
}
|
|
414
|
-
/** Gets function to split seq value to monomers */
|
|
415
|
-
getSplitter(limit) {
|
|
416
|
-
let splitter = null;
|
|
417
|
-
splitter = this.notationProvider ? this.notationProvider.splitter : null;
|
|
418
|
-
if (splitter)
|
|
419
|
-
return splitter;
|
|
420
|
-
if (this.units.toLowerCase().startsWith(NOTATION.FASTA)) {
|
|
421
|
-
const alphabet = this.column.getTag(TAGS.alphabet);
|
|
422
|
-
if (alphabet !== null && !this.getAlphabetIsMultichar())
|
|
423
|
-
return splitterAsFastaSimple;
|
|
424
|
-
else
|
|
425
|
-
return splitterAsFasta;
|
|
426
|
-
}
|
|
427
|
-
else if (this.units.toLowerCase().startsWith(NOTATION.SEPARATOR))
|
|
428
|
-
return getSplitterWithSeparator(this.separator, limit);
|
|
429
|
-
else if (this.units.toLowerCase().startsWith(NOTATION.HELM))
|
|
430
|
-
return splitterAsHelm;
|
|
431
|
-
else
|
|
432
|
-
throw new Error(`Unexpected units ${this.units} .`);
|
|
433
|
-
// TODO: Splitter for HELM
|
|
434
|
-
}
|
|
435
|
-
split(seq) {
|
|
436
|
-
return this.splitter(seq);
|
|
437
|
-
}
|
|
438
|
-
getDistanceFunctionName() {
|
|
439
|
-
// TODO add support for helm and separator notation
|
|
440
|
-
if (!this.isFasta())
|
|
441
|
-
throw new Error('Only FASTA notation is supported');
|
|
442
|
-
if (this.isMsa())
|
|
443
|
-
return MmDistanceFunctionsNames.HAMMING;
|
|
444
|
-
switch (this.alphabet) {
|
|
445
|
-
case ALPHABET.DNA:
|
|
446
|
-
case ALPHABET.RNA:
|
|
447
|
-
// As DNA and RNA scoring matrices are same as identity matrices(mostly),
|
|
448
|
-
// we can use very fast and optimized Levenshtein distance library
|
|
449
|
-
return MmDistanceFunctionsNames.LEVENSHTEIN;
|
|
450
|
-
case ALPHABET.PT:
|
|
451
|
-
return MmDistanceFunctionsNames.LEVENSHTEIN;
|
|
452
|
-
// For default case, let's use Levenshtein distance
|
|
453
|
-
default:
|
|
454
|
-
return MmDistanceFunctionsNames.LEVENSHTEIN;
|
|
455
|
-
}
|
|
456
|
-
}
|
|
457
|
-
getDistanceFunction() {
|
|
458
|
-
return mmDistanceFunctions[this.getDistanceFunctionName()]();
|
|
459
|
-
}
|
|
460
|
-
// checks if the separator notation is compatible with helm library
|
|
461
|
-
async checkHelmCompatibility() {
|
|
462
|
-
// check first for the column tag to avoid extra processing
|
|
463
|
-
if (this.column.tags.has(TAGS.isHelmCompatible))
|
|
464
|
-
return this.column.getTag(TAGS.isHelmCompatible) === 'true';
|
|
465
|
-
// get the monomer lib and check against the column
|
|
466
|
-
const monomerLibHelper = await getMonomerLibHelper();
|
|
467
|
-
const bioLib = monomerLibHelper.getMonomerLib();
|
|
468
|
-
// retrieve peptides
|
|
469
|
-
const peptides = bioLib.getMonomerSymbolsByType("PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */);
|
|
470
|
-
// convert the peptides list to a set for faster lookup
|
|
471
|
-
const peptidesSet = new Set(peptides);
|
|
472
|
-
// get splitter for given separator and check if all monomers are in the lib
|
|
473
|
-
const splitterFunc = getSplitterWithSeparator(this.separator);
|
|
474
|
-
// iterate over the columns, split them and check if all monomers are in the lib
|
|
475
|
-
//TODO maybe add missing threshold so that if there are not too many missing monomers
|
|
476
|
-
// the column is still considered helm compatible
|
|
477
|
-
const catIdxSet = new Set();
|
|
478
|
-
const rowCount = this.column.length;
|
|
479
|
-
const colRawData = this.column.getRawData();
|
|
480
|
-
for (let rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
|
|
481
|
-
const catI = colRawData[rowIdx];
|
|
482
|
-
if (!(catI in catIdxSet)) {
|
|
483
|
-
catIdxSet.add(catI);
|
|
484
|
-
const seqSS = this.getSplitted(rowIdx);
|
|
485
|
-
for (let posIdx = 0; posIdx < seqSS.length; ++posIdx) {
|
|
486
|
-
const cm = seqSS.getCanonical(posIdx);
|
|
487
|
-
if (!peptidesSet.has(cm)) {
|
|
488
|
-
this.column.setTag(TAGS.isHelmCompatible, 'false');
|
|
489
|
-
return false;
|
|
490
|
-
}
|
|
491
|
-
}
|
|
492
|
-
}
|
|
493
|
-
}
|
|
494
|
-
this.column.setTag(TAGS.isHelmCompatible, 'true');
|
|
495
|
-
return true;
|
|
496
|
-
}
|
|
497
|
-
// -- Notation Converter --
|
|
498
|
-
get splitter() {
|
|
499
|
-
if (this._splitter === null)
|
|
500
|
-
this._splitter = this.getSplitter();
|
|
501
|
-
return this._splitter;
|
|
502
|
-
}
|
|
503
|
-
toFasta(targetNotation) { return targetNotation === NOTATION.FASTA; }
|
|
504
|
-
toSeparator(targetNotation) { return targetNotation === NOTATION.SEPARATOR; }
|
|
505
|
-
toHelm(targetNotation) { return targetNotation === NOTATION.HELM; }
|
|
506
|
-
/**
|
|
507
|
-
* Convert HELM string to FASTA/SEPARATOR
|
|
508
|
-
*
|
|
509
|
-
* @param {string} srcSeq A string to be converted
|
|
510
|
-
* @param {string} tgtNotation Target notation: FASTA or SEPARATOR
|
|
511
|
-
* @param {string} tgtSeparator Optional target separator (for HELM ->
|
|
512
|
-
* @param {string | null} tgtGapOriginal Optional target gap symbol
|
|
513
|
-
* SEPARATOR)
|
|
514
|
-
* @return {string} Converted string
|
|
515
|
-
*/
|
|
516
|
-
convertHelmToFastaSeparator(srcSeq, tgtNotation, tgtSeparator, tgtGapOriginal) {
|
|
517
|
-
if (!tgtGapOriginal) {
|
|
518
|
-
tgtGapOriginal = (this.toFasta(tgtNotation)) ?
|
|
519
|
-
GapOriginals[NOTATION.FASTA] :
|
|
520
|
-
GapOriginals[NOTATION.SEPARATOR];
|
|
521
|
-
}
|
|
522
|
-
if (!tgtSeparator)
|
|
523
|
-
tgtSeparator = (this.toFasta(tgtNotation)) ? '' : this.separator;
|
|
524
|
-
const isNucleotide = srcSeq.startsWith('RNA');
|
|
525
|
-
// items can be monomers or helms
|
|
526
|
-
const helmItemsArray = this.splitter(srcSeq);
|
|
527
|
-
const tgtMonomersArray = [];
|
|
528
|
-
for (let posIdx = 0; posIdx < helmItemsArray.length; ++posIdx) {
|
|
529
|
-
let om = helmItemsArray.getOriginal(posIdx);
|
|
530
|
-
if (isNucleotide)
|
|
531
|
-
om = om.replace(HELM_WRAPPERS_REGEXP, '');
|
|
532
|
-
if (om === GapOriginals[NOTATION.HELM])
|
|
533
|
-
tgtMonomersArray.push(tgtGapOriginal);
|
|
534
|
-
else if (this.toFasta(tgtNotation) && om.length > 1) {
|
|
535
|
-
// the case of a multi-character monomer converted to FASTA
|
|
536
|
-
const monomer = '[' + om + ']';
|
|
537
|
-
tgtMonomersArray.push(monomer);
|
|
538
|
-
}
|
|
539
|
-
else
|
|
540
|
-
tgtMonomersArray.push(om);
|
|
541
|
-
}
|
|
542
|
-
return tgtMonomersArray.join(tgtSeparator);
|
|
543
|
-
}
|
|
544
|
-
/** Dispatcher method for notation conversion
|
|
545
|
-
*
|
|
546
|
-
* @param {NOTATION} tgtNotation Notation we want to convert to
|
|
547
|
-
* @param {string | null} tgtSeparator Possible separator
|
|
548
|
-
* @return {DG.Column} Converted column
|
|
549
|
-
*/
|
|
550
|
-
convert(tgtNotation, tgtSeparator) {
|
|
551
|
-
// Get joiner from the source column units handler (this) knowing about the source sequence.
|
|
552
|
-
// For example, converting DNA Helm to fasta requires removing the r(X)p decoration.
|
|
553
|
-
const joiner = this.getJoiner({ notation: tgtNotation, separator: tgtSeparator });
|
|
554
|
-
const newColumn = this.getNewColumn(tgtNotation, tgtSeparator);
|
|
555
|
-
// assign the values to the newly created empty column
|
|
556
|
-
newColumn.init((rowIdx) => {
|
|
557
|
-
const srcSS = this.getSplitted(rowIdx);
|
|
558
|
-
return joiner(srcSS);
|
|
559
|
-
});
|
|
560
|
-
return newColumn;
|
|
561
|
-
}
|
|
562
|
-
/**
|
|
563
|
-
* @param name
|
|
564
|
-
* @param startIdx Start position index of the region (0-based)
|
|
565
|
-
* @param endIdx End position index of the region (0-based, inclusive)
|
|
566
|
-
*/
|
|
567
|
-
getRegion(startIdx, endIdx, name) {
|
|
568
|
-
const regCol = this.getNewColumn(this.notation, this.separator);
|
|
569
|
-
regCol.name = name;
|
|
570
|
-
const startIdxVal = startIdx ?? 0;
|
|
571
|
-
const endIdxVal = endIdx ?? this.maxLength - 1;
|
|
572
|
-
const joiner = this.getJoiner();
|
|
573
|
-
const regLength = endIdxVal - startIdxVal + 1;
|
|
574
|
-
const gapOM = GapOriginals[this.notation];
|
|
575
|
-
regCol.init((rowI) => {
|
|
576
|
-
const seqS = this.getSplitted(rowI);
|
|
577
|
-
// Custom slicing instead of array method to maintain gaps
|
|
578
|
-
const regOMList = new Array(regLength);
|
|
579
|
-
for (let regJPos = 0; regJPos < regLength; ++regJPos) {
|
|
580
|
-
const seqJPos = startIdxVal + regJPos;
|
|
581
|
-
regOMList[regJPos] = seqJPos < seqS.length ? seqS.getOriginal(seqJPos) : gapOM;
|
|
582
|
-
}
|
|
583
|
-
return joiner(new StringListSeqSplitted(regOMList, gapOM));
|
|
584
|
-
});
|
|
585
|
-
const getRegionOfPositionNames = (str) => {
|
|
586
|
-
const srcPosList = str.split(',').map((p) => p.trim());
|
|
587
|
-
const regPosList = new Array(regLength);
|
|
588
|
-
for (let regJPos = 0; regJPos < regLength; ++regJPos) {
|
|
589
|
-
const srcJPos = startIdxVal + regJPos;
|
|
590
|
-
regPosList[regJPos] = srcJPos < srcPosList.length ? srcPosList[srcJPos] : '?';
|
|
591
|
-
}
|
|
592
|
-
return regPosList.join(positionSeparator);
|
|
593
|
-
};
|
|
594
|
-
const srcPositionNamesStr = this.column.getTag(TAGS.positionNames);
|
|
595
|
-
if (srcPositionNamesStr)
|
|
596
|
-
regCol.setTag(TAGS.positionNames, getRegionOfPositionNames(srcPositionNamesStr));
|
|
597
|
-
const srcPositionLabelsStr = this.column.getTag(TAGS.positionLabels);
|
|
598
|
-
if (srcPositionLabelsStr)
|
|
599
|
-
regCol.setTag(TAGS.positionLabels, getRegionOfPositionNames(srcPositionLabelsStr));
|
|
600
|
-
return regCol;
|
|
601
|
-
}
|
|
602
|
-
get joiner() {
|
|
603
|
-
if (!this._joiner)
|
|
604
|
-
this._joiner = this.getJoiner();
|
|
605
|
-
return this._joiner;
|
|
606
|
-
}
|
|
607
|
-
getJoiner(opts) {
|
|
608
|
-
const notation = opts ? opts.notation : this.notation;
|
|
609
|
-
const separator = opts ? opts.separator : this.separator;
|
|
610
|
-
let res;
|
|
611
|
-
const srcSh = this;
|
|
612
|
-
switch (notation) {
|
|
613
|
-
case NOTATION.FASTA: {
|
|
614
|
-
res = function (srcSS) { return srcSh.joinToFasta(srcSS, srcSh.isHelm()); };
|
|
615
|
-
break;
|
|
616
|
-
}
|
|
617
|
-
case NOTATION.SEPARATOR: {
|
|
618
|
-
if (!separator)
|
|
619
|
-
throw new Error(`Separator is mandatory for notation '${notation}'.`);
|
|
620
|
-
res = function (srcSS) { return joinToSeparator(srcSS, separator, srcSh.isHelm()); };
|
|
621
|
-
break;
|
|
622
|
-
}
|
|
623
|
-
case NOTATION.HELM: {
|
|
624
|
-
const isDnaOrRna = srcSh.alphabet === ALPHABET.DNA || srcSh.alphabet === ALPHABET.RNA;
|
|
625
|
-
const wrappers = srcSh.getHelmWrappers();
|
|
626
|
-
res = function (srcSS) { return joinToHelm(srcSS, wrappers, isDnaOrRna); };
|
|
627
|
-
break;
|
|
628
|
-
}
|
|
629
|
-
default:
|
|
630
|
-
throw new Error(`Unexpected notation '${notation}'.`);
|
|
631
|
-
}
|
|
632
|
-
return res;
|
|
633
|
-
}
|
|
634
|
-
/**
 * Creates a function that converts a single source sequence string to the target notation.
 *
 * @param {NOTATION} tgtUnits Target notation
 * @param {string} [tgtSeparator] Target separator; mandatory when tgtUnits is NOTATION.SEPARATOR
 * @return Function taking a source sequence string and returning the converted string
 * @throws {Error} When the separator is missing for NOTATION.SEPARATOR,
 *   or when tgtUnits is not one of FASTA / HELM / SEPARATOR
 */
getConverter(tgtUnits, tgtSeparator = undefined) {
  if (tgtUnits === NOTATION.SEPARATOR && !tgtSeparator)
    throw new Error(`Target separator is not specified for target units '${NOTATION.SEPARATOR}'.`);

  switch (tgtUnits) {
    case NOTATION.FASTA:
      return (srcSeq) => this.convertToFasta(srcSeq);
    case NOTATION.HELM:
      return (srcSeq) => this.convertToHelm(srcSeq);
    case NOTATION.SEPARATOR:
      return (srcSeq) => this.convertToSeparator(srcSeq, tgtSeparator);
    default:
      // Previously an Error without any message was thrown here; name the offending value.
      throw new Error(`Unexpected target units '${tgtUnits}'.`);
  }
}
|
|
647
|
-
/** Gets a column's UnitsHandler object from temp slot or creates a new and stores it to the temp slot. */
|
|
648
|
-
static forColumn(col) {
|
|
649
|
-
// TODO: Invalidate col.temp[Temps.uh] checking column's metadata
|
|
650
|
-
let res = col.temp[SeqTemps.seqHandler];
|
|
651
|
-
if (!res || res.columnVersion !== col.version)
|
|
652
|
-
res = col.temp[SeqTemps.seqHandler] = new SeqHandler(col);
|
|
653
|
-
return res;
|
|
654
|
-
}
|
|
655
|
-
// -- joiners & converters --
|
|
656
|
-
joinToFasta(seqS, isHelm) {
|
|
657
|
-
const resMList = new Array(seqS.length);
|
|
658
|
-
for (let posIdx = 0; posIdx < seqS.length; ++posIdx) {
|
|
659
|
-
const cm = seqS.getOriginal(posIdx);
|
|
660
|
-
let om = seqS.getOriginal(posIdx);
|
|
661
|
-
if (isHelm)
|
|
662
|
-
om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
|
|
663
|
-
if (cm === GAP_SYMBOL)
|
|
664
|
-
om = GapOriginals[NOTATION.FASTA];
|
|
665
|
-
else if (cm === PHOSPHATE_SYMBOL)
|
|
666
|
-
om = '';
|
|
667
|
-
else if (om.length > 1)
|
|
668
|
-
om = '[' + om + ']';
|
|
669
|
-
resMList[posIdx] = om;
|
|
670
|
-
}
|
|
671
|
-
return resMList.join('');
|
|
672
|
-
}
|
|
673
|
-
convertToFasta(src) {
|
|
674
|
-
const srcUhSplitter = this.splitter;
|
|
675
|
-
const srcSS = this.isHelm() ? this.splitterAsHelmNucl(src) : srcUhSplitter(src);
|
|
676
|
-
return this.joinToFasta(srcSS, this.isHelm());
|
|
677
|
-
}
|
|
678
|
-
convertToSeparator(src, tgtSeparator) {
|
|
679
|
-
const srcSS = this.isHelm() ? this.splitterAsHelmNucl(src) : this.splitter(src);
|
|
680
|
-
return joinToSeparator(srcSS, tgtSeparator, this.isHelm());
|
|
681
|
-
}
|
|
682
|
-
convertToHelm(src) {
|
|
683
|
-
if (this.notation == NOTATION.HELM)
|
|
684
|
-
return src;
|
|
685
|
-
const wrappers = this.getHelmWrappers();
|
|
686
|
-
const isDnaOrRna = src.startsWith('DNA') || src.startsWith('RNA');
|
|
687
|
-
const srcSS = this.splitter(src);
|
|
688
|
-
return joinToHelm(srcSS, wrappers, isDnaOrRna);
|
|
689
|
-
}
|
|
690
|
-
/** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus. */
|
|
691
|
-
splitterAsHelmNucl(src) {
|
|
692
|
-
const srcMList = this.splitter(src);
|
|
693
|
-
const tgtMList = new Array(srcMList.length);
|
|
694
|
-
const isDna = src.startsWith('DNA');
|
|
695
|
-
const isRna = src.startsWith('RNA');
|
|
696
|
-
for (let posIdx = 0; posIdx < srcMList.length; ++posIdx) {
|
|
697
|
-
let om = srcMList.getOriginal(posIdx);
|
|
698
|
-
if (isDna || isRna) {
|
|
699
|
-
om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
|
|
700
|
-
om = om === PHOSPHATE_SYMBOL ? null : om;
|
|
701
|
-
}
|
|
702
|
-
tgtMList[posIdx] = om ? om : null;
|
|
703
|
-
}
|
|
704
|
-
return new StringListSeqSplitted(tgtMList.filter((om) => !!om), GapOriginals[NOTATION.HELM]);
|
|
705
|
-
}
|
|
706
|
-
// Custom notation provider
|
|
707
|
-
getRendererBack(gridCol, tableCol) {
|
|
708
|
-
const temp = this.column.temp;
|
|
709
|
-
let res = temp.rendererBack;
|
|
710
|
-
if (!res)
|
|
711
|
-
res = temp.rendererBack = this.notationProvider.createCellRendererBack(gridCol, tableCol);
|
|
712
|
-
return res;
|
|
713
|
-
}
|
|
714
|
-
}
|
|
715
|
-
// -- joiners --
|
|
716
|
-
/**
 * Joins a splitted sequence into a separator-notation string.
 *
 * Per position: a canonical gap becomes the SEPARATOR gap original, a phosphate
 * becomes an empty string, anything else is the original monomer (with
 * HELM_WRAPPERS_REGEXP decorations stripped when isHelm is set).
 *
 * @param seqS Splitted sequence (provides length, getCanonical and getOriginal)
 * @param tgtSeparator Separator placed between monomers
 * @param isHelm Whether the source is Helm
 * @return Joined string
 */
function joinToSeparator(seqS, tgtSeparator, isHelm) {
  const parts = new Array(seqS.length);
  for (let i = 0; i < seqS.length; ++i) {
    const canonical = seqS.getCanonical(i);
    const original = seqS.getOriginal(i);
    const unwrapped = isHelm ? original.replace(HELM_WRAPPERS_REGEXP, '$1') : original;
    switch (canonical) {
      case GAP_SYMBOL:
        parts[i] = GapOriginals[NOTATION.SEPARATOR];
        break;
      case PHOSPHATE_SYMBOL:
        parts[i] = '';
        break;
      default:
        parts[i] = unwrapped;
    }
  }
  return parts.join(tgtSeparator);
}
|
|
731
|
-
/**
 * Joins a splitted sequence into a Helm string.
 *
 * Canonical gaps become the HELM gap original. Every other monomer is put between
 * the left and right wrappers, in square brackets unless it is a single character;
 * for DNA/RNA, HELM_WRAPPERS_REGEXP decorations are stripped first.
 *
 * @param srcSS Splitted sequence (provides length, getCanonical and getOriginal)
 * @param wrappers Four items: prefix, left wrapper, right wrapper, postfix
 * @param isDnaOrRna Whether existing Helm wrappers have to be stripped from monomers
 * @return Prefix, the dot-joined monomers, then postfix
 */
function joinToHelm(srcSS, wrappers, isDnaOrRna) {
  const [head, open, close, tail] = wrappers;

  // Wraps a single monomer; multi-character monomers get square brackets.
  const wrap = (monomer) => {
    const core = monomer.length === 1 ? monomer : `[${monomer}]`;
    return `${open}${core}${close}`;
  };

  const items = new Array(srcSS.length);
  for (let i = 0; i < srcSS.length; ++i) {
    const canonical = srcSS.getCanonical(i);
    const original = srcSS.getOriginal(i);
    if (canonical === GAP_SYMBOL) {
      items[i] = GapOriginals[NOTATION.HELM];
      continue;
    }
    items[i] = wrap(isDnaOrRna ? original.replace(HELM_WRAPPERS_REGEXP, '$1') : original);
  }
  return `${head}${items.join('.')}${tail}`;
}
|
|
748
|
-
//# sourceMappingURL=seq-handler.js.map
|