@datagrok/bio 2.15.13 → 2.16.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/detectors.js +16 -11
- package/dist/455.js.map +1 -1
- package/dist/980.js +1 -1
- package/dist/980.js.map +1 -1
- package/dist/package-test.js +6 -6
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +14 -14
- package/src/analysis/sequence-activity-cliffs.ts +9 -8
- package/src/analysis/sequence-diversity-viewer.ts +6 -4
- package/src/analysis/sequence-similarity-viewer.ts +9 -6
- package/src/analysis/sequence-space.ts +3 -2
- package/src/calculations/monomerLevelMols.ts +4 -5
- package/src/demo/bio01-similarity-diversity.ts +4 -1
- package/src/package-test.ts +1 -1
- package/src/package-types.ts +34 -2
- package/src/package.ts +60 -76
- package/src/substructure-search/substructure-search.ts +15 -9
- package/src/tests/WebLogo-layout-tests.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +11 -5
- package/src/tests/WebLogo-project-tests.ts +1 -1
- package/src/tests/activity-cliffs-utils.ts +11 -14
- package/src/tests/bio-tests.ts +85 -79
- package/src/tests/checkInputColumn-tests.ts +15 -10
- package/src/tests/converters-test.ts +12 -5
- package/src/tests/detectors-benchmark-tests.ts +5 -2
- package/src/tests/detectors-tests.ts +51 -44
- package/src/tests/detectors-weak-and-likely-tests.ts +12 -5
- package/src/tests/fasta-export-tests.ts +13 -5
- package/src/tests/helm-tests.ts +85 -0
- package/src/tests/mm-distance-tests.ts +14 -7
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/tests/msa-tests.ts +33 -24
- package/src/tests/renderers-monomer-placer-tests.ts +2 -5
- package/src/tests/renderers-test.ts +15 -9
- package/src/tests/scoring.ts +9 -6
- package/src/tests/seq-handler-get-helm-tests.ts +7 -5
- package/src/tests/seq-handler-get-region-tests.ts +9 -3
- package/src/tests/seq-handler-splitted-tests.ts +11 -5
- package/src/tests/seq-handler-tests.ts +17 -10
- package/src/tests/sequence-space-utils.ts +9 -4
- package/src/tests/splitters-test.ts +5 -4
- package/src/tests/substructure-filters-tests.ts +22 -23
- package/src/tests/to-atomic-level-tests.ts +5 -3
- package/src/tests/to-atomic-level-ui-tests.ts +4 -1
- package/src/tests/utils/detectors-utils.ts +4 -4
- package/src/utils/calculate-scores.ts +11 -9
- package/src/utils/cell-renderer-custom.ts +27 -17
- package/src/utils/cell-renderer.ts +14 -8
- package/src/utils/check-input-column.ts +13 -9
- package/src/utils/context-menu.ts +4 -4
- package/src/utils/convert.ts +21 -14
- package/src/utils/get-region-func-editor.ts +8 -5
- package/src/utils/get-region.ts +4 -5
- package/src/utils/helm-to-molfile/converter/helm.ts +4 -4
- package/src/utils/helm-to-molfile/utils.ts +5 -6
- package/src/utils/macromolecule-column-widget.ts +6 -7
- package/src/utils/monomer-cell-renderer-base.ts +8 -1
- package/src/utils/monomer-lib/lib-manager.ts +3 -2
- package/src/utils/monomer-lib/monomer-colors.ts +10 -10
- package/src/utils/monomer-lib/monomer-lib-base.ts +6 -1
- package/src/utils/monomer-lib/monomer-lib.ts +15 -9
- package/src/utils/multiple-sequence-alignment-ui.ts +30 -30
- package/src/utils/save-as-fasta.ts +19 -12
- package/src/utils/seq-helper/seq-handler.ts +836 -0
- package/src/utils/seq-helper/seq-helper.ts +43 -19
- package/src/utils/sequence-to-mol.ts +7 -8
- package/src/utils/split-to-monomers.ts +7 -2
- package/src/utils/types.ts +8 -7
- package/src/utils/ui-utils.ts +2 -2
- package/src/viewers/web-logo-viewer.ts +18 -16
- package/src/widgets/bio-substructure-filter-helm.ts +5 -2
- package/src/widgets/bio-substructure-filter.ts +14 -24
- package/src/widgets/composition-analysis-widget.ts +6 -6
- package/src/widgets/representations.ts +7 -4
- package/src/tests/detectors-custom-notation-tests.ts +0 -37
- package/src/utils/cyclized.ts +0 -89
- package/src/utils/dimerized.ts +0 -10
|
@@ -0,0 +1,836 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import wu from 'wu';
|
|
4
|
+
|
|
5
|
+
/* eslint-disable max-len */
|
|
6
|
+
import {ALIGNMENT, ALPHABET, candidateAlphabets, getSplitterWithSeparator, NOTATION, positionSeparator, splitterAsFasta, splitterAsHelm, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule/index';
|
|
7
|
+
import {INotationProvider, ISeqSplitted, SeqColStats, SplitterFunc,} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
|
+
import {detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
9
|
+
import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
10
|
+
import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
11
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
12
|
+
import {HELM_POLYMER_TYPE, HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL} from '@datagrok-libraries/bio/src/utils/const';
|
|
13
|
+
import {GAP_SYMBOL, GapOriginals} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
14
|
+
import {CellRendererBackBase, GridCellRendererTemp} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
|
|
15
|
+
import {HelmTypes} from '@datagrok-libraries/bio/src/helm/consts';
|
|
16
|
+
import {HelmType} from '@datagrok-libraries/bio/src/helm/types';
|
|
17
|
+
import {ISeqHandler, ConvertFunc, JoinerFunc, SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
18
|
+
|
|
19
|
+
import {SeqHelper} from './seq-helper';
|
|
20
|
+
|
|
21
|
+
/* eslint-enable max-len */
|
|
22
|
+
|
|
23
|
+
/** Class for handling notation units in Macromolecule columns and
|
|
24
|
+
* conversion of notation systems in Macromolecule columns
|
|
25
|
+
*/
|
|
26
|
+
export class SeqHandler implements ISeqHandler {
|
|
27
|
+
protected readonly _column: DG.Column; // the column to be converted
|
|
28
|
+
protected readonly _units: string; // units, of the form fasta, separator
|
|
29
|
+
protected readonly _notation: NOTATION; // current notation (without :SEQ:NT, etc.)
|
|
30
|
+
protected readonly _defaultGapOriginal: string;
|
|
31
|
+
protected readonly notationProvider: INotationProvider | null = null;
|
|
32
|
+
|
|
33
|
+
private _splitter: SplitterFunc | null = null;
|
|
34
|
+
|
|
35
|
+
/**
 * Wraps a string Macromolecule column.
 *
 * Reads the units from the column metadata, derives the notation and the default gap symbol,
 * asks the SeqHelper to set units-related tags when mandatory tags are absent, and captures
 * the notation provider for custom notations.
 *
 * @param {DG.Column<string>} col Column to handle, must be of string type with units set
 * @param {SeqHelper} seqHelper Helper used to set tags for FASTA / separator / HELM columns
 * @throws {Error} On a non-string column, missing units, unexpected units, or a missing
 *   mandatory alphabetIsMultichar tag for the 'UN' alphabet
 */
protected constructor(col: DG.Column<string>,
  private readonly seqHelper: SeqHelper,
) {
  if (col.type !== DG.TYPE.STRING)
    throw new Error(`Unexpected column type '${col.type}', must be '${DG.TYPE.STRING}'.`);
  this._column = col;

  const colUnits = this._column.meta.units;
  if (colUnits === null || colUnits === undefined)
    throw new Error('Units are not specified in column');
  this._units = colUnits;

  this._notation = this.getNotation();

  // Gap symbol by notation; everything that is neither FASTA nor HELM uses the SEPARATOR gap.
  if (this.isFasta())
    this._defaultGapOriginal = GapOriginals[NOTATION.FASTA];
  else if (this.isHelm())
    this._defaultGapOriginal = GapOriginals[NOTATION.HELM];
  else
    this._defaultGapOriginal = GapOriginals[NOTATION.SEPARATOR];

  const mandatoryTagsMissing =
    !this.column.tags.has(TAGS.aligned) || !this.column.tags.has(TAGS.alphabet) ||
    (!this.column.tags.has(TAGS.alphabetIsMultichar) && !this.isHelm() && this.alphabet === ALPHABET.UN);
  if (mandatoryTagsMissing) {
    // Some mandatory tags are absent: delegate to the SeqHelper setter matching the notation
    // (presumably it detects and fills in the missing tags -- confirm against SeqHelper).
    if (this.isFasta()) {
      this.seqHelper.setUnitsToFastaColumn(this);
    } else if (this.isSeparator()) {
      const separator = col.getTag(TAGS.separator);
      this.seqHelper.setUnitsToSeparatorColumn(this, separator);
    } else if (this.isHelm()) {
      this.seqHelper.setUnitsToHelmColumn(this);
    } else {
      throw new Error(`Unexpected units '${this.column.meta.units}'.`);
    }
  }

  // if (!this.column.tags.has(TAGS.alphabetSize)) {
  //   if (this.isHelm())
  //     throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
  //       `tag '${TAGS.alphabetSize}' is mandatory.`);
  //   else if (['UN'].includes(this.alphabet))
  //     throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
  //       `tag '${TAGS.alphabetSize}' is mandatory.`);
  // }

  if (!this.column.tags.has(TAGS.alphabetIsMultichar)) {
    if (this.isHelm()) {
      // HELM columns are always marked as multichar.
      this.column.setTag(TAGS.alphabetIsMultichar, 'true');
    } else if (this.alphabet === 'UN') {
      throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
        `tag '${TAGS.alphabetIsMultichar}' is mandatory.`);
    }
  }

  if (this.column.meta.units === NOTATION.CUSTOM) {
    // this.column.temp[SeqTemps.notationProvider] must be set at detector stage
    this.notationProvider = this.column.temp[SeqTemps.notationProvider] ?? null;
  }

  // Remember the column version the caches correspond to.
  this.columnVersion = this.column.version;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Sets the aligned / alphabet tags (and, for the unknown alphabet, the alphabetSize and
 * alphabetIsMultichar tags) on a FASTA or SEPARATOR column. Columns of other units are untouched.
 * From detectMacromolecule.
 *
 * @param {SeqHandler} uh Handler of the column to tag
 * @throws {Error} When the alphabet tag is not set and the column contains no monomers
 */
public static setTags(uh: SeqHandler): void {
  const units = uh.column.meta.units as NOTATION;
  if (![NOTATION.FASTA, NOTATION.SEPARATOR].includes(units))
    return;

  // Empty monomer alphabet is allowed, only if alphabet tag is annotated
  if (!uh.column.getTag(TAGS.alphabet) && Object.keys(uh.stats.freq).length === 0)
    throw new Error('Alphabet is empty and not annotated.');

  // Sequences of equal length are tagged as MSA when the tag is absent.
  if (uh.column.getTag(TAGS.aligned) === null) {
    const detectedAligned = uh.stats.sameLength ? ALIGNMENT.SEQ_MSA : ALIGNMENT.SEQ;
    uh.column.setTag(TAGS.aligned, detectedAligned);
  }

  let alphabet = uh.column.getTag(TAGS.alphabet);
  if (alphabet === null) {
    alphabet = detectAlphabet(uh.stats.freq, candidateAlphabets);
    uh.column.setTag(TAGS.alphabet, alphabet);
  }

  if (alphabet !== ALPHABET.UN)
    return;

  // Unknown alphabet: record its size and whether any monomer symbol is longer than one char.
  const monomers = Object.keys(uh.stats.freq);
  const isMultichar = monomers.some((m) => m.length > 1);
  uh.column.setTag(TAGS.alphabetSize, monomers.length.toString());
  uh.column.setTag(TAGS.alphabetIsMultichar, isMultichar ? 'true' : 'false');
}
|
|
120
|
+
|
|
121
|
+
/** The column wrapped by this handler (assigned in the constructor). */
get column(): DG.Column { return this._column; }
|
|
122
|
+
|
|
123
|
+
/** Length of the wrapped column (`column.length`). */
public get length(): number { return this._column.length; }
|
|
124
|
+
|
|
125
|
+
/** Units string read from the column metadata when the handler was constructed. */
public get units(): string { return this._units; }
|
|
126
|
+
|
|
127
|
+
/** Notation derived from the units by {@link getNotation} at construction time. */
public get notation(): NOTATION { return this._notation; }
|
|
128
|
+
|
|
129
|
+
/** Gap symbol chosen in the constructor from GapOriginals: FASTA, HELM, or otherwise SEPARATOR. */
public get defaultGapOriginal(): string { return this._defaultGapOriginal; }
|
|
130
|
+
|
|
131
|
+
/**
 * Value of the separator tag, or undefined when the tag is not set.
 *
 * @throws {Error} When the notation is SEPARATOR and the tag is not set
 */
public get separator(): string | undefined {
  const sep: string | undefined = this.column.getTag(TAGS.separator) ?? undefined;
  const tagIsMissing = sep === undefined;
  if (tagIsMissing && this.notation === NOTATION.SEPARATOR)
    throw new Error(`Separator is mandatory for column '${this.column.name}' of notation '${this.notation}'.`);
  return sep;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Value of the aligned tag.
 *
 * @throws {Error} When the tag is empty on a FASTA or separator column
 */
public get aligned(): string {
  const alignedTag = this.column.getTag(TAGS.aligned);
  if (alignedTag)
    return alignedTag;

  // TAGS.aligned is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR
  if (this.isFasta() || this.isSeparator())
    throw new Error('Tag aligned not set');

  // Other notations: pass the empty / absent tag value through as is.
  return alignedTag;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Alphabet name (upper case), read from the alphabet tag.
 *
 * @throws {Error} When the tag is empty on a FASTA or separator column
 */
public get alphabet(): string {
  const alphabetTag = this.column.getTag(TAGS.alphabet);
  if (alphabetTag)
    return alphabetTag;

  // TAGS.alphabet is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR
  if (this.isFasta() || this.isSeparator())
    throw new Error('Tag alphabet not set');

  // Other notations: pass the empty / absent tag value through as is.
  return alphabetTag;
}
|
|
158
|
+
|
|
159
|
+
/** NUCLEOTIDE for the RNA and DNA alphabets, AA for every other alphabet. */
public get defaultBiotype(): HelmType {
  const alphabet = this.alphabet;
  const isNucleotide = alphabet === ALPHABET.RNA || alphabet === ALPHABET.DNA;
  return isNucleotide ? HelmTypes.NUCLEOTIDE : HelmTypes.AA;
}
|
|
162
|
+
|
|
163
|
+
/** Raw value of the isHelmCompatible tag of the column. */
protected get helmCompatible(): string | undefined {
  const tagValue = this.column.getTag(TAGS.isHelmCompatible);
  return tagValue;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Number of distinct monomers of the column alphabet.
 *
 * For HELM notation or the unknown alphabet the value comes from the alphabetSize tag; when the
 * tag is absent or does not hold a decimal integer, the size is computed from the column stats.
 * Known alphabets have fixed sizes: 20 for PT, 4 for DNA / RNA (and the unexpected 'NT').
 *
 * @return {number} Alphabet size
 * @throws {Error} On an alphabet that is neither unknown nor one of the known ones
 */
public getAlphabetSize(): number {
  if (this.notation === NOTATION.HELM || this.alphabet === ALPHABET.UN) {
    const alphabetSizeStr = this.column.getTag(TAGS.alphabetSize);
    if (alphabetSizeStr) {
      // Explicit radix: the tag is written with Number#toString(), i.e. in base 10.
      const parsedSize = parseInt(alphabetSizeStr, 10);
      if (!Number.isNaN(parsedSize))
        return parsedSize;
    }
    // calculate alphabetSize on demand (also when the tag value is malformed)
    return Object.keys(this.stats.freq).length;
  }

  switch (this.alphabet) {
  case ALPHABET.PT:
    return 20;
  case ALPHABET.DNA:
  case ALPHABET.RNA:
    return 4;
  case 'NT':
    console.warn(`Unexpected alphabet 'NT'.`);
    return 4;
  default:
    throw new Error(`Unexpected alphabet '${this.alphabet}'.`);
  }
}
|
|
194
|
+
|
|
195
|
+
/**
 * Whether monomer symbols may be longer than one character.
 *
 * Always true for HELM, always false for any alphabet other than the unknown one;
 * for the unknown alphabet the alphabetIsMultichar tag decides.
 */
public getAlphabetIsMultichar(): boolean {
  if (this.notation === NOTATION.HELM)
    return true;
  if (this.alphabet !== ALPHABET.UN)
    return false;
  return this.column.getTag(TAGS.alphabetIsMultichar) === 'true';
}
|
|
203
|
+
|
|
204
|
+
private cached: boolean = true;
|
|
205
|
+
private _splitted: WeakRef<ISeqSplitted>[] | null = null;
|
|
206
|
+
private columnVersion: number | null = null;
|
|
207
|
+
// /** */
|
|
208
|
+
// public get splitted(): ISeqSplitted[] {
|
|
209
|
+
// // TODO: Disable cache or invalidate on changing data
|
|
210
|
+
// if (this._splitted === null) {
|
|
211
|
+
// const splitter = this.splitter;
|
|
212
|
+
// const colLength: number = this._column.length;
|
|
213
|
+
// this._splitted = new Array(colLength);
|
|
214
|
+
// const catIdxList = this._column.getRawData();
|
|
215
|
+
// const catList: string[] = this._column.categories;
|
|
216
|
+
// for (let rowIdx: number = 0; rowIdx < colLength; rowIdx++) {
|
|
217
|
+
// const seq: string = catList[catIdxList[rowIdx]];
|
|
218
|
+
// this._splitted[rowIdx] = splitter(seq);
|
|
219
|
+
// }
|
|
220
|
+
// }
|
|
221
|
+
// return this._splitted;
|
|
222
|
+
// }
|
|
223
|
+
/**
 * Splits the sequence of a row into monomers.
 *
 * Results are cached per row through weak references; the cache is dropped whenever the column
 * version changes. Calls with a {@link limit} (or with caching off) bypass the cache.
 *
 * @param {number} rowIdx Row index in the column
 * @param {number} limit Optional limit passed to the splitter factory
 * @return {ISeqSplitted} Splitted sequence
 */
public getSplitted(rowIdx: number, limit?: number): ISeqSplitted {
  const useCache = this.cached && limit === undefined;
  if (!useCache) {
    const seq = this.column.get(rowIdx);
    return this.getSplitter(limit)(seq);
  }

  // (Re)create the cache when it does not exist yet or the column has changed.
  const cacheIsStale = this.column.version !== this.columnVersion || this._splitted === null;
  if (cacheIsStale) {
    this.columnVersion = this.column.version;
    this._splitted = new Array<WeakRef<ISeqSplitted>>(this.column.length);
  }
  const cache = this._splitted!;

  const cachedRef = cache[rowIdx];
  const cachedSS: ISeqSplitted | undefined = cachedRef ? cachedRef.deref() : undefined;
  if (cachedSS)
    return cachedSS;

  // Cache miss, or the referenced value has been garbage collected: split again.
  const seq = this.column.get(rowIdx);
  const freshSS: ISeqSplitted = this.splitter(seq);
  cache[rowIdx] = new WeakRef(freshSS);
  return freshSS;
}
|
|
242
|
+
|
|
243
|
+
/**
 * Any Macromolecule can be represented on Helm format. The reverse is not always possible.
 *
 * Uses the notation provider when the handler has one, otherwise converts the value with
 * convertToHelm and wraps it into a Macromolecule semantic value of HELM units.
 *
 * @param {number} rowIdx Row index in the column
 * @param {any} options Passed through to the notation provider only
 * @return {Promise<DG.SemanticValue<string>>} Helm semantic value
 */
public async getHelm(rowIdx: number, options?: any): Promise<DG.SemanticValue<string>> {
  const seq: string = this.column.get(rowIdx);
  if (this.notationProvider)
    return await this.notationProvider.getHelm(seq, options);

  const helm = this.convertToHelm(seq);
  // TODO: set tags from column
  return DG.SemanticValue.fromValueType(helm, DG.SEMTYPE.MACROMOLECULE, NOTATION.HELM);
}
|
|
257
|
+
|
|
258
|
+
private _stats: SeqColStats | null = null;
|
|
259
|
+
|
|
260
|
+
/**
 * Column statistics, computed on first access and cached afterwards:
 * `freq` counts canonical monomers over all rows, `sameLength` tells whether
 * all sequences have the same number of monomers.
 */
public get stats(): SeqColStats {
  if (this._stats === null) {
    const freq: { [m: string]: number } = {};
    let sameLength = true;
    let firstLength: number | null = null;

    const colLen = this.column.length;
    for (let rowIdx = 0; rowIdx < colLen; ++rowIdx) {
      const mSeq: ISeqSplitted = this.getSplitted(rowIdx);
      if (firstLength === null)
        firstLength = mSeq.length;
      else if (mSeq.length !== firstLength)
        sameLength = false;

      for (let posIdx = 0; posIdx < mSeq.length; ++posIdx) {
        const cm = mSeq.getCanonical(posIdx);
        // Own-property check: the `in` operator would also match keys inherited from
        // Object.prototype (e.g. a monomer named 'constructor') and corrupt the counter.
        if (!Object.prototype.hasOwnProperty.call(freq, cm))
          freq[cm] = 0;
        freq[cm] += 1;
      }
    }
    this._stats = {freq: freq, sameLength: sameLength};
  }
  return this._stats;
}
|
|
285
|
+
|
|
286
|
+
private _maxLength: number | null = null;
|
|
287
|
+
/**
 * Maximum number of monomers over all sequences of the column (0 for an empty column).
 * Computed on first access and cached afterwards.
 */
public get maxLength(): number {
  if (this._maxLength === null) {
    // Plain loop instead of Math.max(...values): spreading one argument per row
    // can exceed the engine's argument limit (RangeError) on large columns.
    let longest = 0;
    const rowCount = this.column.length;
    for (let rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
      const seqLength = this.getSplitted(rowIdx).length;
      if (seqLength > longest)
        longest = seqLength;
    }
    this._maxLength = longest;
  }
  return this._maxLength;
}
|
|
294
|
+
|
|
295
|
+
private _posList: string[] | null = null;
|
|
296
|
+
/**
 * Position names, computed on first access and cached afterwards.
 * Taken from the positionNames tag (split by positionSeparator and trimmed) when the tag
 * is set, otherwise the numbers '1'..maxLength.
 */
public get posList(): string[] {
  if (this._posList !== null)
    return this._posList;

  const posListTxt = this.column.getTag(TAGS.positionNames);
  let names: string[];
  if (posListTxt)
    names = posListTxt.split(positionSeparator).map((p) => p.trim());
  else
    names = wu.count(1).take(this.maxLength).map((pos) => pos.toString()).toArray();

  this._posList = names;
  return names;
}
|
|
304
|
+
|
|
305
|
+
/** True when the notation of the column is FASTA. */
public isFasta(): boolean { return this.notation === NOTATION.FASTA; }
|
|
306
|
+
|
|
307
|
+
/** True when the notation is SEPARATOR, or when the column has a non-empty separator tag whatever the notation. */
public isSeparator(): boolean { return this.notation === NOTATION.SEPARATOR || !!this.separator; }
|
|
308
|
+
|
|
309
|
+
/** True when the notation of the column is HELM. */
public isHelm(): boolean { return this.notation === NOTATION.HELM; }
|
|
310
|
+
|
|
311
|
+
/** True when the alphabet tag equals ALPHABET.RNA. */
public isRna(): boolean { return this.alphabet === ALPHABET.RNA; }
|
|
312
|
+
|
|
313
|
+
/** True when the alphabet tag equals ALPHABET.DNA. */
public isDna(): boolean { return this.alphabet === ALPHABET.DNA; }
|
|
314
|
+
|
|
315
|
+
/** True when the alphabet tag equals ALPHABET.PT. */
public isPeptide(): boolean { return this.alphabet === ALPHABET.PT; }
|
|
316
|
+
|
|
317
|
+
/** True when the aligned tag contains 'MSA' (compared in upper case); false when the tag is empty. */
public isMsa(): boolean { return this.aligned ? this.aligned.toUpperCase().includes('MSA') : false; }
|
|
318
|
+
|
|
319
|
+
/** True only when the isHelmCompatible tag is exactly the string 'true'. */
public isHelmCompatible(): boolean { return this.helmCompatible === 'true'; }
|
|
320
|
+
|
|
321
|
+
/** Tells whether {@link om} is a gap: an empty value or the default gap symbol of the notation.
 * @param {string} om Original monomer of sequence symbol
 * @return {boolean}
 */
public isGap(om: string): boolean {
  if (!om)
    return true;
  return om === this._defaultGapOriginal;
}
|
|
328
|
+
|
|
329
|
+
/**
 * Associates the units string with a notation: the first of FASTA, SEPARATOR, HELM, CUSTOM
 * that the lower-cased units start with.
 *
 * @return {NOTATION} Notation associated with the units type
 * @throws {Error} When the units match none of the notations
 */
protected getNotation(): NOTATION {
  const unitsLc = this.units.toLowerCase();
  const candidates = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM, NOTATION.CUSTOM];
  for (const candidate of candidates) {
    if (unitsLc.startsWith(candidate))
      return candidate;
  }
  throw new Error(`Column '${this.column.name}' has unexpected notation '${this.units}'.`);
}
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
/**
 * Builds the wrapper strings for HELM, depending on the type of the
 * macromolecule (peptide, DNA, RNA).
 *
 * @return {string[]} Array of wrappers: [prefix, leftWrapper, rightWrapper, postfix]
 */
public getHelmWrappers(): string[] {
  const dna = this.isDna();
  const rna = this.isRna();

  // DNA, RNA and columns tagged as helm compatible get the 'RNA1{' prefix; the rest are peptides.
  const prefix = (dna || rna || this.isHelmCompatible()) ? 'RNA1{' : 'PEPTIDE1{';

  let leftWrapper = '';
  if (dna)
    leftWrapper = 'd(';
  else if (rna)
    leftWrapper = 'r(';

  const rightWrapper = (dna || rna) ? ')p' : '';
  const postfix = '}$$$$';
  return [prefix, leftWrapper, rightWrapper, postfix];
}
|
|
363
|
+
|
|
364
|
+
/**
 * Creates a new Macromolecule column of the specified notation and of the same
 * length as the source column, copying the aligned / alphabet related tags.
 *
 * @param {NOTATION} tgtNotation Notation (units) of the new column
 * @param {string} tgtSeparator Separator, mandatory for the SEPARATOR notation
 * @param {string} colName Name of the new column; by default '<notation>(<source name>)',
 *   made unique within the source data frame when there is one
 * @param {string[]} data Values of the new column; empty strings by default
 * @return {DG.Column}
 * @throws {Error} When the SEPARATOR notation is requested without a separator
 */
protected getNewColumn(
  tgtNotation: NOTATION, tgtSeparator?: string, colName?: string, data?: string[]
): DG.Column<string> {
  const col = this.column;
  const name = tgtNotation.toLowerCase() + '(' + col.name + ')';
  const newColName = colName ?? col.dataFrame?.columns.getUnusedName(name) ?? name;
  const newColumn = DG.Column.fromList('string', newColName, data ?? new Array(this.column.length).fill(''));
  newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
  newColumn.meta.units = tgtNotation;
  if (tgtNotation === NOTATION.SEPARATOR) {
    if (!tgtSeparator) throw new Error(`Notation \'${NOTATION.SEPARATOR}\' requires separator value.`);
    newColumn.setTag(TAGS.separator, tgtSeparator);
  }
  newColumn.setTag(DG.TAGS.CELL_RENDERER, tgtNotation === NOTATION.HELM ? 'helm' : 'sequence'); // cell.renderer

  const srcAligned = col.getTag(TAGS.aligned);
  if (srcAligned)
    newColumn.setTag(TAGS.aligned, srcAligned);

  let srcAlphabet = col.getTag(TAGS.alphabet);
  // A HELM source without the alphabet tag converted to another notation gets the unknown alphabet.
  if (!srcAlphabet && this.notation === NOTATION.HELM && tgtNotation !== NOTATION.HELM)
    srcAlphabet = ALPHABET.UN;
  if (srcAlphabet != null)
    newColumn.setTag(TAGS.alphabet, srcAlphabet);

  let srcAlphabetSize: string = col.getTag(TAGS.alphabetSize);
  if (srcAlphabet != null && srcAlphabetSize)
    newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);

  const srcAlphabetIsMultichar: string = col.getTag(TAGS.alphabetIsMultichar);
  // `!= null` covers both null and undefined: an absent tag is compared against null
  // elsewhere in this class, so a `!== undefined` check would copy a null value.
  if (srcAlphabet != null && srcAlphabetIsMultichar != null)
    newColumn.setTag(TAGS.alphabetIsMultichar, srcAlphabetIsMultichar);

  if (tgtNotation === NOTATION.HELM) {
    // A HELM target always gets the alphabetSize tag, computed by this handler.
    srcAlphabetSize = this.getAlphabetSize().toString();
    newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);
  }

  return newColumn;
}
|
|
411
|
+
|
|
412
|
+
/**
 * Creates a new column holding {@link seqList}, with the notation and separator of this column
 * and the tags copied by getNewColumn.
 *
 * @param {string} name Name of the new column
 * @param {string[]} seqList Values of the new column
 * @return {DG.Column<string>}
 */
public getNewColumnFromList(name: string, seqList: string[]): DG.Column<string> {
  const srcNotation = this.notation;
  const srcSeparator = this.separator;
  return this.getNewColumn(srcNotation, srcSeparator, name, seqList);
}
|
|
416
|
+
|
|
417
|
+
/**
 * A helper function checking the validity of the 'units' string: valid units start
 * (case-insensitively) with the FASTA, SEPARATOR or HELM notation name.
 * Note that NOTATION.CUSTOM is not among the accepted prefixes.
 *
 * @param {string} units the string to be validated
 * @return {boolean}
 */
public static unitsStringIsValid(units: string): boolean {
  const unitsLc = units.toLowerCase();
  const prefixes = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];
  return prefixes.some((p) => unitsLc.startsWith(p.toLowerCase()));
}
|
|
431
|
+
|
|
432
|
+
/**
 * Builds a new column of semantic type MACROMOLECULE filled with empty strings.
 *
 * @param {number} len the length of the new column
 * @param {string} name the name of the new column
 * @param {string} units the units of the new column
 * @return {DG.Column}
 * @throws {Error} When {@link units} does not pass unitsStringIsValid
 */
public static getNewColumnFromParams(
  len: number,
  name: string,
  units: string
): DG.Column {
  // WARNING: in this implementation it is impossible to verify the uniqueness
  // of the new column's name
  // TODO: verify the validity of units parameter
  if (!SeqHandler.unitsStringIsValid(units))
    throw new Error('Invalid format of \'units\' parameter');

  const emptyValues = new Array(len).fill('');
  const result = DG.Column.fromList('string', name, emptyValues);
  result.semType = DG.SEMTYPE.MACROMOLECULE;
  result.meta.units = units;
  return result;
}
|
|
456
|
+
|
|
457
|
+
/**
 * Gets function to split seq value to monomers.
 *
 * The splitter of the notation provider wins when there is one. Otherwise the choice follows
 * the units: FASTA (simple splitter when the alphabet tag is set and the alphabet is not
 * multichar), SEPARATOR (the only case using {@link limit}), or HELM.
 *
 * @param {number} limit Optional limit passed to the separator splitter factory
 * @return {SplitterFunc}
 * @throws {Error} On units that are none of FASTA, SEPARATOR, HELM
 */
protected getSplitter(limit?: number): SplitterFunc {
  const providerSplitter: SplitterFunc | null = this.notationProvider ? this.notationProvider.splitter : null;
  if (providerSplitter)
    return providerSplitter;

  const unitsLc = this.units.toLowerCase();
  if (unitsLc.startsWith(NOTATION.FASTA)) {
    const alphabet: string | null = this.column.getTag(TAGS.alphabet);
    const singleCharMonomers = alphabet !== null && !this.getAlphabetIsMultichar();
    return singleCharMonomers ? splitterAsFastaSimple : splitterAsFasta;
  }
  if (unitsLc.startsWith(NOTATION.SEPARATOR))
    return getSplitterWithSeparator(this.separator!, limit);
  if (unitsLc.startsWith(NOTATION.HELM))
    return splitterAsHelm;

  throw new Error(`Unexpected units ${this.units} .`);
}
|
|
478
|
+
|
|
479
|
+
/**
 * Splits an arbitrary sequence string with the (cached) splitter of this column.
 *
 * @param {string} seq Sequence in the notation of the column
 * @return {ISeqSplitted}
 */
public split(seq: string): ISeqSplitted {
  const splitted: ISeqSplitted = this.splitter(seq);
  return splitted;
}
|
|
482
|
+
|
|
483
|
+
/**
 * Name of the distance function suitable for the column: HAMMING for aligned (MSA)
 * sequences, LEVENSHTEIN for everything else.
 *
 * @return {MmDistanceFunctionsNames}
 * @throws {Error} When the column is not of FASTA notation
 */
public getDistanceFunctionName(): MmDistanceFunctionsNames {
  // TODO add support for helm and separator notation
  if (!this.isFasta())
    throw new Error('Only FASTA notation is supported');
  if (this.isMsa())
    return MmDistanceFunctionsNames.HAMMING;

  const alphabet = this.alphabet;
  if (alphabet === ALPHABET.DNA || alphabet === ALPHABET.RNA) {
    // As DNA and RNA scoring matrices are same as identity matrices(mostly),
    // we can use very fast and optimized Levenshtein distance library
    return MmDistanceFunctionsNames.LEVENSHTEIN;
  }
  if (alphabet === ALPHABET.PT)
    return MmDistanceFunctionsNames.LEVENSHTEIN;

  // For default case, let's use Levenshtein distance
  return MmDistanceFunctionsNames.LEVENSHTEIN;
}
|
|
502
|
+
|
|
503
|
+
/**
 * Creates the distance function named by getDistanceFunctionName.
 *
 * @return {mmDistanceFunctionType}
 */
public getDistanceFunction(): mmDistanceFunctionType {
  const funcName = this.getDistanceFunctionName();
  return mmDistanceFunctions[funcName]();
}
|
|
506
|
+
|
|
507
|
+
/**
 * Checks if the separator notation is compatible with the helm library: every canonical
 * monomer of every sequence must be a PEPTIDE symbol of the monomer library.
 * The result is stored in the isHelmCompatible tag and reused on subsequent calls.
 *
 * @return {Promise<boolean>} true when all monomers are found in the library
 */
public async checkHelmCompatibility(): Promise<boolean> {
  // check first for the column tag to avoid extra processing
  if (this.column.tags.has(TAGS.isHelmCompatible))
    return this.column.getTag(TAGS.isHelmCompatible) === 'true';

  // get the monomer lib and check against the column
  const monomerLibHelper: IMonomerLibHelper = await getMonomerLibHelper();
  const bioLib = monomerLibHelper.getMonomerLib();
  // retrieve peptides and convert the list to a set for faster lookup
  const peptidesSet = new Set(bioLib.getMonomerSymbolsByType(HELM_POLYMER_TYPE.PEPTIDE));

  // iterate over the rows, split them and check if all monomers are in the lib
  //TODO maybe add missing threshold so that if there are not too many missing monomers
  // the column is still considered helm compatible
  // Raw values already checked; presumably category indices of the string column, so that
  // repeated sequences are examined only once -- confirm against DG.Column.getRawData.
  const seenCatIdxs: Set<number> = new Set();
  const rowCount = this.column.length;
  const colRawData = this.column.getRawData();
  for (let rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
    const catI = colRawData[rowIdx];
    // Set membership must be tested with has(); the `in` operator looks up object properties
    // and is always false for values stored in a Set.
    if (seenCatIdxs.has(catI))
      continue;
    seenCatIdxs.add(catI);

    const seqSS = this.getSplitted(rowIdx);
    for (let posIdx = 0; posIdx < seqSS.length; ++posIdx) {
      const cm = seqSS.getCanonical(posIdx);
      if (!peptidesSet.has(cm)) {
        this.column.setTag(TAGS.isHelmCompatible, 'false');
        return false;
      }
    }
  }
  this.column.setTag(TAGS.isHelmCompatible, 'true');
  return true;
}
|
|
545
|
+
|
|
546
|
+
// -- Notation Converter --
|
|
547
|
+
|
|
548
|
+
/** Splitter of the column (without limit), created on first access and cached afterwards. */
protected get splitter(): SplitterFunc {
  return this._splitter ?? (this._splitter = this.getSplitter());
}
|
|
553
|
+
|
|
554
|
+
/** True when {@link targetNotation} is FASTA. */
public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }
|
|
555
|
+
|
|
556
|
+
/** True when {@link targetNotation} is SEPARATOR. */
public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }
|
|
557
|
+
|
|
558
|
+
/** True when {@link targetNotation} is HELM. */
public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }
|
|
559
|
+
|
|
560
|
+
/**
 * Converts a HELM string to FASTA/SEPARATOR.
 *
 * HELM gaps are replaced with the target gap symbol, HELM wrappers are stripped from monomers
 * of sequences starting with 'RNA', and multi-character monomers are enclosed in square
 * brackets when the target is FASTA.
 *
 * @param {string} srcSeq A string to be converted
 * @param {string} tgtNotation Target notation: FASTA or SEPARATOR
 * @param {string} tgtSeparator Optional target separator (for HELM -> SEPARATOR); defaults to
 *   '' for FASTA and to the separator of this column otherwise
 * @param {string} tgtGapOriginal Optional target gap symbol; defaults to the gap of the
 *   target notation
 * @return {string} Converted string
 */
public convertHelmToFastaSeparator(
  srcSeq: string, tgtNotation: string, tgtSeparator?: string, tgtGapOriginal?: string
): string {
  const targetIsFasta = this.toFasta(tgtNotation as NOTATION);

  const gapSymbol = tgtGapOriginal ? tgtGapOriginal :
    (targetIsFasta ? GapOriginals[NOTATION.FASTA] : GapOriginals[NOTATION.SEPARATOR]);
  const joinWith = tgtSeparator ? tgtSeparator :
    (targetIsFasta ? '' : this.separator);

  const isNucleotide = srcSeq.startsWith('RNA');
  // items can be monomers or helms
  const helmItems = this.splitter(srcSeq);
  const tgtMonomers: string[] = [];
  for (let posIdx = 0; posIdx < helmItems.length; ++posIdx) {
    const rawOm: string = helmItems.getOriginal(posIdx);
    const om = isNucleotide ? rawOm.replace(HELM_WRAPPERS_REGEXP, '') : rawOm;

    if (om === GapOriginals[NOTATION.HELM]) {
      tgtMonomers.push(gapSymbol);
    } else if (targetIsFasta && om.length > 1) {
      // the case of a multi-character monomer converted to FASTA
      tgtMonomers.push('[' + om + ']');
    } else {
      tgtMonomers.push(om);
    }
  }
  return tgtMonomers.join(joinWith);
}
|
|
601
|
+
|
|
602
|
+
/** Converts the whole column to another notation
 *
 * @param {NOTATION} tgtNotation Notation to convert to
 * @param {string} tgtSeparator Separator of the target notation, if any
 * @return {DG.Column} New column with every row converted
 */
public convert(tgtNotation: NOTATION, tgtSeparator?: string): DG.Column<string> {
  // The joiner is requested from the source handler (this) because it knows the source sequence.
  // For example, converting DNA Helm to fasta requires removing the r(X)p decoration.
  const join: JoinerFunc = this.getJoiner({notation: tgtNotation, separator: tgtSeparator});
  const tgtCol = this.getNewColumn(tgtNotation, tgtSeparator);
  // fill the freshly created column row by row: split the source value, then join it for the target
  tgtCol.init((rowIdx: number) => join(this.getSplitted(rowIdx)));
  return tgtCol;
}
|
|
620
|
+
|
|
621
|
+
/**
 * Builds a column with the region [startIdx, endIdx] of every sequence of this column.
 * Positions past the end of a sequence are filled with the gap of this notation;
 * the position names / labels tags are cut to the same region with '?' for missing positions.
 *
 * @param startIdx Start position index of the region (0-based), null stands for 0
 * @param endIdx   End position index of the region (0-based, inclusive), null stands for maxLength - 1
 * @param name     Name assigned to the resulting column
 * @return Column created by getNewColumn() with this handler's notation and separator
 */
public getRegion(startIdx: number | null, endIdx: number | null, name: string): DG.Column<string> {
  const resCol: DG.Column<string> = this.getNewColumn(this.notation, this.separator);
  resCol.name = name;

  const firstPos: number = startIdx ?? 0;
  const lastPos: number = endIdx ?? this.maxLength - 1;

  const join = this.getJoiner();

  const regionLen = lastPos - firstPos + 1;
  const gap = GapOriginals[this.notation];

  // Cuts the region out of a positional source, using filler for positions past its end.
  // A plain slice is not used because the result must always have regionLen items.
  const cutRegion = (srcLength: () => number, at: (pos: number) => string, filler: string): string[] => {
    const cut = new Array<string>(regionLen);
    for (let k = 0; k < regionLen; ++k) {
      const srcPos = firstPos + k;
      cut[k] = srcPos < srcLength() ? at(srcPos) : filler;
    }
    return cut;
  };

  resCol.init((rowI): string => {
    const rowSS = this.getSplitted(rowI);
    const regionOMs = cutRegion(() => rowSS.length, (pos) => rowSS.getOriginal(pos), gap);
    return join(new StringListSeqSplitted(regionOMs, gap));
  });

  const cutPositionsTag = (tagValue: string): string => {
    const srcItems = tagValue.split(',').map((item) => item.trim());
    return cutRegion(() => srcItems.length, (pos) => srcItems[pos], '?').join(positionSeparator);
  };

  // Names first, then labels: each tag is copied only when the source column has it.
  for (const tag of [TAGS.positionNames, TAGS.positionLabels]) {
    const srcTagValue = this.column.getTag(tag);
    if (srcTagValue) resCol.setTag(tag, cutPositionsTag(srcTagValue));
  }

  return resCol;
}
|
|
666
|
+
|
|
667
|
+
/** Backing field of the {@link joiner} getter; stays undefined until the first access. */
private _joiner?: JoinerFunc = undefined;

/** Joiner for this handler's own notation; obtained from getJoiner() on first access and reused afterwards. */
public get joiner(): JoinerFunc {
  const cached = this._joiner;
  if (cached)
    return cached;

  const created = this.getJoiner();
  this._joiner = created;
  return created;
}
|
|
675
|
+
|
|
676
|
+
/**
 * Builds a function joining a split sequence into a string of the requested notation.
 *
 * @param opts Target notation and separator; when omitted, this handler's own notation and separator are used
 * @return Joiner function for the target notation
 * @throws Error if the notation is SEPARATOR without a separator, or the notation is not supported
 */
public getJoiner(opts?: { notation: NOTATION, separator?: string }): JoinerFunc {
  const target = opts ? opts.notation : this.notation;
  const sep = opts ? opts.separator : this.separator;

  switch (target) {
  case NOTATION.FASTA:
    // isHelm() is evaluated on every call of the joiner, not once here
    return (srcSS: ISeqSplitted): string => this.joinToFasta(srcSS, this.isHelm());

  case NOTATION.SEPARATOR: {
    if (!sep) throw new Error(`Separator is mandatory for notation '${target}'.`);
    return (srcSS: ISeqSplitted): string => joinToSeparator(srcSS, sep, this.isHelm());
  }

  case NOTATION.HELM: {
    // alphabet and wrappers are captured once, at joiner creation
    const nucleotide = this.alphabet === ALPHABET.DNA || this.alphabet === ALPHABET.RNA;
    const helmWrappers = this.getHelmWrappers();
    return (srcSS: ISeqSplitted): string => joinToHelm(srcSS, helmWrappers, nucleotide);
  }

  default:
    throw new Error(`Unexpected notation '${target}'.`);
  }
}
|
|
704
|
+
|
|
705
|
+
/**
 * Builds a function converting a single source sequence string to the target notation.
 *
 * @param tgtUnits Target notation
 * @param tgtSeparator Target separator, required when tgtUnits is SEPARATOR
 * @return Converter function for a single sequence string
 * @throws Error if the separator is missing for SEPARATOR, or tgtUnits is not FASTA / HELM / SEPARATOR
 */
public getConverter(tgtUnits: NOTATION, tgtSeparator: string | undefined = undefined): ConvertFunc {
  switch (tgtUnits) {
  case NOTATION.FASTA:
    return (srcSeq: string) => this.convertToFasta(srcSeq);

  case NOTATION.HELM:
    return (srcSeq: string) => this.convertToHelm(srcSeq);

  case NOTATION.SEPARATOR: {
    if (!tgtSeparator)
      throw new Error(`Target separator is not specified for target units '${NOTATION.SEPARATOR}'.`);
    // Copy to a const so the checked value is what the converter closes over
    const separator: string = tgtSeparator;
    return (srcSeq: string) => this.convertToSeparator(srcSeq, separator);
  }

  default:
    // Previously an Error without any message; name the offending units like getJoiner() does
    throw new Error(`Unexpected target units '${tgtUnits}'.`);
  }
}
|
|
719
|
+
|
|
720
|
+
/**
 * Gets the column's SeqHandler from the column temp slot. A new handler is created and stored
 * to the slot when the slot is empty or the stored handler's columnVersion differs from col.version.
 */
public static forColumn(col: DG.Column<string>, seqHelper: SeqHelper): SeqHandler {
  // TODO: Invalidate col.temp[Temps.uh] checking column's metadata
  const cached = col.temp[SeqTemps.seqHandler];
  if (cached && cached.columnVersion === col.version)
    return cached;

  const created = new SeqHandler(col, seqHelper);
  col.temp[SeqTemps.seqHandler] = created;
  return created;
}
|
|
728
|
+
|
|
729
|
+
// -- joiners & converters --
|
|
730
|
+
|
|
731
|
+
/**
 * Joins a split sequence to a FASTA string.
 * Gaps become the FASTA gap, phosphates are dropped, multi-character monomers are put in square brackets.
 *
 * @param seqS Split sequence to join
 * @param isHelm Whether the monomers come from HELM, so that HELM_WRAPPERS_REGEXP is applied to them
 * @return FASTA string
 */
private joinToFasta(seqS: ISeqSplitted, isHelm: boolean): string {
  const resMList: string[] = new Array<string>(seqS.length);
  for (let posIdx: number = 0; posIdx < seqS.length; ++posIdx) {
    // Gap and phosphate are detected on the canonical monomer, as joinToSeparator() and joinToHelm() do.
    // Reading the original monomer here compared a notation-specific symbol with canonical constants.
    const cm = seqS.getCanonical(posIdx);
    let om: string = seqS.getOriginal(posIdx);
    if (isHelm)
      om = om.replace(HELM_WRAPPERS_REGEXP, '$1');

    if (cm === GAP_SYMBOL)
      om = GapOriginals[NOTATION.FASTA];
    else if (cm === PHOSPHATE_SYMBOL)
      om = '';
    else if (om.length > 1)
      om = '[' + om + ']';

    resMList[posIdx] = om;
  }
  return resMList.join('');
}
|
|
750
|
+
|
|
751
|
+
/**
 * Converts a single source sequence string to FASTA.
 * HELM sources are split with splitterAsHelmNucl(), all others with the regular splitter.
 */
private convertToFasta(src: string): string {
  const plainSplit: SplitterFunc = this.splitter;
  const fromHelm = this.isHelm();
  const parts: ISeqSplitted = fromHelm ? this.splitterAsHelmNucl(src) : plainSplit(src);
  return this.joinToFasta(parts, fromHelm);
}
|
|
756
|
+
|
|
757
|
+
/**
 * Converts a single source sequence string to SEPARATOR notation with the given separator.
 * HELM sources are split with splitterAsHelmNucl(), all others with the regular splitter.
 */
private convertToSeparator(src: string, tgtSeparator: string): string {
  const fromHelm = this.isHelm();
  let parts: ISeqSplitted;
  if (fromHelm)
    parts = this.splitterAsHelmNucl(src);
  else
    parts = this.splitter(src);
  return joinToSeparator(parts, tgtSeparator, fromHelm);
}
|
|
761
|
+
|
|
762
|
+
/**
 * Converts a single source sequence string to HELM.
 * A source that is already HELM is returned unchanged.
 */
private convertToHelm(src: string): string {
  if (this.notation === NOTATION.HELM) return src;

  const wrappers = this.getHelmWrappers();

  // The source is not HELM at this point, so a leading 'DNA' / 'RNA' in src would be sequence letters
  // rather than a HELM polymer prefix. The alphabet is used instead, as in the HELM branch of getJoiner().
  const isDnaOrRna = this.alphabet === ALPHABET.DNA || this.alphabet === ALPHABET.RNA;
  const srcSS = this.splitter(src);
  return joinToHelm(srcSS, wrappers, isDnaOrRna);
}
|
|
771
|
+
|
|
772
|
+
/**
 * Splits Helm sequence; for sources starting with 'DNA' or 'RNA' the monomers are passed through
 * HELM_WRAPPERS_REGEXP and (!) lone phosphates are removed. Empty monomers are removed for any source.
 */
private splitterAsHelmNucl(src: string): ISeqSplitted {
  const srcSS: ISeqSplitted = this.splitter(src);
  const isNucleotide = src.startsWith('DNA') || src.startsWith('RNA');

  const kept: string[] = [];
  for (let i = 0; i < srcSS.length; ++i) {
    let monomer: string = srcSS.getOriginal(i);
    if (isNucleotide) {
      monomer = monomer.replace(HELM_WRAPPERS_REGEXP, '$1');
      if (monomer === PHOSPHATE_SYMBOL)
        continue;
    }
    if (monomer)
      kept.push(monomer);
  }
  return new StringListSeqSplitted(kept, GapOriginals[NOTATION.HELM]);
}
|
|
788
|
+
|
|
789
|
+
// Custom notation provider
|
|
790
|
+
|
|
791
|
+
/**
 * Returns the cell renderer back stored in this column's temp, creating it with the notation provider
 * on first use. The arguments are only used when the renderer back has to be created.
 */
getRendererBack(gridCol: DG.GridColumn | null, tableCol: DG.Column<string>): CellRendererBackBase<string> {
  const slot = this.column.temp as GridCellRendererTemp<any>;
  const cached = slot.rendererBack;
  if (cached)
    return cached;

  const created = this.notationProvider!.createCellRendererBack(gridCol, tableCol);
  slot.rendererBack = created;
  return created;
}
|
|
798
|
+
}
|
|
799
|
+
|
|
800
|
+
// -- joiners --
|
|
801
|
+
|
|
802
|
+
/**
 * Joins a split sequence to a string of SEPARATOR notation.
 * Gaps become the SEPARATOR gap and phosphates become empty items (both detected on the canonical monomer).
 *
 * @param seqS Split sequence to join
 * @param tgtSeparator Separator put between the items
 * @param isHelm Whether the monomers come from HELM, so that HELM_WRAPPERS_REGEXP is applied to them
 */
function joinToSeparator(seqS: ISeqSplitted, tgtSeparator: string, isHelm: boolean): string {
  const items: string[] = [];
  for (let i = 0; i < seqS.length; ++i) {
    const canonical = seqS.getCanonical(i);
    const original = seqS.getOriginal(i);
    const unwrapped = isHelm ? original.replace(HELM_WRAPPERS_REGEXP, '$1') : original;

    if (canonical === GAP_SYMBOL)
      items.push(GapOriginals[NOTATION.SEPARATOR]);
    else if (canonical === PHOSPHATE_SYMBOL)
      items.push('');
    else
      items.push(unwrapped);
  }
  return items.join(tgtSeparator);
}
|
|
818
|
+
|
|
819
|
+
/**
 * Joins a split sequence to a HELM string.
 * Gaps become the HELM gap; any other monomer is put between the left and right wrappers,
 * with square brackets added unless it is a single character.
 *
 * @param srcSS Split sequence to join
 * @param wrappers Prefix, left wrapper, right wrapper and postfix, in this order
 * @param isDnaOrRna Whether HELM_WRAPPERS_REGEXP is applied to the monomers before wrapping
 */
function joinToHelm(srcSS: ISeqSplitted, wrappers: string[], isDnaOrRna: boolean): string {
  const [head, open, close, tail] = wrappers;
  const items: string[] = [];
  for (let i = 0; i < srcSS.length; ++i) {
    const canonical = srcSS.getCanonical(i);
    const original: string = srcSS.getOriginal(i);
    if (canonical === GAP_SYMBOL) {
      items.push(GapOriginals[NOTATION.HELM]);
      continue;
    }

    const symbol = isDnaOrRna ? original.replace(HELM_WRAPPERS_REGEXP, '$1') : original;
    const body = symbol.length === 1 ? symbol : `[${symbol}]`;
    items.push(`${open}${body}${close}`);
  }
  return `${head}${items.join('.')}${tail}`;
}
|
|
836
|
+
|