@datagrok-libraries/bio 5.44.4 → 5.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/CHANGELOG.md +9 -0
  2. package/package.json +6 -6
  3. package/src/helm/consts.d.ts +2 -2
  4. package/src/helm/consts.d.ts.map +1 -1
  5. package/src/helm/consts.js.map +1 -1
  6. package/src/helm/helm-helper.d.ts +5 -5
  7. package/src/helm/helm-helper.d.ts.map +1 -1
  8. package/src/helm/types.d.ts +4 -3
  9. package/src/helm/types.d.ts.map +1 -1
  10. package/src/monomer-works/monomer-hover.d.ts +3 -2
  11. package/src/monomer-works/monomer-hover.d.ts.map +1 -1
  12. package/src/monomer-works/monomer-hover.js +4 -3
  13. package/src/monomer-works/monomer-hover.js.map +1 -1
  14. package/src/monomer-works/monomer-utils.d.ts +6 -5
  15. package/src/monomer-works/monomer-utils.d.ts.map +1 -1
  16. package/src/monomer-works/monomer-utils.js +10 -11
  17. package/src/monomer-works/monomer-utils.js.map +1 -1
  18. package/src/monomer-works/seq-to-molfile.d.ts +2 -2
  19. package/src/monomer-works/seq-to-molfile.d.ts.map +1 -1
  20. package/src/monomer-works/seq-to-molfile.js +2 -3
  21. package/src/monomer-works/seq-to-molfile.js.map +1 -1
  22. package/src/monomer-works/to-atomic-level-utils.d.ts +2 -2
  23. package/src/monomer-works/to-atomic-level-utils.d.ts.map +1 -1
  24. package/src/monomer-works/to-atomic-level-utils.js.map +1 -1
  25. package/src/monomer-works/to-atomic-level.d.ts +5 -5
  26. package/src/monomer-works/to-atomic-level.d.ts.map +1 -1
  27. package/src/monomer-works/to-atomic-level.js +17 -14
  28. package/src/monomer-works/to-atomic-level.js.map +1 -1
  29. package/src/types/index.d.ts +6 -4
  30. package/src/types/index.d.ts.map +1 -1
  31. package/src/unknown.d.ts.map +1 -1
  32. package/src/unknown.js +2 -2
  33. package/src/unknown.js.map +1 -1
  34. package/src/utils/cell-renderer-async-base.d.ts +1 -1
  35. package/src/utils/cell-renderer-async-base.d.ts.map +1 -1
  36. package/src/utils/cell-renderer-async-base.js +5 -3
  37. package/src/utils/cell-renderer-async-base.js.map +1 -1
  38. package/src/utils/cell-renderer-consts.d.ts +2 -1
  39. package/src/utils/cell-renderer-consts.d.ts.map +1 -1
  40. package/src/utils/cell-renderer-monomer-placer.d.ts +6 -4
  41. package/src/utils/cell-renderer-monomer-placer.d.ts.map +1 -1
  42. package/src/utils/cell-renderer-monomer-placer.js +59 -47
  43. package/src/utils/cell-renderer-monomer-placer.js.map +1 -1
  44. package/src/utils/fasta-handler.d.ts.map +1 -1
  45. package/src/utils/fasta-handler.js +0 -3
  46. package/src/utils/fasta-handler.js.map +1 -1
  47. package/src/utils/generator.d.ts +2 -1
  48. package/src/utils/generator.d.ts.map +1 -1
  49. package/src/utils/generator.js +2 -3
  50. package/src/utils/generator.js.map +1 -1
  51. package/src/utils/macromolecule/consts.d.ts +1 -0
  52. package/src/utils/macromolecule/consts.d.ts.map +1 -1
  53. package/src/utils/macromolecule/consts.js +1 -0
  54. package/src/utils/macromolecule/consts.js.map +1 -1
  55. package/src/utils/macromolecule/scoring.d.ts +4 -2
  56. package/src/utils/macromolecule/scoring.d.ts.map +1 -1
  57. package/src/utils/macromolecule/scoring.js +8 -8
  58. package/src/utils/macromolecule/scoring.js.map +1 -1
  59. package/src/utils/macromolecule/seq-handler.d.ts +47 -0
  60. package/src/utils/macromolecule/seq-handler.d.ts.map +1 -0
  61. package/src/utils/macromolecule/seq-handler.js +8 -0
  62. package/src/utils/macromolecule/seq-handler.js.map +1 -0
  63. package/src/utils/macromolecule/types.d.ts +2 -1
  64. package/src/utils/macromolecule/types.d.ts.map +1 -1
  65. package/src/utils/macromolecule/types.js.map +1 -1
  66. package/src/utils/macromolecule/utils.d.ts +2 -1
  67. package/src/utils/macromolecule/utils.d.ts.map +1 -1
  68. package/src/utils/macromolecule/utils.js +4 -5
  69. package/src/utils/macromolecule/utils.js.map +1 -1
  70. package/src/utils/seq-helper.d.ts +2 -0
  71. package/src/utils/seq-helper.d.ts.map +1 -1
  72. package/src/utils/seq-helper.js.map +1 -1
  73. package/src/utils/splitter.d.ts +2 -1
  74. package/src/utils/splitter.d.ts.map +1 -1
  75. package/src/utils/splitter.js +2 -3
  76. package/src/utils/splitter.js.map +1 -1
  77. package/src/viewers/helm-service.d.ts +3 -1
  78. package/src/viewers/helm-service.d.ts.map +1 -1
  79. package/src/viewers/helm-service.js +2 -1
  80. package/src/viewers/helm-service.js.map +1 -1
  81. package/src/utils/seq-handler.d.ts +0 -167
  82. package/src/utils/seq-handler.d.ts.map +0 -1
  83. package/src/utils/seq-handler.js +0 -748
  84. package/src/utils/seq-handler.js.map +0 -1
@@ -1,748 +0,0 @@
1
- import * as DG from 'datagrok-api/dg';
2
- import wu from 'wu';
3
- /* eslint-disable max-len */
4
- import { ALPHABET, candidateAlphabets, getSplitterWithSeparator, NOTATION, positionSeparator, splitterAsFasta, splitterAsHelm, TAGS } from './macromolecule';
5
- import { detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted } from './macromolecule/utils';
6
- import { mmDistanceFunctions, MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
7
- import { getMonomerLibHelper } from '../monomer-works/monomer-utils';
8
- import { HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL } from './const';
9
- import { GAP_SYMBOL, GapOriginals } from './macromolecule/consts';
10
- import { HelmTypes } from '../helm/consts';
11
- /* eslint-enable max-len */
12
- export const SeqTemps = new class {
13
- constructor() {
14
- /** Column's temp slot name for a SeqHandler object */
15
- this.seqHandler = `seq-handler`;
16
- this.notationProvider = `seq-handler.notation-provider`;
17
- }
18
- }();
19
- /** Class for handling notation units in Macromolecule columns and
20
- * conversion of notation systems in Macromolecule columns
21
- */
22
- export class SeqHandler {
23
- constructor(col) {
24
- this.notationProvider = null;
25
- this._splitter = null;
26
- this.cached = true;
27
- this._splitted = null;
28
- this.columnVersion = null;
29
- this._stats = null;
30
- this._maxLength = null;
31
- this._posList = null;
32
- this._joiner = undefined;
33
- if (col.type !== DG.TYPE.STRING)
34
- throw new Error(`Unexpected column type '${col.type}', must be '${DG.TYPE.STRING}'.`);
35
- this._column = col;
36
- const units = this._column.meta.units;
37
- if (units !== null && units !== undefined)
38
- this._units = units;
39
- else
40
- throw new Error('Units are not specified in column');
41
- this._notation = this.getNotation();
42
- this._defaultGapOriginal = (this.isFasta()) ? GapOriginals[NOTATION.FASTA] :
43
- (this.isHelm()) ? GapOriginals[NOTATION.HELM] :
44
- GapOriginals[NOTATION.SEPARATOR];
45
- if (!this.column.tags.has(TAGS.aligned) || !this.column.tags.has(TAGS.alphabet) ||
46
- (!this.column.tags.has(TAGS.alphabetIsMultichar) && !this.isHelm() && this.alphabet === ALPHABET.UN)) {
47
- // Some of the tags (aligned, alphabet, alphabetIsMultichar) are missing on the column,
48
- // so they are detected and set here according to the column's notation.
49
- if (this.isFasta())
50
- SeqHandler.setUnitsToFastaColumn(this);
51
- else if (this.isSeparator()) {
52
- const separator = col.getTag(TAGS.separator);
53
- SeqHandler.setUnitsToSeparatorColumn(this, separator);
54
- }
55
- else if (this.isHelm())
56
- SeqHandler.setUnitsToHelmColumn(this);
57
- else
58
- throw new Error(`Unexpected units '${this.column.meta.units}'.`);
59
- }
60
- // if (!this.column.tags.has(TAGS.alphabetSize)) {
61
- // if (this.isHelm())
62
- // throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
63
- // `tag '${TAGS.alphabetSize}' is mandatory.`);
64
- // else if (['UN'].includes(this.alphabet))
65
- // throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
66
- // `tag '${TAGS.alphabetSize}' is mandatory.`);
67
- // }
68
- if (!this.column.tags.has(TAGS.alphabetIsMultichar)) {
69
- if (this.isHelm())
70
- this.column.setTag(TAGS.alphabetIsMultichar, 'true');
71
- else if (['UN'].includes(this.alphabet)) {
72
- throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
73
- `tag '${TAGS.alphabetIsMultichar}' is mandatory.`);
74
- }
75
- }
76
- if (this.column.meta.units === NOTATION.CUSTOM) {
77
- // this.column.temp[SeqTemps.notationProvider] must be set at detector stage
78
- this.notationProvider = this.column.temp[SeqTemps.notationProvider] ?? null;
79
- }
80
- this.columnVersion = this.column.version;
81
- }
82
- static setUnitsToFastaColumn(uh) {
83
- if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE || uh.column.meta.units !== NOTATION.FASTA)
84
- throw new Error(`The column of notation '${NOTATION.FASTA}' must be '${DG.SEMTYPE.MACROMOLECULE}'.`);
85
- uh.column.meta.units = NOTATION.FASTA;
86
- SeqHandler.setTags(uh);
87
- }
88
- static setUnitsToSeparatorColumn(uh, separator) {
89
- if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE || uh.column.meta.units !== NOTATION.SEPARATOR)
90
- throw new Error(`The column of notation '${NOTATION.SEPARATOR}' must be '${DG.SEMTYPE.MACROMOLECULE}'.`);
91
- if (!separator)
92
- throw new Error(`The column of notation '${NOTATION.SEPARATOR}' must have the separator tag.`);
93
- uh.column.meta.units = NOTATION.SEPARATOR;
94
- uh.column.setTag(TAGS.separator, separator);
95
- SeqHandler.setTags(uh);
96
- }
97
- static setUnitsToHelmColumn(uh) {
98
- if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE)
99
- throw new Error(`The column of notation '${NOTATION.HELM}' must be '${DG.SEMTYPE.MACROMOLECULE}'`);
100
- uh.column.meta.units = NOTATION.HELM;
101
- SeqHandler.setTags(uh);
102
- }
103
- /** From detectMacromolecule */
104
- static setTags(uh) {
105
- const units = uh.column.meta.units;
106
- if ([NOTATION.FASTA, NOTATION.SEPARATOR].includes(units)) {
107
- // Empty monomer alphabet is allowed, only if alphabet tag is annotated
108
- if (!uh.column.getTag(TAGS.alphabet) && Object.keys(uh.stats.freq).length === 0)
109
- throw new Error('Alphabet is empty and not annotated.');
110
- let aligned = uh.column.getTag(TAGS.aligned);
111
- if (aligned === null) {
112
- aligned = uh.stats.sameLength ? "SEQ.MSA" /* ALIGNMENT.SEQ_MSA */ : "SEQ" /* ALIGNMENT.SEQ */;
113
- uh.column.setTag(TAGS.aligned, aligned);
114
- }
115
- let alphabet = uh.column.getTag(TAGS.alphabet);
116
- if (alphabet === null) {
117
- alphabet = detectAlphabet(uh.stats.freq, candidateAlphabets);
118
- uh.column.setTag(TAGS.alphabet, alphabet);
119
- }
120
- if (alphabet === ALPHABET.UN) {
121
- const alphabetSize = Object.keys(uh.stats.freq).length;
122
- const alphabetIsMultichar = Object.keys(uh.stats.freq).some((m) => m.length > 1);
123
- uh.column.setTag(TAGS.alphabetSize, alphabetSize.toString());
124
- uh.column.setTag(TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
125
- }
126
- }
127
- }
128
- get column() { return this._column; }
129
- get length() { return this._column.length; }
130
- get units() { return this._units; }
131
- get notation() { return this._notation; }
132
- get defaultGapOriginal() { return this._defaultGapOriginal; }
133
- get separator() {
134
- const separator = this.column.getTag(TAGS.separator) ?? undefined;
135
- if (this.notation === NOTATION.SEPARATOR && separator === undefined)
136
- throw new Error(`Separator is mandatory for column '${this.column.name}' of notation '${this.notation}'.`);
137
- return separator;
138
- }
139
- get aligned() {
140
- const aligned = this.column.getTag(TAGS.aligned);
141
- // TAGS.aligned is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR
142
- if (!aligned && (this.isFasta() || this.isSeparator()))
143
- throw new Error('Tag aligned not set');
144
- return aligned;
145
- }
146
- /** Alphabet name (upper case) */
147
- get alphabet() {
148
- const alphabet = this.column.getTag(TAGS.alphabet);
149
- // TAGS.alphabet is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR
150
- if (!alphabet && (this.isFasta() || this.isSeparator()))
151
- throw new Error('Tag alphabet not set');
152
- return alphabet;
153
- }
154
- get defaultBiotype() {
155
- return this.alphabet === ALPHABET.RNA || this.alphabet === ALPHABET.DNA ? HelmTypes.NUCLEOTIDE : HelmTypes.AA;
156
- }
157
- get helmCompatible() {
158
- return this.column.getTag(TAGS.isHelmCompatible);
159
- }
160
- getAlphabetSize() {
161
- if (this.notation == NOTATION.HELM || this.alphabet == ALPHABET.UN) {
162
- const alphabetSizeStr = this.column.getTag(TAGS.alphabetSize);
163
- let alphabetSize;
164
- if (alphabetSizeStr)
165
- alphabetSize = parseInt(alphabetSizeStr);
166
- else {
167
- // calculate alphabetSize on demand
168
- const stats = this.stats;
169
- alphabetSize = Object.keys(stats.freq).length;
170
- }
171
- return alphabetSize;
172
- }
173
- else {
174
- switch (this.alphabet) {
175
- case ALPHABET.PT:
176
- return 20;
177
- case ALPHABET.DNA:
178
- case ALPHABET.RNA:
179
- return 4;
180
- case 'NT':
181
- console.warn(`Unexpected alphabet 'NT'.`);
182
- return 4;
183
- default:
184
- throw new Error(`Unexpected alphabet '${this.alphabet}'.`);
185
- }
186
- }
187
- }
188
- getAlphabetIsMultichar() {
189
- if (this.notation === NOTATION.HELM)
190
- return true;
191
- else if (this.alphabet !== ALPHABET.UN)
192
- return false;
193
- else
194
- return this.column.getTag(TAGS.alphabetIsMultichar) === 'true';
195
- }
196
- // /** */
197
- // public get splitted(): ISeqSplitted[] {
198
- // // TODO: Disable cache or invalidate on changing data
199
- // if (this._splitted === null) {
200
- // const splitter = this.splitter;
201
- // const colLength: number = this._column.length;
202
- // this._splitted = new Array(colLength);
203
- // const catIdxList = this._column.getRawData();
204
- // const catList: string[] = this._column.categories;
205
- // for (let rowIdx: number = 0; rowIdx < colLength; rowIdx++) {
206
- // const seq: string = catList[catIdxList[rowIdx]];
207
- // this._splitted[rowIdx] = splitter(seq);
208
- // }
209
- // }
210
- // return this._splitted;
211
- // }
212
- getSplitted(rowIdx, limit) {
213
- if (!this.cached || limit !== undefined) {
214
- const seq = this.column.get(rowIdx);
215
- return this.getSplitter(limit)(seq);
216
- }
217
- else {
218
- if (this.column.version !== this.columnVersion || this._splitted === null) {
219
- this.columnVersion = this.column.version;
220
- this._splitted = new Array(this.column.length);
221
- }
222
- let resSS = this._splitted[rowIdx] ? this._splitted[rowIdx].deref() : undefined;
223
- if (!resSS) {
224
- const seq = this.column.get(rowIdx);
225
- resSS = this.splitter(seq);
226
- this._splitted[rowIdx] = new WeakRef(resSS);
227
- }
228
- return resSS;
229
- }
230
- }
231
- /** Any Macromolecule can be represented on Helm format. The reverse is not always possible. */
232
- async getHelm(rowIdx, options) {
233
- const seq = this.column.get(rowIdx);
234
- if (this.notationProvider) {
235
- const helmCol = await this.notationProvider.getHelm(this.column, options);
236
- return helmCol.get(rowIdx);
237
- }
238
- else
239
- return this.convertToHelm(seq);
240
- }
241
- get stats() {
242
- if (this._stats === null) {
243
- const freq = {};
244
- let sameLength = true;
245
- let firstLength = null;
246
- const colLen = this.column.length;
247
- for (let rowIdx = 0; rowIdx < colLen; ++rowIdx) {
248
- const mSeq = this.getSplitted(rowIdx);
249
- if (firstLength == null)
250
- firstLength = mSeq.length;
251
- else if (mSeq.length !== firstLength)
252
- sameLength = false;
253
- for (let posIdx = 0; posIdx < mSeq.length; ++posIdx) {
254
- const cm = mSeq.getCanonical(posIdx);
255
- if (!(cm in freq))
256
- freq[cm] = 0;
257
- freq[cm] += 1;
258
- }
259
- }
260
- this._stats = { freq: freq, sameLength: sameLength };
261
- }
262
- return this._stats;
263
- }
264
- get maxLength() {
265
- if (this._maxLength === null) {
266
- this._maxLength = this.column.length === 0 ? 0 :
267
- Math.max(...wu.count(0).take(this.column.length).map((rowIdx) => this.getSplitted(rowIdx).length));
268
- }
269
- return this._maxLength;
270
- }
271
- get posList() {
272
- if (this._posList === null) {
273
- const posListTxt = this.column.getTag(TAGS.positionNames);
274
- this._posList = posListTxt ? posListTxt.split(positionSeparator).map((p) => p.trim()) :
275
- wu.count(1).take(this.maxLength).map((pos) => pos.toString()).toArray();
276
- }
277
- return this._posList;
278
- }
279
- isFasta() { return this.notation === NOTATION.FASTA; }
280
- isSeparator() { return this.notation === NOTATION.SEPARATOR; }
281
- isHelm() { return this.notation === NOTATION.HELM; }
282
- isRna() { return this.alphabet === ALPHABET.RNA; }
283
- isDna() { return this.alphabet === ALPHABET.DNA; }
284
- isPeptide() { return this.alphabet === ALPHABET.PT; }
285
- isMsa() { return this.aligned ? this.aligned.toUpperCase().includes('MSA') : false; }
286
- isHelmCompatible() { return this.helmCompatible === 'true'; }
287
- /** Checks {@link om} for being a gap
288
- * @param {string} om Original monomer of sequence symbol
289
- * @return {boolean}
290
- */
291
- isGap(om) {
292
- return !om || om === this._defaultGapOriginal;
293
- }
294
- /** Associate notation types with the corresponding units */
295
- /**
296
- * @return {NOTATION} Notation associated with the units type
297
- */
298
- getNotation() {
299
- if (this.units.toLowerCase().startsWith(NOTATION.FASTA))
300
- return NOTATION.FASTA;
301
- else if (this.units.toLowerCase().startsWith(NOTATION.SEPARATOR))
302
- return NOTATION.SEPARATOR;
303
- else if (this.units.toLowerCase().startsWith(NOTATION.HELM))
304
- return NOTATION.HELM;
305
- else if (this.units.toLowerCase().startsWith(NOTATION.CUSTOM))
306
- return NOTATION.CUSTOM;
307
- else
308
- throw new Error(`Column '${this.column.name}' has unexpected notation '${this.units}'.`);
309
- }
310
- /**
311
- * Get the wrapper strings for HELM, depending on the type of the
312
- * macromolecule (peptide, DNA, RNA)
313
- *
314
- * @return {string[]} Array of wrappers
315
- */
316
- getHelmWrappers() {
317
- const prefix = (this.isDna()) ? 'RNA1{' :
318
- (this.isRna() || this.isHelmCompatible()) ? 'RNA1{' : 'PEPTIDE1{';
319
- const postfix = '}$$$$';
320
- const leftWrapper = (this.isDna()) ? 'd(' :
321
- (this.isRna()) ? 'r(' : '';
322
- const rightWrapper = (this.isDna() || this.isRna()) ? ')p' : '';
323
- return [prefix, leftWrapper, rightWrapper, postfix];
324
- }
325
- /**
326
- * Create a new empty column of the specified notation type and the same
327
- * length as column
328
- *
329
- * @param {NOTATION} tgtNotation
330
- * @return {DG.Column}
331
- */
332
- getNewColumn(tgtNotation, tgtSeparator, colName, data) {
333
- const col = this.column;
334
- const name = tgtNotation.toLowerCase() + '(' + col.name + ')';
335
- const newColName = colName ?? col.dataFrame?.columns.getUnusedName(name) ?? name;
336
- const newColumn = DG.Column.fromList('string', newColName, data ?? new Array(this.column.length).fill(''));
337
- newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
338
- newColumn.meta.units = tgtNotation;
339
- if (tgtNotation === NOTATION.SEPARATOR) {
340
- if (!tgtSeparator)
341
- throw new Error(`Notation \'${NOTATION.SEPARATOR}\' requires separator value.`);
342
- newColumn.setTag(TAGS.separator, tgtSeparator);
343
- }
344
- newColumn.setTag(DG.TAGS.CELL_RENDERER, tgtNotation === NOTATION.HELM ? 'helm' : 'sequence'); // cell.renderer
345
- const srcAligned = col.getTag(TAGS.aligned);
346
- if (srcAligned)
347
- newColumn.setTag(TAGS.aligned, srcAligned);
348
- let srcAlphabet = col.getTag(TAGS.alphabet);
349
- if (!srcAlphabet && this.notation === NOTATION.HELM && tgtNotation !== NOTATION.HELM)
350
- srcAlphabet = ALPHABET.UN;
351
- if (srcAlphabet != null)
352
- newColumn.setTag(TAGS.alphabet, srcAlphabet);
353
- let srcAlphabetSize = col.getTag(TAGS.alphabetSize);
354
- if (srcAlphabet != null && srcAlphabetSize)
355
- newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);
356
- const srcAlphabetIsMultichar = col.getTag(TAGS.alphabetIsMultichar);
357
- if (srcAlphabet != null && srcAlphabetIsMultichar !== undefined)
358
- newColumn.setTag(TAGS.alphabetIsMultichar, srcAlphabetIsMultichar);
359
- if (tgtNotation == NOTATION.HELM) {
360
- srcAlphabetSize = this.getAlphabetSize().toString();
361
- newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);
362
- }
363
- return newColumn;
364
- }
365
- /** Creates a new column on data of {@link seqList} with the same tags */
366
- getNewColumnFromList(name, seqList) {
367
- return this.getNewColumn(this.notation, this.separator, name, seqList);
368
- }
369
- /**
370
- * Create a new empty column using templateCol as a template
371
- *
372
- * @param {DG.Column} templateCol the properties and units of this column are used as a
373
- * template to build the new one
374
- * @return {DG.Column}
375
- */
376
- static getNewColumn(templateCol) {
377
- const col = SeqHandler.forColumn(templateCol);
378
- const targetNotation = col.notation;
379
- return col.getNewColumn(targetNotation);
380
- }
381
- /**
382
- * A helper function checking the validity of the 'units' string
383
- *
384
- * @param {string} units the string to be validated
385
- * @return {boolean}
386
- */
387
- static unitsStringIsValid(units) {
388
- units = units.toLowerCase();
389
- const prefixes = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];
390
- const postfixes = ['rna', 'dna', 'pt'];
391
- const prefixCriterion = prefixes.some((p) => units.startsWith(p.toLowerCase()));
392
- return prefixCriterion;
393
- }
394
- /**
395
- * Construct a new column of semantic type MACROMOLECULE from the list of
396
- * specified parameters
397
- *
398
- * @param {number} len the length of the new column
399
- * @param {string} name the name of the new column
400
- * @param {string} units the units of the new column
401
- * @return {DG.Column}
402
- */
403
- static getNewColumnFromParams(len, name, units) {
404
- // WARNING: in this implementation it is impossible to verify the uniqueness
405
- // of the new column's name
406
- // TODO: verify the validity of units parameter
407
- if (!SeqHandler.unitsStringIsValid(units))
408
- throw new Error('Invalid format of \'units\' parameter');
409
- const newColumn = DG.Column.fromList('string', name, new Array(len).fill(''));
410
- newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
411
- newColumn.meta.units = units;
412
- return newColumn;
413
- }
414
- /** Gets function to split seq value to monomers */
415
- getSplitter(limit) {
416
- let splitter = null;
417
- splitter = this.notationProvider ? this.notationProvider.splitter : null;
418
- if (splitter)
419
- return splitter;
420
- if (this.units.toLowerCase().startsWith(NOTATION.FASTA)) {
421
- const alphabet = this.column.getTag(TAGS.alphabet);
422
- if (alphabet !== null && !this.getAlphabetIsMultichar())
423
- return splitterAsFastaSimple;
424
- else
425
- return splitterAsFasta;
426
- }
427
- else if (this.units.toLowerCase().startsWith(NOTATION.SEPARATOR))
428
- return getSplitterWithSeparator(this.separator, limit);
429
- else if (this.units.toLowerCase().startsWith(NOTATION.HELM))
430
- return splitterAsHelm;
431
- else
432
- throw new Error(`Unexpected units ${this.units} .`);
433
- // TODO: Splitter for HELM
434
- }
435
- split(seq) {
436
- return this.splitter(seq);
437
- }
438
- getDistanceFunctionName() {
439
- // TODO add support for helm and separator notation
440
- if (!this.isFasta())
441
- throw new Error('Only FASTA notation is supported');
442
- if (this.isMsa())
443
- return MmDistanceFunctionsNames.HAMMING;
444
- switch (this.alphabet) {
445
- case ALPHABET.DNA:
446
- case ALPHABET.RNA:
447
- // As DNA and RNA scoring matrices are same as identity matrices(mostly),
448
- // we can use very fast and optimized Levenshtein distance library
449
- return MmDistanceFunctionsNames.LEVENSHTEIN;
450
- case ALPHABET.PT:
451
- return MmDistanceFunctionsNames.LEVENSHTEIN;
452
- // For default case, let's use Levenshtein distance
453
- default:
454
- return MmDistanceFunctionsNames.LEVENSHTEIN;
455
- }
456
- }
457
- getDistanceFunction() {
458
- return mmDistanceFunctions[this.getDistanceFunctionName()]();
459
- }
460
- // checks if the separator notation is compatible with helm library
461
- async checkHelmCompatibility() {
462
- // check first for the column tag to avoid extra processing
463
- if (this.column.tags.has(TAGS.isHelmCompatible))
464
- return this.column.getTag(TAGS.isHelmCompatible) === 'true';
465
- // get the monomer lib and check against the column
466
- const monomerLibHelper = await getMonomerLibHelper();
467
- const bioLib = monomerLibHelper.getMonomerLib();
468
- // retrieve peptides
469
- const peptides = bioLib.getMonomerSymbolsByType("PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */);
470
- // convert the peptides list to a set for faster lookup
471
- const peptidesSet = new Set(peptides);
472
- // get splitter for given separator and check if all monomers are in the lib
473
- const splitterFunc = getSplitterWithSeparator(this.separator);
474
- // iterate over the columns, split them and check if all monomers are in the lib
475
- //TODO maybe add missing threshold so that if there are not too many missing monomers
476
- // the column is still considered helm compatible
477
- const catIdxSet = new Set();
478
- const rowCount = this.column.length;
479
- const colRawData = this.column.getRawData();
480
- for (let rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
481
- const catI = colRawData[rowIdx];
482
- if (!(catI in catIdxSet)) {
483
- catIdxSet.add(catI);
484
- const seqSS = this.getSplitted(rowIdx);
485
- for (let posIdx = 0; posIdx < seqSS.length; ++posIdx) {
486
- const cm = seqSS.getCanonical(posIdx);
487
- if (!peptidesSet.has(cm)) {
488
- this.column.setTag(TAGS.isHelmCompatible, 'false');
489
- return false;
490
- }
491
- }
492
- }
493
- }
494
- this.column.setTag(TAGS.isHelmCompatible, 'true');
495
- return true;
496
- }
497
- // -- Notation Converter --
498
- get splitter() {
499
- if (this._splitter === null)
500
- this._splitter = this.getSplitter();
501
- return this._splitter;
502
- }
503
- toFasta(targetNotation) { return targetNotation === NOTATION.FASTA; }
504
- toSeparator(targetNotation) { return targetNotation === NOTATION.SEPARATOR; }
505
- toHelm(targetNotation) { return targetNotation === NOTATION.HELM; }
506
- /**
507
- * Convert HELM string to FASTA/SEPARATOR
508
- *
509
- * @param {string} srcSeq A string to be converted
510
- * @param {string} tgtNotation Target notation: FASTA or SEPARATOR
511
- * @param {string} tgtSeparator Optional target separator (for HELM ->
512
- * SEPARATOR)
513
- * @param {string | null} tgtGapOriginal Optional target gap symbol
514
- * @return {string} Converted string
515
- */
516
- convertHelmToFastaSeparator(srcSeq, tgtNotation, tgtSeparator, tgtGapOriginal) {
517
- if (!tgtGapOriginal) {
518
- tgtGapOriginal = (this.toFasta(tgtNotation)) ?
519
- GapOriginals[NOTATION.FASTA] :
520
- GapOriginals[NOTATION.SEPARATOR];
521
- }
522
- if (!tgtSeparator)
523
- tgtSeparator = (this.toFasta(tgtNotation)) ? '' : this.separator;
524
- const isNucleotide = srcSeq.startsWith('RNA');
525
- // items can be monomers or helms
526
- const helmItemsArray = this.splitter(srcSeq);
527
- const tgtMonomersArray = [];
528
- for (let posIdx = 0; posIdx < helmItemsArray.length; ++posIdx) {
529
- let om = helmItemsArray.getOriginal(posIdx);
530
- if (isNucleotide)
531
- om = om.replace(HELM_WRAPPERS_REGEXP, '');
532
- if (om === GapOriginals[NOTATION.HELM])
533
- tgtMonomersArray.push(tgtGapOriginal);
534
- else if (this.toFasta(tgtNotation) && om.length > 1) {
535
- // the case of a multi-character monomer converted to FASTA
536
- const monomer = '[' + om + ']';
537
- tgtMonomersArray.push(monomer);
538
- }
539
- else
540
- tgtMonomersArray.push(om);
541
- }
542
- return tgtMonomersArray.join(tgtSeparator);
543
- }
544
- /** Dispatcher method for notation conversion
545
- *
546
- * @param {NOTATION} tgtNotation Notation we want to convert to
547
- * @param {string | null} tgtSeparator Possible separator
548
- * @return {DG.Column} Converted column
549
- */
550
- convert(tgtNotation, tgtSeparator) {
551
- // Get joiner from the source column units handler (this) knowing about the source sequence.
552
- // For example, converting DNA Helm to fasta requires removing the r(X)p decoration.
553
- const joiner = this.getJoiner({ notation: tgtNotation, separator: tgtSeparator });
554
- const newColumn = this.getNewColumn(tgtNotation, tgtSeparator);
555
- // assign the values to the newly created empty column
556
- newColumn.init((rowIdx) => {
557
- const srcSS = this.getSplitted(rowIdx);
558
- return joiner(srcSS);
559
- });
560
- return newColumn;
561
- }
562
- /**
563
- * @param startIdx Start position index of the region (0-based)
564
- * @param endIdx End position index of the region (0-based, inclusive)
565
- * @param name Name assigned to the resulting region column
566
- */
567
- getRegion(startIdx, endIdx, name) {
568
- const regCol = this.getNewColumn(this.notation, this.separator);
569
- regCol.name = name;
570
- const startIdxVal = startIdx ?? 0;
571
- const endIdxVal = endIdx ?? this.maxLength - 1;
572
- const joiner = this.getJoiner();
573
- const regLength = endIdxVal - startIdxVal + 1;
574
- const gapOM = GapOriginals[this.notation];
575
- regCol.init((rowI) => {
576
- const seqS = this.getSplitted(rowI);
577
- // Custom slicing instead of array method to maintain gaps
578
- const regOMList = new Array(regLength);
579
- for (let regJPos = 0; regJPos < regLength; ++regJPos) {
580
- const seqJPos = startIdxVal + regJPos;
581
- regOMList[regJPos] = seqJPos < seqS.length ? seqS.getOriginal(seqJPos) : gapOM;
582
- }
583
- return joiner(new StringListSeqSplitted(regOMList, gapOM));
584
- });
585
- const getRegionOfPositionNames = (str) => {
586
- const srcPosList = str.split(',').map((p) => p.trim());
587
- const regPosList = new Array(regLength);
588
- for (let regJPos = 0; regJPos < regLength; ++regJPos) {
589
- const srcJPos = startIdxVal + regJPos;
590
- regPosList[regJPos] = srcJPos < srcPosList.length ? srcPosList[srcJPos] : '?';
591
- }
592
- return regPosList.join(positionSeparator);
593
- };
594
- const srcPositionNamesStr = this.column.getTag(TAGS.positionNames);
595
- if (srcPositionNamesStr)
596
- regCol.setTag(TAGS.positionNames, getRegionOfPositionNames(srcPositionNamesStr));
597
- const srcPositionLabelsStr = this.column.getTag(TAGS.positionLabels);
598
- if (srcPositionLabelsStr)
599
- regCol.setTag(TAGS.positionLabels, getRegionOfPositionNames(srcPositionLabelsStr));
600
- return regCol;
601
- }
602
- get joiner() {
603
- if (!this._joiner)
604
- this._joiner = this.getJoiner();
605
- return this._joiner;
606
- }
607
- getJoiner(opts) {
608
- const notation = opts ? opts.notation : this.notation;
609
- const separator = opts ? opts.separator : this.separator;
610
- let res;
611
- const srcSh = this;
612
- switch (notation) {
613
- case NOTATION.FASTA: {
614
- res = function (srcSS) { return srcSh.joinToFasta(srcSS, srcSh.isHelm()); };
615
- break;
616
- }
617
- case NOTATION.SEPARATOR: {
618
- if (!separator)
619
- throw new Error(`Separator is mandatory for notation '${notation}'.`);
620
- res = function (srcSS) { return joinToSeparator(srcSS, separator, srcSh.isHelm()); };
621
- break;
622
- }
623
- case NOTATION.HELM: {
624
- const isDnaOrRna = srcSh.alphabet === ALPHABET.DNA || srcSh.alphabet === ALPHABET.RNA;
625
- const wrappers = srcSh.getHelmWrappers();
626
- res = function (srcSS) { return joinToHelm(srcSS, wrappers, isDnaOrRna); };
627
- break;
628
- }
629
- default:
630
- throw new Error(`Unexpected notation '${notation}'.`);
631
- }
632
- return res;
633
- }
634
- getConverter(tgtUnits, tgtSeparator = undefined) {
635
- if (tgtUnits === NOTATION.SEPARATOR && !tgtSeparator)
636
- throw new Error(`Target separator is not specified for target units '${NOTATION.SEPARATOR}'.`);
637
- const srcSh = this;
638
- if (tgtUnits === NOTATION.FASTA)
639
- return function (srcSeq) { return srcSh.convertToFasta(srcSeq); };
640
- if (tgtUnits === NOTATION.HELM)
641
- return function (srcSeq) { return srcSh.convertToHelm(srcSeq); };
642
- else if (tgtUnits === NOTATION.SEPARATOR)
643
- return function (srcSeq) { return srcSh.convertToSeparator(srcSeq, tgtSeparator); };
644
- else
645
- throw new Error();
646
- }
647
- /** Gets a column's UnitsHandler object from temp slot or creates a new and stores it to the temp slot. */
648
- static forColumn(col) {
649
- // TODO: Invalidate col.temp[Temps.uh] checking column's metadata
650
- let res = col.temp[SeqTemps.seqHandler];
651
- if (!res || res.columnVersion !== col.version)
652
- res = col.temp[SeqTemps.seqHandler] = new SeqHandler(col);
653
- return res;
654
- }
655
- // -- joiners & converters --
656
- joinToFasta(seqS, isHelm) {
657
- const resMList = new Array(seqS.length);
658
- for (let posIdx = 0; posIdx < seqS.length; ++posIdx) {
659
- const cm = seqS.getOriginal(posIdx);
660
- let om = seqS.getOriginal(posIdx);
661
- if (isHelm)
662
- om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
663
- if (cm === GAP_SYMBOL)
664
- om = GapOriginals[NOTATION.FASTA];
665
- else if (cm === PHOSPHATE_SYMBOL)
666
- om = '';
667
- else if (om.length > 1)
668
- om = '[' + om + ']';
669
- resMList[posIdx] = om;
670
- }
671
- return resMList.join('');
672
- }
673
- convertToFasta(src) {
674
- const srcUhSplitter = this.splitter;
675
- const srcSS = this.isHelm() ? this.splitterAsHelmNucl(src) : srcUhSplitter(src);
676
- return this.joinToFasta(srcSS, this.isHelm());
677
- }
678
- convertToSeparator(src, tgtSeparator) {
679
- const srcSS = this.isHelm() ? this.splitterAsHelmNucl(src) : this.splitter(src);
680
- return joinToSeparator(srcSS, tgtSeparator, this.isHelm());
681
- }
682
- convertToHelm(src) {
683
- if (this.notation == NOTATION.HELM)
684
- return src;
685
- const wrappers = this.getHelmWrappers();
686
- const isDnaOrRna = src.startsWith('DNA') || src.startsWith('RNA');
687
- const srcSS = this.splitter(src);
688
- return joinToHelm(srcSS, wrappers, isDnaOrRna);
689
- }
690
- /** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus. */
691
- splitterAsHelmNucl(src) {
692
- const srcMList = this.splitter(src);
693
- const tgtMList = new Array(srcMList.length);
694
- const isDna = src.startsWith('DNA');
695
- const isRna = src.startsWith('RNA');
696
- for (let posIdx = 0; posIdx < srcMList.length; ++posIdx) {
697
- let om = srcMList.getOriginal(posIdx);
698
- if (isDna || isRna) {
699
- om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
700
- om = om === PHOSPHATE_SYMBOL ? null : om;
701
- }
702
- tgtMList[posIdx] = om ? om : null;
703
- }
704
- return new StringListSeqSplitted(tgtMList.filter((om) => !!om), GapOriginals[NOTATION.HELM]);
705
- }
706
- // Custom notation provider
707
- getRendererBack(gridCol, tableCol) {
708
- const temp = this.column.temp;
709
- let res = temp.rendererBack;
710
- if (!res)
711
- res = temp.rendererBack = this.notationProvider.createCellRendererBack(gridCol, tableCol);
712
- return res;
713
- }
714
- }
715
- // -- joiners --
716
- function joinToSeparator(seqS, tgtSeparator, isHelm) {
717
- const resMList = new Array(seqS.length);
718
- for (let posIdx = 0; posIdx < seqS.length; ++posIdx) {
719
- const cm = seqS.getCanonical(posIdx);
720
- let om = seqS.getOriginal(posIdx);
721
- if (isHelm)
722
- om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
723
- if (cm === GAP_SYMBOL)
724
- om = GapOriginals[NOTATION.SEPARATOR];
725
- else if (cm === PHOSPHATE_SYMBOL)
726
- om = '';
727
- resMList[posIdx] = om;
728
- }
729
- return resMList.join(tgtSeparator);
730
- }
731
- function joinToHelm(srcSS, wrappers, isDnaOrRna) {
732
- const [prefix, leftWrapper, rightWrapper, postfix] = wrappers;
733
- const resOMList = new Array(srcSS.length);
734
- for (let posIdx = 0; posIdx < srcSS.length; ++posIdx) {
735
- const cm = srcSS.getCanonical(posIdx);
736
- let om = srcSS.getOriginal(posIdx);
737
- if (cm === GAP_SYMBOL)
738
- om = GapOriginals[NOTATION.HELM];
739
- else {
740
- if (isDnaOrRna)
741
- om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
742
- om = om.length === 1 ? `${leftWrapper}${om}${rightWrapper}` : `${leftWrapper}[${om}]${rightWrapper}`;
743
- }
744
- resOMList[posIdx] = om;
745
- }
746
- return `${prefix}${resOMList.join('.')}${postfix}`;
747
- }
748
- //# sourceMappingURL=seq-handler.js.map