@datagrok-libraries/bio 5.39.28 → 5.40.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/.eslintrc.json +1 -1
  2. package/CHANGELOG.md +21 -0
  3. package/package.json +2 -2
  4. package/src/monomer-works/monomer-utils.d.ts.map +1 -1
  5. package/src/monomer-works/monomer-utils.js +34 -31
  6. package/src/monomer-works/monomer-utils.js.map +1 -1
  7. package/src/monomer-works/to-atomic-level.d.ts +4 -4
  8. package/src/monomer-works/to-atomic-level.d.ts.map +1 -1
  9. package/src/monomer-works/to-atomic-level.js +37 -38
  10. package/src/monomer-works/to-atomic-level.js.map +1 -1
  11. package/src/trees/consts.d.ts +1 -0
  12. package/src/trees/consts.d.ts.map +1 -1
  13. package/src/trees/consts.js +1 -0
  14. package/src/trees/consts.js.map +1 -1
  15. package/src/utils/cell-renderer-monomer-placer.d.ts +2 -3
  16. package/src/utils/cell-renderer-monomer-placer.d.ts.map +1 -1
  17. package/src/utils/cell-renderer-monomer-placer.js +13 -11
  18. package/src/utils/cell-renderer-monomer-placer.js.map +1 -1
  19. package/src/utils/cell-renderer.d.ts +2 -3
  20. package/src/utils/cell-renderer.d.ts.map +1 -1
  21. package/src/utils/cell-renderer.js +9 -8
  22. package/src/utils/cell-renderer.js.map +1 -1
  23. package/src/utils/fasta-handler.js +2 -2
  24. package/src/utils/fasta-handler.js.map +1 -1
  25. package/src/utils/macromolecule/alignment.d.ts +4 -3
  26. package/src/utils/macromolecule/alignment.d.ts.map +1 -1
  27. package/src/utils/macromolecule/alignment.js +25 -18
  28. package/src/utils/macromolecule/alignment.js.map +1 -1
  29. package/src/utils/macromolecule/consts.d.ts +2 -0
  30. package/src/utils/macromolecule/consts.d.ts.map +1 -1
  31. package/src/utils/macromolecule/consts.js +2 -0
  32. package/src/utils/macromolecule/consts.js.map +1 -1
  33. package/src/utils/macromolecule/index.d.ts +1 -1
  34. package/src/utils/macromolecule/index.d.ts.map +1 -1
  35. package/src/utils/macromolecule/index.js +1 -1
  36. package/src/utils/macromolecule/index.js.map +1 -1
  37. package/src/utils/macromolecule/scoring.d.ts +1 -1
  38. package/src/utils/macromolecule/scoring.d.ts.map +1 -1
  39. package/src/utils/macromolecule/scoring.js +7 -5
  40. package/src/utils/macromolecule/scoring.js.map +1 -1
  41. package/src/utils/macromolecule/types.d.ts +10 -2
  42. package/src/utils/macromolecule/types.d.ts.map +1 -1
  43. package/src/utils/macromolecule/types.js +2 -0
  44. package/src/utils/macromolecule/types.js.map +1 -1
  45. package/src/utils/macromolecule/utils.d.ts +30 -12
  46. package/src/utils/macromolecule/utils.d.ts.map +1 -1
  47. package/src/utils/macromolecule/utils.js +67 -40
  48. package/src/utils/macromolecule/utils.js.map +1 -1
  49. package/src/utils/{units-handler.d.ts → seq-handler.d.ts} +39 -20
  50. package/src/utils/seq-handler.d.ts.map +1 -0
  51. package/src/utils/{units-handler.js → seq-handler.js} +283 -207
  52. package/src/utils/seq-handler.js.map +1 -0
  53. package/src/utils/splitter.d.ts.map +1 -1
  54. package/src/utils/splitter.js +8 -11
  55. package/src/utils/splitter.js.map +1 -1
  56. package/src/utils/units-handler.d.ts.map +0 -1
  57. package/src/utils/units-handler.js.map +0 -1
@@ -1,7 +1,8 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import wu from 'wu';
3
- import { NOTATION, candidateAlphabets, positionSeparator } from './macromolecule';
4
- import { detectAlphabet, getSplitterForColumn, getSplitterWithSeparator, splitterAsFasta, splitterAsFastaSimple, splitterAsHelm } from './macromolecule/utils';
3
+ import { NOTATION, candidateAlphabets, positionSeparator, splitterAsFasta, getSplitterWithSeparator, splitterAsHelm, } from './macromolecule';
4
+ import { GAP_SYMBOL, } from './macromolecule/types';
5
+ import { detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted } from './macromolecule/utils';
5
6
  import { mmDistanceFunctions, MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
6
7
  import { getMonomerLibHelper } from '../monomer-works/monomer-utils';
7
8
  import { HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL } from './const';
@@ -11,7 +12,7 @@ export const Temps = new class {
11
12
  this.uh = `units-handler.${DG.SEMTYPE.MACROMOLECULE}`;
12
13
  }
13
14
  }();
14
- export const GapSymbols = {
15
+ export const GapOriginals = {
15
16
  [NOTATION.FASTA]: '-',
16
17
  [NOTATION.SEPARATOR]: '',
17
18
  [NOTATION.HELM]: '*',
@@ -19,12 +20,66 @@ export const GapSymbols = {
19
20
  /** Class for handling notation units in Macromolecule columns and
20
21
  * conversion of notation systems in Macromolecule columns
21
22
  */
22
- export class UnitsHandler {
23
+ export class SeqHandler {
24
+ constructor(col) {
25
+ this._splitter = null;
26
+ this.cached = true;
27
+ this._splitted = null;
28
+ this.columnVersion = null;
29
+ this._stats = null;
30
+ this._maxLength = null;
31
+ this._posList = null;
32
+ this._joiner = undefined;
33
+ if (col.type !== DG.TYPE.STRING)
34
+ throw new Error(`Unexpected column type '${col.type}', must be '${DG.TYPE.STRING}'.`);
35
+ this._column = col;
36
+ this._columnVersion = col.version;
37
+ const units = this._column.getTag(DG.TAGS.UNITS);
38
+ if (units !== null && units !== undefined)
39
+ this._units = units;
40
+ else
41
+ throw new Error('Units are not specified in column');
42
+ this._notation = this.getNotation();
43
+ this._defaultGapOriginal = (this.isFasta()) ? GapOriginals[NOTATION.FASTA] :
44
+ (this.isHelm()) ? GapOriginals[NOTATION.HELM] :
45
+ GapOriginals[NOTATION.SEPARATOR];
46
+ if (!this.column.tags.has("aligned" /* TAGS.aligned */) || !this.column.tags.has("alphabet" /* TAGS.alphabet */) ||
47
+ (!this.column.tags.has(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */) && !this.isHelm() && this.alphabet === "UN" /* ALPHABET.UN */)) {
48
+ // The following detectors and setters are to be called because the column is likely
49
+ // as the UnitsHandler constructor was called on the column.
50
+ if (this.isFasta())
51
+ SeqHandler.setUnitsToFastaColumn(this);
52
+ else if (this.isSeparator()) {
53
+ const separator = col.getTag("separator" /* TAGS.separator */);
54
+ SeqHandler.setUnitsToSeparatorColumn(this, separator);
55
+ }
56
+ else if (this.isHelm())
57
+ SeqHandler.setUnitsToHelmColumn(this);
58
+ else
59
+ throw new Error(`Unexpected units '${this.column.getTag(DG.TAGS.UNITS)}'.`);
60
+ }
61
+ // if (!this.column.tags.has(TAGS.alphabetSize)) {
62
+ // if (this.isHelm())
63
+ // throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
64
+ // `tag '${TAGS.alphabetSize}' is mandatory.`);
65
+ // else if (['UN'].includes(this.alphabet))
66
+ // throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
67
+ // `tag '${TAGS.alphabetSize}' is mandatory.`);
68
+ // }
69
+ if (!this.column.tags.has(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */)) {
70
+ if (this.isHelm())
71
+ this.column.setTag(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */, 'true');
72
+ else if (['UN'].includes(this.alphabet)) {
73
+ throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
74
+ `tag '${".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */}' is mandatory.`);
75
+ }
76
+ }
77
+ }
23
78
  static setUnitsToFastaColumn(uh) {
24
79
  if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE || uh.column.getTag(DG.TAGS.UNITS) !== NOTATION.FASTA)
25
80
  throw new Error(`The column of notation '${NOTATION.FASTA}' must be '${DG.SEMTYPE.MACROMOLECULE}'.`);
26
81
  uh.column.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
27
- UnitsHandler.setTags(uh);
82
+ SeqHandler.setTags(uh);
28
83
  }
29
84
  static setUnitsToSeparatorColumn(uh, separator) {
30
85
  if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE || uh.column.getTag(DG.TAGS.UNITS) !== NOTATION.SEPARATOR)
@@ -33,13 +88,13 @@ export class UnitsHandler {
33
88
  throw new Error(`The column of notation '${NOTATION.SEPARATOR}' must have the separator tag.`);
34
89
  uh.column.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
35
90
  uh.column.setTag("separator" /* TAGS.separator */, separator);
36
- UnitsHandler.setTags(uh);
91
+ SeqHandler.setTags(uh);
37
92
  }
38
93
  static setUnitsToHelmColumn(uh) {
39
94
  if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE)
40
95
  throw new Error(`The column of notation '${NOTATION.HELM}' must be '${DG.SEMTYPE.MACROMOLECULE}'`);
41
96
  uh.column.setTag(DG.TAGS.UNITS, NOTATION.HELM);
42
- UnitsHandler.setTags(uh);
97
+ SeqHandler.setTags(uh);
43
98
  }
44
99
  /** From detectMacromolecule */
45
100
  static setTags(uh) {
@@ -69,9 +124,10 @@ export class UnitsHandler {
69
124
  }
70
125
  }
71
126
  get column() { return this._column; }
127
+ get length() { return this._column.length; }
72
128
  get units() { return this._units; }
73
129
  get notation() { return this._notation; }
74
- get defaultGapSymbol() { return this._defaultGapSymbol; }
130
+ get defaultGapOriginal() { return this._defaultGapOriginal; }
75
131
  get separator() {
76
132
  const separator = this.column.getTag("separator" /* TAGS.separator */) ?? undefined;
77
133
  if (this.notation === NOTATION.SEPARATOR && separator === undefined)
@@ -132,35 +188,57 @@ export class UnitsHandler {
132
188
  else
133
189
  return this.column.getTag(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */) === 'true';
134
190
  }
135
- /** */
136
- get splitted() {
137
- if (this._splitted === null) {
138
- const splitter = this.getSplitter();
139
- const colLength = this._column.length;
140
- this._splitted = new Array(colLength);
141
- const catIdxList = this._column.getRawData();
142
- const catList = this._column.categories;
143
- for (let rowI = 0; rowI < colLength; rowI++) {
144
- const seq = catList[catIdxList[rowI]];
145
- this._splitted[rowI] = splitter(seq);
191
+ // /** */
192
+ // public get splitted(): ISeqSplitted[] {
193
+ // // TODO: Disable cache or invalidate on changing data
194
+ // if (this._splitted === null) {
195
+ // const splitter = this.splitter;
196
+ // const colLength: number = this._column.length;
197
+ // this._splitted = new Array(colLength);
198
+ // const catIdxList = this._column.getRawData();
199
+ // const catList: string[] = this._column.categories;
200
+ // for (let rowIdx: number = 0; rowIdx < colLength; rowIdx++) {
201
+ // const seq: string = catList[catIdxList[rowIdx]];
202
+ // this._splitted[rowIdx] = splitter(seq);
203
+ // }
204
+ // }
205
+ // return this._splitted;
206
+ // }
207
+ getSplitted(rowIdx) {
208
+ if (!this.cached) {
209
+ const seq = this.column.get(rowIdx);
210
+ return this.splitter(seq);
211
+ }
212
+ else {
213
+ if (this.column.version !== this.columnVersion || this._splitted === null) {
214
+ this.columnVersion = this.column.version;
215
+ this._splitted = new Array(this.column.length);
146
216
  }
217
+ let resSS = this._splitted[rowIdx] ? this._splitted[rowIdx].deref() : undefined;
218
+ if (!resSS) {
219
+ const seq = this.column.get(rowIdx);
220
+ resSS = this.splitter(seq);
221
+ this._splitted[rowIdx] = new WeakRef(resSS);
222
+ }
223
+ return resSS;
147
224
  }
148
- return this._splitted;
149
225
  }
150
226
  get stats() {
151
227
  if (this._stats === null) {
152
228
  const freq = {};
153
229
  let sameLength = true;
154
230
  let firstLength = null;
155
- for (const mSeq of this.splitted) {
231
+ const colLen = this.column.length;
232
+ for (let rowIdx = 0; rowIdx < colLen; ++rowIdx) {
233
+ const mSeq = this.getSplitted(rowIdx);
156
234
  if (firstLength == null)
157
235
  firstLength = mSeq.length;
158
236
  else if (mSeq.length !== firstLength)
159
237
  sameLength = false;
160
- for (const m of mSeq) {
161
- if (!(m in freq))
162
- freq[m] = 0;
163
- freq[m] += 1;
238
+ for (const cm of mSeq.canonicals) {
239
+ if (!(cm in freq))
240
+ freq[cm] = 0;
241
+ freq[cm] += 1;
164
242
  }
165
243
  }
166
244
  this._stats = { freq: freq, sameLength: sameLength };
@@ -169,8 +247,8 @@ export class UnitsHandler {
169
247
  }
170
248
  get maxLength() {
171
249
  if (this._maxLength === null) {
172
- this._maxLength = this.splitted.length === 0 ? 0 :
173
- Math.max(...this.splitted.map((seqS) => seqS.length));
250
+ this._maxLength = this.column.length === 0 ? 0 :
251
+ Math.max(...wu.count(0).take(this.column.length).map((rowIdx) => this.getSplitted(rowIdx).length));
174
252
  }
175
253
  return this._maxLength;
176
254
  }
@@ -190,9 +268,12 @@ export class UnitsHandler {
190
268
  isPeptide() { return this.alphabet === "PT" /* ALPHABET.PT */; }
191
269
  isMsa() { return this.aligned ? this.aligned.toUpperCase().includes('MSA') : false; }
192
270
  isHelmCompatible() { return this.helmCompatible === 'true'; }
193
- isGap(m) {
194
- return !m || (this.units === NOTATION.FASTA && m === GapSymbols[NOTATION.FASTA]) ||
195
- (this.units === NOTATION.HELM && m === GapSymbols[NOTATION.HELM]);
271
+ /** Checks {@link om} for being a gap
272
+ * @param {string} om Original monomer of sequence symbol
273
+ * @return {boolean}
274
+ */
275
+ isGap(om) {
276
+ return !om || om === this._defaultGapOriginal;
196
277
  }
197
278
  /** Associate notation types with the corresponding units */
198
279
  /**
@@ -246,7 +327,9 @@ export class UnitsHandler {
246
327
  const srcAligned = col.getTag("aligned" /* TAGS.aligned */);
247
328
  if (srcAligned)
248
329
  newColumn.setTag("aligned" /* TAGS.aligned */, srcAligned);
249
- const srcAlphabet = col.getTag("alphabet" /* TAGS.alphabet */);
330
+ let srcAlphabet = col.getTag("alphabet" /* TAGS.alphabet */);
331
+ if (!srcAlphabet && this.notation === NOTATION.HELM && tgtNotation !== NOTATION.HELM)
332
+ srcAlphabet = "UN" /* ALPHABET.UN */;
250
333
  if (srcAlphabet != null)
251
334
  newColumn.setTag("alphabet" /* TAGS.alphabet */, srcAlphabet);
252
335
  let srcAlphabetSize = col.getTag(".alphabetSize" /* TAGS.alphabetSize */);
@@ -261,8 +344,9 @@ export class UnitsHandler {
261
344
  }
262
345
  return newColumn;
263
346
  }
264
- getNewColumnFromList(name, list) {
265
- return this.getNewColumn(this.notation, this.separator, name, list);
347
+ /** Creates a new column on data of {@link seqList} with the same tags */
348
+ getNewColumnFromList(name, seqList) {
349
+ return this.getNewColumn(this.notation, this.separator, name, seqList);
266
350
  }
267
351
  /**
268
352
  * Create a new empty column using templateCol as a template
@@ -272,7 +356,7 @@ export class UnitsHandler {
272
356
  * @return {DG.Column}
273
357
  */
274
358
  static getNewColumn(templateCol) {
275
- const col = UnitsHandler.getOrCreate(templateCol);
359
+ const col = SeqHandler.forColumn(templateCol);
276
360
  const targetNotation = col.notation;
277
361
  return col.getNewColumn(targetNotation);
278
362
  }
@@ -302,7 +386,7 @@ export class UnitsHandler {
302
386
  // WARNING: in this implementation is is impossible to verify the uniqueness
303
387
  // of the new column's name
304
388
  // TODO: verify the validity of units parameter
305
- if (!UnitsHandler.unitsStringIsValid(units))
389
+ if (!SeqHandler.unitsStringIsValid(units))
306
390
  throw new Error('Invalid format of \'units\' parameter');
307
391
  const newColumn = DG.Column.fromList('string', name, new Array(len).fill(''));
308
392
  newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
@@ -326,6 +410,9 @@ export class UnitsHandler {
326
410
  throw new Error(`Unexpected units ${this.units} .`);
327
411
  // TODO: Splitter for HELM
328
412
  }
413
+ split(seq) {
414
+ return this.splitter(seq);
415
+ }
329
416
  getDistanceFunctionName() {
330
417
  // TODO add support for helm and separator notation
331
418
  if (!this.isFasta())
@@ -333,10 +420,10 @@ export class UnitsHandler {
333
420
  if (this.isMsa())
334
421
  return MmDistanceFunctionsNames.HAMMING;
335
422
  switch (this.alphabet) {
336
- // As DNA and RNA scoring matrices are same as identity matrices(mostly),
337
- // we can use very fast and optimized Levenshtein distance library
338
423
  case "DNA" /* ALPHABET.DNA */:
339
424
  case "RNA" /* ALPHABET.RNA */:
425
+ // As DNA and RNA scoring matrices are same as identity matrices(mostly),
426
+ // we can use very fast and optimized Levenshtein distance library
340
427
  return MmDistanceFunctionsNames.LEVENSHTEIN;
341
428
  case "PT" /* ALPHABET.PT */:
342
429
  return MmDistanceFunctionsNames.LEVENSHTEIN;
@@ -353,7 +440,7 @@ export class UnitsHandler {
353
440
  // check first for the column tag to avoid extra processing
354
441
  if (this.column.tags.has(".isHelmCompatible" /* TAGS.isHelmCompatible */))
355
442
  return this.column.getTag(".isHelmCompatible" /* TAGS.isHelmCompatible */) === 'true';
356
- // get the monolmer lib and check against the column
443
+ // get the monomer lib and check against the column
357
444
  const monomerLibHelper = await getMonomerLibHelper();
358
445
  const bioLib = monomerLibHelper.getBioLib();
359
446
  // retrieve peptides
@@ -363,14 +450,21 @@ export class UnitsHandler {
363
450
  // get splitter for given separator and check if all monomers are in the lib
364
451
  const splitterFunc = getSplitterWithSeparator(this.separator);
365
452
  // iterate over the columns, split them and check if all monomers are in the lib
366
- //TODO maybe add missing threshhold so that if there are not too many missing monomers
453
+ //TODO maybe add missing threshold so that if there are not too many missing monomers
367
454
  // the column is still considered helm compatible
368
- for (const row of this.column.categories) {
369
- const monomers = splitterFunc(row);
370
- for (const monomer of monomers) {
371
- if (!peptidesSet.has(monomer)) {
372
- this.column.setTag(".isHelmCompatible" /* TAGS.isHelmCompatible */, 'false');
373
- return false;
455
+ const catIdxSet = new Set();
456
+ const rowCount = this.column.length;
457
+ const colRawData = this.column.getRawData();
458
+ for (let rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
459
+ const catI = colRawData[rowIdx];
460
+ if (!(catI in catIdxSet)) {
461
+ catIdxSet.add(catI);
462
+ const monomers = this.getSplitted(rowIdx);
463
+ for (const cm of monomers.canonicals) {
464
+ if (!peptidesSet.has(cm)) {
465
+ this.column.setTag(".isHelmCompatible" /* TAGS.isHelmCompatible */, 'false');
466
+ return false;
467
+ }
374
468
  }
375
469
  }
376
470
  }
@@ -380,7 +474,7 @@ export class UnitsHandler {
380
474
  // -- Notation Converter --
381
475
  get splitter() {
382
476
  if (this._splitter === null)
383
- this._splitter = getSplitterForColumn(this.column);
477
+ this._splitter = this.getSplitter();
384
478
  return this._splitter;
385
479
  }
386
480
  toFasta(targetNotation) { return targetNotation === NOTATION.FASTA; }
@@ -389,38 +483,38 @@ export class UnitsHandler {
389
483
  /**
390
484
  * Convert HELM string to FASTA/SEPARATOR
391
485
  *
392
- * @param {string} helmPolymer A string to be converted
486
+ * @param {string} srcSeq A string to be converted
393
487
  * @param {string} tgtNotation Target notation: FASTA or SEPARATOR
394
488
  * @param {string} tgtSeparator Optional target separator (for HELM ->
395
- * @param {string | null} tgtGapSymbol Optional target gap symbol
489
+ * @param {string | null} tgtGapOriginal Optional target gap symbol
396
490
  * SEPARATOR)
397
491
  * @return {string} Converted string
398
492
  */
399
- convertHelmToFastaSeparator(helmPolymer, tgtNotation, tgtSeparator, tgtGapSymbol) {
400
- if (!tgtGapSymbol) {
401
- tgtGapSymbol = (this.toFasta(tgtNotation)) ?
402
- GapSymbols[NOTATION.FASTA] :
403
- GapSymbols[NOTATION.SEPARATOR];
493
+ convertHelmToFastaSeparator(srcSeq, tgtNotation, tgtSeparator, tgtGapOriginal) {
494
+ if (!tgtGapOriginal) {
495
+ tgtGapOriginal = (this.toFasta(tgtNotation)) ?
496
+ GapOriginals[NOTATION.FASTA] :
497
+ GapOriginals[NOTATION.SEPARATOR];
404
498
  }
405
499
  if (!tgtSeparator)
406
500
  tgtSeparator = (this.toFasta(tgtNotation)) ? '' : this.separator;
407
- const isNucleotide = helmPolymer.startsWith('RNA');
501
+ const isNucleotide = srcSeq.startsWith('RNA');
408
502
  // items can be monomers or helms
409
- const helmItemsArray = this.splitter(helmPolymer);
503
+ const helmItemsArray = this.splitter(srcSeq);
410
504
  const tgtMonomersArray = [];
411
- for (let i = 0; i < helmItemsArray.length; i++) {
412
- let item = helmItemsArray[i];
505
+ for (let posIdx = 0; posIdx < helmItemsArray.length; ++posIdx) {
506
+ let om = helmItemsArray.getOriginal(posIdx);
413
507
  if (isNucleotide)
414
- item = item.replace(HELM_WRAPPERS_REGEXP, '');
415
- if (item === GapSymbols[NOTATION.HELM])
416
- tgtMonomersArray.push(tgtGapSymbol);
417
- else if (this.toFasta(tgtNotation) && item.length > 1) {
508
+ om = om.replace(HELM_WRAPPERS_REGEXP, '');
509
+ if (om === GapOriginals[NOTATION.HELM])
510
+ tgtMonomersArray.push(tgtGapOriginal);
511
+ else if (this.toFasta(tgtNotation) && om.length > 1) {
418
512
  // the case of a multi-character monomer converted to FASTA
419
- const monomer = '[' + item + ']';
513
+ const monomer = '[' + om + ']';
420
514
  tgtMonomersArray.push(monomer);
421
515
  }
422
516
  else
423
- tgtMonomersArray.push(item);
517
+ tgtMonomersArray.push(om);
424
518
  }
425
519
  return tgtMonomersArray.join(tgtSeparator);
426
520
  }
@@ -431,14 +525,15 @@ export class UnitsHandler {
431
525
  * @return {DG.Column} Converted column
432
526
  */
433
527
  convert(tgtNotation, tgtSeparator) {
434
- const convert = this.getConverter(tgtNotation, tgtSeparator);
528
+ // Get joiner from the source column units handler (this) knowing about the source sequence.
529
+ // For example, converting DNA Helm to fasta requires removing the r(X)p decoration.
530
+ const joiner = this.getJoiner({ notation: tgtNotation, separator: tgtSeparator });
435
531
  const newColumn = this.getNewColumn(tgtNotation, tgtSeparator);
436
532
  // assign the values to the newly created empty column
437
- newColumn.init((rowI) => {
438
- const sourceSequence = this.column.get(rowI);
439
- return sourceSequence ? convert(sourceSequence) : sourceSequence;
533
+ newColumn.init((rowIdx) => {
534
+ const srcSS = this.getSplitted(rowIdx);
535
+ return joiner(srcSS);
440
536
  });
441
- // newColumn.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
442
537
  return newColumn;
443
538
  }
444
539
  /**
@@ -449,20 +544,20 @@ export class UnitsHandler {
449
544
  getRegion(startIdx, endIdx, name) {
450
545
  const regCol = this.getNewColumn(this.notation, this.separator);
451
546
  regCol.name = name;
452
- const maxLength = Math.max(...this.splitted.map((seqS) => seqS.length));
453
547
  const startIdxVal = startIdx ?? 0;
454
548
  const endIdxVal = endIdx ?? this.maxLength - 1;
455
549
  const join = this.getJoiner();
456
550
  const regLength = endIdxVal - startIdxVal + 1;
457
551
  regCol.init((rowI) => {
458
- const seqS = this.splitted[rowI];
552
+ const seqS = this.getSplitted(rowI);
459
553
  // Custom slicing instead of array method to maintain gaps
460
- const regMList = new Array(regLength);
554
+ const regOMList = new Array(regLength);
461
555
  for (let regJPos = 0; regJPos < regLength; ++regJPos) {
462
556
  const seqJPos = startIdxVal + regJPos;
463
- regMList[regJPos] = seqJPos < seqS.length ? seqS[seqJPos] : GapSymbols[this.notation];
557
+ const seqOM = seqS.getOriginal(seqJPos);
558
+ regOMList[regJPos] = seqJPos < seqS.length ? seqOM : GapOriginals[this.notation];
464
559
  }
465
- return join(regMList);
560
+ return join(new StringListSeqSplitted(regOMList, GapOriginals[this.notation]));
466
561
  });
467
562
  const getRegionOfPositionNames = (str) => {
468
563
  const srcPosList = str.split(',').map((p) => p.trim());
@@ -481,159 +576,140 @@ export class UnitsHandler {
481
576
  regCol.setTag(".positionLabels" /* TAGS.positionLabels */, getRegionOfPositionNames(srcPositionLabelsStr));
482
577
  return regCol;
483
578
  }
484
- getJoiner() {
485
- if (this._joiner === undefined) {
486
- const srcUh = this;
487
- if (this.notation === NOTATION.FASTA)
488
- this._joiner = function (srcS) { return joinToFasta(srcUh, srcS); };
489
- else if (this.notation === NOTATION.SEPARATOR)
490
- this._joiner = function (srcS) { return joinToSeparator(srcUh, srcS, srcUh.separator); };
491
- else if (this.notation === NOTATION.HELM) {
492
- const isDnaOrRna = srcUh.alphabet === "DNA" /* ALPHABET.DNA */ || srcUh.alphabet === "RNA" /* ALPHABET.RNA */;
493
- this._joiner = function (srcS) { return joinToHelm(srcUh, srcS, isDnaOrRna); };
579
+ get joiner() {
580
+ if (!this._joiner)
581
+ this._joiner = this.getJoiner();
582
+ return this._joiner;
583
+ }
584
+ getJoiner(opts) {
585
+ const notation = opts ? opts.notation : this.notation;
586
+ const separator = opts ? opts.separator : this.separator;
587
+ let res;
588
+ const srcSh = this;
589
+ switch (notation) {
590
+ case NOTATION.FASTA: {
591
+ res = function (srcSS) { return srcSh.joinToFasta(srcSS, srcSh.isHelm()); };
592
+ break;
494
593
  }
495
- else
496
- throw new Error();
594
+ case NOTATION.SEPARATOR: {
595
+ if (!separator)
596
+ throw new Error(`Separator is mandatory for notation '${notation}'.`);
597
+ res = function (srcSS) { return joinToSeparator(srcSS, separator, srcSh.isHelm()); };
598
+ break;
599
+ }
600
+ case NOTATION.HELM: {
601
+ const isDnaOrRna = srcSh.alphabet === "DNA" /* ALPHABET.DNA */ || srcSh.alphabet === "RNA" /* ALPHABET.RNA */;
602
+ const wrappers = srcSh.getHelmWrappers();
603
+ res = function (srcSS) { return joinToHelm(srcSS, wrappers, isDnaOrRna); };
604
+ break;
605
+ }
606
+ default:
607
+ throw new Error(`Unexpected notation '${notation}'.`);
497
608
  }
498
- return this._joiner;
609
+ return res;
499
610
  }
500
611
  getConverter(tgtUnits, tgtSeparator = undefined) {
501
612
  if (tgtUnits === NOTATION.SEPARATOR && !tgtSeparator)
502
613
  throw new Error(`Target separator is not specified for target units '${NOTATION.SEPARATOR}'.`);
503
- const srcUh = this;
614
+ const srcSh = this;
504
615
  if (tgtUnits === NOTATION.FASTA)
505
- return function (src) { return convertToFasta(srcUh, src); };
616
+ return function (srcSeq) { return srcSh.convertToFasta(srcSeq); };
506
617
  if (tgtUnits === NOTATION.HELM)
507
- return function (src) { return convertToHelm(srcUh, src); };
618
+ return function (srcSeq) { return srcSh.convertToHelm(srcSeq); };
508
619
  else if (tgtUnits === NOTATION.SEPARATOR)
509
- return function (src) { return convertToSeparator(srcUh, src, tgtSeparator); };
620
+ return function (srcSeq) { return srcSh.convertToSeparator(srcSeq, tgtSeparator); };
510
621
  else
511
622
  throw new Error();
512
623
  }
513
- constructor(col) {
514
- this._splitter = null;
515
- this._splitted = null;
516
- this._stats = null;
517
- this._maxLength = null;
518
- this._posList = null;
519
- this._joiner = undefined;
520
- if (col.type !== DG.TYPE.STRING)
521
- throw new Error(`Unexpected column type '${col.type}', must be '${DG.TYPE.STRING}'.`);
522
- this._column = col;
523
- const units = this._column.getTag(DG.TAGS.UNITS);
524
- if (units !== null && units !== undefined)
525
- this._units = units;
526
- else
527
- throw new Error('Units are not specified in column');
528
- this._notation = this.getNotation();
529
- this._defaultGapSymbol = (this.isFasta()) ? GapSymbols[NOTATION.FASTA] :
530
- (this.isHelm()) ? GapSymbols[NOTATION.HELM] :
531
- GapSymbols[NOTATION.SEPARATOR];
532
- if (!this.column.tags.has("aligned" /* TAGS.aligned */) || !this.column.tags.has("alphabet" /* TAGS.alphabet */) ||
533
- (!this.column.tags.has(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */) && !this.isHelm() && this.alphabet === "UN" /* ALPHABET.UN */)) {
534
- // The following detectors and setters are to be called because the column is likely
535
- // as the UnitsHandler constructor was called on the column.
536
- if (this.isFasta())
537
- UnitsHandler.setUnitsToFastaColumn(this);
538
- else if (this.isSeparator()) {
539
- const separator = col.getTag("separator" /* TAGS.separator */);
540
- UnitsHandler.setUnitsToSeparatorColumn(this, separator);
541
- }
542
- else if (this.isHelm())
543
- UnitsHandler.setUnitsToHelmColumn(this);
544
- else
545
- throw new Error(`Unexpected units '${this.column.getTag(DG.TAGS.UNITS)}'.`);
546
- }
547
- // if (!this.column.tags.has(TAGS.alphabetSize)) {
548
- // if (this.isHelm())
549
- // throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
550
- // `tag '${TAGS.alphabetSize}' is mandatory.`);
551
- // else if (['UN'].includes(this.alphabet))
552
- // throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
553
- // `tag '${TAGS.alphabetSize}' is mandatory.`);
554
- // }
555
- if (!this.column.tags.has(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */)) {
556
- if (this.isHelm())
557
- this.column.setTag(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */, 'true');
558
- else if (['UN'].includes(this.alphabet)) {
559
- throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
560
- `tag '${".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */}' is mandatory.`);
561
- }
562
- }
563
- }
564
624
  /** Gets a column's UnitsHandler object from the temp slot, or creates a new one and stores it in the temp slot. */
565
- static getOrCreate(col) {
625
+ static forColumn(col) {
626
+ // TODO: Invalidate col.temp[Temps.uh] checking column's metadata
566
627
  let res = col.temp[Temps.uh];
567
- if (!res)
568
- res = col.temp[Temps.uh] = new UnitsHandler(col);
628
+ if (!res || res.columnVersion !== col.version)
629
+ res = col.temp[Temps.uh] = new SeqHandler(col);
569
630
  return res;
570
631
  }
571
- }
572
- function joinToFasta(srcUh, seqS) {
573
- const resMList = new Array(seqS.length);
574
- for (const [srcM, mI] of wu.enumerate(seqS)) {
575
- let m = srcM;
576
- if (srcUh.isHelm())
577
- m = srcM.replace(HELM_WRAPPERS_REGEXP, '$1');
578
- if (srcUh.isGap(m))
579
- m = GapSymbols[NOTATION.FASTA];
580
- else if (m.length > 1)
581
- m = '[' + seqS[mI] + ']';
582
- resMList[mI] = m;
583
- }
584
- return resMList.join('');
585
- }
586
- function convertToFasta(srcUh, src) {
587
- const srcMList = srcUh.isHelm() ? splitterAsHelmNucl(srcUh, src) : srcUh.getSplitter()(src);
588
- return joinToFasta(srcUh, srcMList);
589
- }
590
- function joinToSeparator(srcUh, seqS, tgtSeparator) {
591
- const resMList = new Array(seqS.length);
592
- for (const [srcM, mI] of wu.enumerate(seqS)) {
593
- let m = srcM;
594
- if (srcUh.isGap(m))
595
- m = GapSymbols[NOTATION.SEPARATOR];
596
- resMList[mI] = m;
632
+ // -- joiners & converters --
633
+ joinToFasta(seqS, isHelm) {
634
+ const resMList = new Array(seqS.length);
635
+ for (let posIdx = 0; posIdx < seqS.length; ++posIdx) {
636
+ const cm = seqS.getOriginal(posIdx);
637
+ let om = seqS.getOriginal(posIdx);
638
+ if (isHelm)
639
+ om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
640
+ if (cm === GAP_SYMBOL)
641
+ om = GapOriginals[NOTATION.FASTA];
642
+ else if (cm === PHOSPHATE_SYMBOL)
643
+ om = '';
644
+ else if (om.length > 1)
645
+ om = '[' + om + ']';
646
+ resMList[posIdx] = om;
647
+ }
648
+ return resMList.join('');
649
+ }
650
+ convertToFasta(src) {
651
+ const srcUhSplitter = this.splitter;
652
+ const srcSS = this.isHelm() ? this.splitterAsHelmNucl(src) : srcUhSplitter(src);
653
+ return this.joinToFasta(srcSS, this.isHelm());
654
+ }
655
+ convertToSeparator(src, tgtSeparator) {
656
+ const srcSS = this.isHelm() ? this.splitterAsHelmNucl(src) : this.splitter(src);
657
+ return joinToSeparator(srcSS, tgtSeparator, this.isHelm());
658
+ }
659
+ convertToHelm(src) {
660
+ const wrappers = this.getHelmWrappers();
661
+ const isDnaOrRna = src.startsWith('DNA') || src.startsWith('RNA');
662
+ const srcSS = this.splitter(src);
663
+ return joinToHelm(srcSS, wrappers, isDnaOrRna);
664
+ }
665
+ /** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus. */
666
+ splitterAsHelmNucl(src) {
667
+ const srcMList = this.splitter(src);
668
+ const tgtMList = new Array(srcMList.length);
669
+ const isDna = src.startsWith('DNA');
670
+ const isRna = src.startsWith('RNA');
671
+ for (let posIdx = 0; posIdx < srcMList.length; ++posIdx) {
672
+ let om = srcMList.getOriginal(posIdx);
673
+ if (isDna || isRna) {
674
+ om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
675
+ om = om === PHOSPHATE_SYMBOL ? null : om;
676
+ }
677
+ tgtMList[posIdx] = om ? om : null;
678
+ }
679
+ return new StringListSeqSplitted(tgtMList.filter((om) => !!om), GapOriginals[NOTATION.HELM]);
597
680
  }
598
- return resMList.map((m) => m ?? '').join(tgtSeparator);
599
681
  }
600
- function convertToSeparator(srcUh, src, tgtSeparator) {
601
- const srcMList = srcUh.isHelm() ? splitterAsHelmNucl(srcUh, src) : srcUh.getSplitter()(src);
602
- return joinToSeparator(srcUh, srcMList, tgtSeparator);
603
- }
604
- function joinToHelm(srcUh, seqS, isDnaOrRna) {
605
- const [prefix, leftWrapper, rightWrapper, postfix] = srcUh.getHelmWrappers();
606
- const resMList = wu(seqS).map((srcM) => {
607
- let m = srcM;
608
- if (srcUh.isGap(m))
609
- m = GapSymbols[NOTATION.HELM];
610
- else if (isDnaOrRna)
611
- m = m.replace(HELM_WRAPPERS_REGEXP, '$1');
612
- else
613
- m = srcM.length == 1 ? `${leftWrapper}${srcM}${rightWrapper}` : `${leftWrapper}[${srcM}]${rightWrapper}`;
614
- return m;
615
- }).toArray();
616
- return `${prefix}${resMList.join('.')}${postfix}`;
617
- }
618
- function convertToHelm(srcUh, src) {
619
- const isDnaOrRna = src.startsWith('DNA') || src.startsWith('RNA');
620
- const srcS = srcUh.getSplitter()(src);
621
- return joinToHelm(srcUh, srcS, isDnaOrRna);
682
+ // -- joiners --
683
+ function joinToSeparator(seqS, tgtSeparator, isHelm) {
684
+ const resMList = new Array(seqS.length);
685
+ for (let posIdx = 0; posIdx < seqS.length; ++posIdx) {
686
+ const cm = seqS.getCanonical(posIdx);
687
+ let om = seqS.getOriginal(posIdx);
688
+ if (isHelm)
689
+ om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
690
+ if (cm === GAP_SYMBOL)
691
+ om = GapOriginals[NOTATION.SEPARATOR];
692
+ else if (cm === PHOSPHATE_SYMBOL)
693
+ om = '';
694
+ resMList[posIdx] = om;
695
+ }
696
+ return resMList.join(tgtSeparator);
622
697
  }
623
- /** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus. */
624
- function splitterAsHelmNucl(srcUh, src) {
625
- const srcMList = srcUh.getSplitter()(src);
626
- const tgtMList = new Array(srcMList.length);
627
- const isDna = src.startsWith('DNA');
628
- const isRna = src.startsWith('RNA');
629
- for (const [srcM, mI] of wu.enumerate(srcMList)) {
630
- let m = srcM;
631
- if (isDna || isRna) {
632
- m = m.replace(HELM_WRAPPERS_REGEXP, '$1');
633
- m = m === PHOSPHATE_SYMBOL ? null : m;
698
+ function joinToHelm(srcSS, wrappers, isDnaOrRna) {
699
+ const [prefix, leftWrapper, rightWrapper, postfix] = wrappers;
700
+ const resOMList = new Array(srcSS.length);
701
+ for (let posIdx = 0; posIdx < srcSS.length; ++posIdx) {
702
+ const cm = srcSS.getCanonical(posIdx);
703
+ let om = srcSS.getOriginal(posIdx);
704
+ if (cm === GAP_SYMBOL)
705
+ om = GapOriginals[NOTATION.HELM];
706
+ else {
707
+ if (isDnaOrRna)
708
+ om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
709
+ om = om.length === 1 ? `${leftWrapper}${om}${rightWrapper}` : `${leftWrapper}[${om}]${rightWrapper}`;
634
710
  }
635
- tgtMList[mI] = m;
711
+ resOMList[posIdx] = om;
636
712
  }
637
- return tgtMList.filter((m) => m !== null);
713
+ return `${prefix}${resOMList.join('.')}${postfix}`;
638
714
  }
639
- //# sourceMappingURL=units-handler.js.map
715
+ //# sourceMappingURL=seq-handler.js.map