@datagrok-libraries/bio 5.39.29 → 5.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/package.json +2 -2
  3. package/src/monomer-works/monomer-utils.d.ts.map +1 -1
  4. package/src/monomer-works/monomer-utils.js +34 -31
  5. package/src/monomer-works/monomer-utils.js.map +1 -1
  6. package/src/monomer-works/to-atomic-level.d.ts +4 -4
  7. package/src/monomer-works/to-atomic-level.d.ts.map +1 -1
  8. package/src/monomer-works/to-atomic-level.js +37 -38
  9. package/src/monomer-works/to-atomic-level.js.map +1 -1
  10. package/src/utils/cell-renderer-monomer-placer.d.ts +2 -4
  11. package/src/utils/cell-renderer-monomer-placer.d.ts.map +1 -1
  12. package/src/utils/cell-renderer-monomer-placer.js +13 -16
  13. package/src/utils/cell-renderer-monomer-placer.js.map +1 -1
  14. package/src/utils/cell-renderer.d.ts +2 -3
  15. package/src/utils/cell-renderer.d.ts.map +1 -1
  16. package/src/utils/cell-renderer.js +9 -8
  17. package/src/utils/cell-renderer.js.map +1 -1
  18. package/src/utils/fasta-handler.js +2 -2
  19. package/src/utils/fasta-handler.js.map +1 -1
  20. package/src/utils/macromolecule/alignment.d.ts +4 -3
  21. package/src/utils/macromolecule/alignment.d.ts.map +1 -1
  22. package/src/utils/macromolecule/alignment.js +25 -18
  23. package/src/utils/macromolecule/alignment.js.map +1 -1
  24. package/src/utils/macromolecule/consts.d.ts +2 -0
  25. package/src/utils/macromolecule/consts.d.ts.map +1 -1
  26. package/src/utils/macromolecule/consts.js +2 -0
  27. package/src/utils/macromolecule/consts.js.map +1 -1
  28. package/src/utils/macromolecule/index.d.ts +1 -1
  29. package/src/utils/macromolecule/index.d.ts.map +1 -1
  30. package/src/utils/macromolecule/index.js +1 -1
  31. package/src/utils/macromolecule/index.js.map +1 -1
  32. package/src/utils/macromolecule/scoring.d.ts +1 -1
  33. package/src/utils/macromolecule/scoring.d.ts.map +1 -1
  34. package/src/utils/macromolecule/scoring.js +7 -5
  35. package/src/utils/macromolecule/scoring.js.map +1 -1
  36. package/src/utils/macromolecule/types.d.ts +14 -2
  37. package/src/utils/macromolecule/types.d.ts.map +1 -1
  38. package/src/utils/macromolecule/types.js +2 -0
  39. package/src/utils/macromolecule/types.js.map +1 -1
  40. package/src/utils/macromolecule/utils.d.ts +30 -12
  41. package/src/utils/macromolecule/utils.d.ts.map +1 -1
  42. package/src/utils/macromolecule/utils.js +81 -40
  43. package/src/utils/macromolecule/utils.js.map +1 -1
  44. package/src/utils/{units-handler.d.ts → seq-handler.d.ts} +45 -24
  45. package/src/utils/seq-handler.d.ts.map +1 -0
  46. package/src/utils/{units-handler.js → seq-handler.js} +293 -211
  47. package/src/utils/seq-handler.js.map +1 -0
  48. package/src/utils/splitter.d.ts.map +1 -1
  49. package/src/utils/splitter.js +8 -11
  50. package/src/utils/splitter.js.map +1 -1
  51. package/src/utils/units-handler.d.ts.map +0 -1
  52. package/src/utils/units-handler.js.map +0 -1
@@ -1,17 +1,19 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import wu from 'wu';
3
- import { NOTATION, candidateAlphabets, positionSeparator } from './macromolecule';
4
- import { detectAlphabet, getSplitterForColumn, getSplitterWithSeparator, splitterAsFasta, splitterAsFastaSimple, splitterAsHelm } from './macromolecule/utils';
3
+ import { NOTATION, candidateAlphabets, positionSeparator, splitterAsFasta, getSplitterWithSeparator, splitterAsHelm, } from './macromolecule';
4
+ import { GAP_SYMBOL, } from './macromolecule/types';
5
+ import { detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted } from './macromolecule/utils';
5
6
  import { mmDistanceFunctions, MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
6
7
  import { getMonomerLibHelper } from '../monomer-works/monomer-utils';
7
8
  import { HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL } from './const';
8
- export const Temps = new class {
9
/** Names of the column temp slots used by SeqHandler. */
export const SeqTemps = new class {
  constructor() {
    Object.assign(this, {
      /** Column's temp slot name for a SeqHandler object */
      seqHandler: `seq-handler`,
      /** Column's temp slot name for the notation provider read by the SeqHandler constructor */
      notationProvider: `seq-handler.notation-provider`,
    });
  }
}();
14
- export const GapSymbols = {
16
+ export const GapOriginals = {
15
17
  [NOTATION.FASTA]: '-',
16
18
  [NOTATION.SEPARATOR]: '',
17
19
  [NOTATION.HELM]: '*',
@@ -19,12 +21,67 @@ export const GapSymbols = {
19
21
  /** Class for handling notation units in Macromolecule columns and
20
22
  * conversion of notation systems in Macromolecule columns
21
23
  */
22
- export class UnitsHandler {
24
+ export class SeqHandler {
25
+ constructor(col) {
26
+ this._splitter = null;
27
+ this.cached = true;
28
+ this._splitted = null;
29
+ this.columnVersion = null;
30
+ this._stats = null;
31
+ this._maxLength = null;
32
+ this._posList = null;
33
+ this._joiner = undefined;
34
+ if (col.type !== DG.TYPE.STRING)
35
+ throw new Error(`Unexpected column type '${col.type}', must be '${DG.TYPE.STRING}'.`);
36
+ this._column = col;
37
+ this._columnVersion = col.version;
38
+ const units = this._column.getTag(DG.TAGS.UNITS);
39
+ if (units !== null && units !== undefined)
40
+ this._units = units;
41
+ else
42
+ throw new Error('Units are not specified in column');
43
+ this._notation = this.getNotation();
44
+ this._defaultGapOriginal = (this.isFasta()) ? GapOriginals[NOTATION.FASTA] :
45
+ (this.isHelm()) ? GapOriginals[NOTATION.HELM] :
46
+ GapOriginals[NOTATION.SEPARATOR];
47
+ if (!this.column.tags.has("aligned" /* TAGS.aligned */) || !this.column.tags.has("alphabet" /* TAGS.alphabet */) ||
48
+ (!this.column.tags.has(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */) && !this.isHelm() && this.alphabet === "UN" /* ALPHABET.UN */)) {
49
+ // The following detectors and setters are to be called because the column is likely
50
+ // as the UnitsHandler constructor was called on the column.
51
+ if (this.isFasta())
52
+ SeqHandler.setUnitsToFastaColumn(this);
53
+ else if (this.isSeparator()) {
54
+ const separator = col.getTag("separator" /* TAGS.separator */);
55
+ SeqHandler.setUnitsToSeparatorColumn(this, separator);
56
+ }
57
+ else if (this.isHelm())
58
+ SeqHandler.setUnitsToHelmColumn(this);
59
+ else
60
+ throw new Error(`Unexpected units '${this.column.getTag(DG.TAGS.UNITS)}'.`);
61
+ }
62
+ // if (!this.column.tags.has(TAGS.alphabetSize)) {
63
+ // if (this.isHelm())
64
+ // throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
65
+ // `tag '${TAGS.alphabetSize}' is mandatory.`);
66
+ // else if (['UN'].includes(this.alphabet))
67
+ // throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
68
+ // `tag '${TAGS.alphabetSize}' is mandatory.`);
69
+ // }
70
+ if (!this.column.tags.has(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */)) {
71
+ if (this.isHelm())
72
+ this.column.setTag(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */, 'true');
73
+ else if (['UN'].includes(this.alphabet)) {
74
+ throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
75
+ `tag '${".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */}' is mandatory.`);
76
+ }
77
+ }
78
+ this.notationProvider = this.column.temp[SeqTemps.notationProvider] ?? null;
79
+ }
23
80
  static setUnitsToFastaColumn(uh) {
24
81
  if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE || uh.column.getTag(DG.TAGS.UNITS) !== NOTATION.FASTA)
25
82
  throw new Error(`The column of notation '${NOTATION.FASTA}' must be '${DG.SEMTYPE.MACROMOLECULE}'.`);
26
83
  uh.column.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
27
- UnitsHandler.setTags(uh);
84
+ SeqHandler.setTags(uh);
28
85
  }
29
86
  static setUnitsToSeparatorColumn(uh, separator) {
30
87
  if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE || uh.column.getTag(DG.TAGS.UNITS) !== NOTATION.SEPARATOR)
@@ -33,13 +90,13 @@ export class UnitsHandler {
33
90
  throw new Error(`The column of notation '${NOTATION.SEPARATOR}' must have the separator tag.`);
34
91
  uh.column.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
35
92
  uh.column.setTag("separator" /* TAGS.separator */, separator);
36
- UnitsHandler.setTags(uh);
93
+ SeqHandler.setTags(uh);
37
94
  }
38
95
  static setUnitsToHelmColumn(uh) {
39
96
  if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE)
40
97
  throw new Error(`The column of notation '${NOTATION.HELM}' must be '${DG.SEMTYPE.MACROMOLECULE}'`);
41
98
  uh.column.setTag(DG.TAGS.UNITS, NOTATION.HELM);
42
- UnitsHandler.setTags(uh);
99
+ SeqHandler.setTags(uh);
43
100
  }
44
101
  /** From detectMacromolecule */
45
102
  static setTags(uh) {
@@ -69,9 +126,10 @@ export class UnitsHandler {
69
126
  }
70
127
  }
71
128
  get column() { return this._column; }
129
+ get length() { return this._column.length; }
72
130
  get units() { return this._units; }
73
131
  get notation() { return this._notation; }
74
- get defaultGapSymbol() { return this._defaultGapSymbol; }
132
+ get defaultGapOriginal() { return this._defaultGapOriginal; }
75
133
  get separator() {
76
134
  const separator = this.column.getTag("separator" /* TAGS.separator */) ?? undefined;
77
135
  if (this.notation === NOTATION.SEPARATOR && separator === undefined)
@@ -132,35 +190,57 @@ export class UnitsHandler {
132
190
  else
133
191
  return this.column.getTag(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */) === 'true';
134
192
  }
135
- /** */
136
- get splitted() {
137
- if (this._splitted === null) {
138
- const splitter = this.getSplitter();
139
- const colLength = this._column.length;
140
- this._splitted = new Array(colLength);
141
- const catIdxList = this._column.getRawData();
142
- const catList = this._column.categories;
143
- for (let rowI = 0; rowI < colLength; rowI++) {
144
- const seq = catList[catIdxList[rowI]];
145
- this._splitted[rowI] = splitter(seq);
193
+ // /** */
194
+ // public get splitted(): ISeqSplitted[] {
195
+ // // TODO: Disable cache or invalidate on changing data
196
+ // if (this._splitted === null) {
197
+ // const splitter = this.splitter;
198
+ // const colLength: number = this._column.length;
199
+ // this._splitted = new Array(colLength);
200
+ // const catIdxList = this._column.getRawData();
201
+ // const catList: string[] = this._column.categories;
202
+ // for (let rowIdx: number = 0; rowIdx < colLength; rowIdx++) {
203
+ // const seq: string = catList[catIdxList[rowIdx]];
204
+ // this._splitted[rowIdx] = splitter(seq);
205
+ // }
206
+ // }
207
+ // return this._splitted;
208
+ // }
209
+ getSplitted(rowIdx) {
210
+ if (!this.cached) {
211
+ const seq = this.column.get(rowIdx);
212
+ return this.splitter(seq);
213
+ }
214
+ else {
215
+ if (this.column.version !== this.columnVersion || this._splitted === null) {
216
+ this.columnVersion = this.column.version;
217
+ this._splitted = new Array(this.column.length);
218
+ }
219
+ let resSS = this._splitted[rowIdx] ? this._splitted[rowIdx].deref() : undefined;
220
+ if (!resSS) {
221
+ const seq = this.column.get(rowIdx);
222
+ resSS = this.splitter(seq);
223
+ this._splitted[rowIdx] = new WeakRef(resSS);
146
224
  }
225
+ return resSS;
147
226
  }
148
- return this._splitted;
149
227
  }
150
228
  get stats() {
151
229
  if (this._stats === null) {
152
230
  const freq = {};
153
231
  let sameLength = true;
154
232
  let firstLength = null;
155
- for (const mSeq of this.splitted) {
233
+ const colLen = this.column.length;
234
+ for (let rowIdx = 0; rowIdx < colLen; ++rowIdx) {
235
+ const mSeq = this.getSplitted(rowIdx);
156
236
  if (firstLength == null)
157
237
  firstLength = mSeq.length;
158
238
  else if (mSeq.length !== firstLength)
159
239
  sameLength = false;
160
- for (const m of mSeq) {
161
- if (!(m in freq))
162
- freq[m] = 0;
163
- freq[m] += 1;
240
+ for (const cm of mSeq.canonicals) {
241
+ if (!(cm in freq))
242
+ freq[cm] = 0;
243
+ freq[cm] += 1;
164
244
  }
165
245
  }
166
246
  this._stats = { freq: freq, sameLength: sameLength };
@@ -169,8 +249,8 @@ export class UnitsHandler {
169
249
  }
170
250
  get maxLength() {
171
251
  if (this._maxLength === null) {
172
- this._maxLength = this.splitted.length === 0 ? 0 :
173
- Math.max(...this.splitted.map((seqS) => seqS.length));
252
+ this._maxLength = this.column.length === 0 ? 0 :
253
+ Math.max(...wu.count(0).take(this.column.length).map((rowIdx) => this.getSplitted(rowIdx).length));
174
254
  }
175
255
  return this._maxLength;
176
256
  }
@@ -190,9 +270,12 @@ export class UnitsHandler {
190
270
  isPeptide() { return this.alphabet === "PT" /* ALPHABET.PT */; }
191
271
  isMsa() { return this.aligned ? this.aligned.toUpperCase().includes('MSA') : false; }
192
272
  isHelmCompatible() { return this.helmCompatible === 'true'; }
193
- isGap(m) {
194
- return !m || (this.units === NOTATION.FASTA && m === GapSymbols[NOTATION.FASTA]) ||
195
- (this.units === NOTATION.HELM && m === GapSymbols[NOTATION.HELM]);
273
+ /** Checks {@link om} for being a gap
274
+ * @param {string} om Original monomer of sequence symbol
275
+ * @return {boolean}
276
+ */
277
+ isGap(om) {
278
+ return !om || om === this._defaultGapOriginal;
196
279
  }
197
280
  /** Associate notation types with the corresponding units */
198
281
  /**
@@ -246,7 +329,9 @@ export class UnitsHandler {
246
329
  const srcAligned = col.getTag("aligned" /* TAGS.aligned */);
247
330
  if (srcAligned)
248
331
  newColumn.setTag("aligned" /* TAGS.aligned */, srcAligned);
249
- const srcAlphabet = col.getTag("alphabet" /* TAGS.alphabet */);
332
+ let srcAlphabet = col.getTag("alphabet" /* TAGS.alphabet */);
333
+ if (!srcAlphabet && this.notation === NOTATION.HELM && tgtNotation !== NOTATION.HELM)
334
+ srcAlphabet = "UN" /* ALPHABET.UN */;
250
335
  if (srcAlphabet != null)
251
336
  newColumn.setTag("alphabet" /* TAGS.alphabet */, srcAlphabet);
252
337
  let srcAlphabetSize = col.getTag(".alphabetSize" /* TAGS.alphabetSize */);
@@ -261,8 +346,9 @@ export class UnitsHandler {
261
346
  }
262
347
  return newColumn;
263
348
  }
264
- getNewColumnFromList(name, list) {
265
- return this.getNewColumn(this.notation, this.separator, name, list);
349
+ /** Creates a new column on data of {@link seqList} with the same tags */
350
+ getNewColumnFromList(name, seqList) {
351
+ return this.getNewColumn(this.notation, this.separator, name, seqList);
266
352
  }
267
353
  /**
268
354
  * Create a new empty column using templateCol as a template
@@ -272,7 +358,7 @@ export class UnitsHandler {
272
358
  * @return {DG.Column}
273
359
  */
274
360
  static getNewColumn(templateCol) {
275
- const col = UnitsHandler.getOrCreate(templateCol);
361
+ const col = SeqHandler.forColumn(templateCol);
276
362
  const targetNotation = col.notation;
277
363
  return col.getNewColumn(targetNotation);
278
364
  }
@@ -302,7 +388,7 @@ export class UnitsHandler {
302
388
  // WARNING: in this implementation it is impossible to verify the uniqueness
303
389
  // of the new column's name
304
390
  // TODO: verify the validity of units parameter
305
- if (!UnitsHandler.unitsStringIsValid(units))
391
+ if (!SeqHandler.unitsStringIsValid(units))
306
392
  throw new Error('Invalid format of \'units\' parameter');
307
393
  const newColumn = DG.Column.fromList('string', name, new Array(len).fill(''));
308
394
  newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
@@ -311,6 +397,10 @@ export class UnitsHandler {
311
397
  }
312
398
  /** Gets function to split seq value to monomers */
313
399
  getSplitter(limit) {
400
+ let splitter = null;
401
+ splitter = this.notationProvider ? this.notationProvider.splitter : null;
402
+ if (splitter)
403
+ return splitter;
314
404
  if (this.units.toLowerCase().startsWith(NOTATION.FASTA)) {
315
405
  const alphabet = this.column.getTag("alphabet" /* TAGS.alphabet */);
316
406
  if (alphabet !== null && !this.getAlphabetIsMultichar())
@@ -326,6 +416,9 @@ export class UnitsHandler {
326
416
  throw new Error(`Unexpected units ${this.units} .`);
327
417
  // TODO: Splitter for HELM
328
418
  }
419
+ split(seq) {
420
+ return this.splitter(seq);
421
+ }
329
422
  getDistanceFunctionName() {
330
423
  // TODO add support for helm and separator notation
331
424
  if (!this.isFasta())
@@ -333,10 +426,10 @@ export class UnitsHandler {
333
426
  if (this.isMsa())
334
427
  return MmDistanceFunctionsNames.HAMMING;
335
428
  switch (this.alphabet) {
336
- // As DNA and RNA scoring matrices are same as identity matrices(mostly),
337
- // we can use very fast and optimized Levenshtein distance library
338
429
  case "DNA" /* ALPHABET.DNA */:
339
430
  case "RNA" /* ALPHABET.RNA */:
431
+ // As DNA and RNA scoring matrices are same as identity matrices(mostly),
432
+ // we can use very fast and optimized Levenshtein distance library
340
433
  return MmDistanceFunctionsNames.LEVENSHTEIN;
341
434
  case "PT" /* ALPHABET.PT */:
342
435
  return MmDistanceFunctionsNames.LEVENSHTEIN;
@@ -353,7 +446,7 @@ export class UnitsHandler {
353
446
  // check first for the column tag to avoid extra processing
354
447
  if (this.column.tags.has(".isHelmCompatible" /* TAGS.isHelmCompatible */))
355
448
  return this.column.getTag(".isHelmCompatible" /* TAGS.isHelmCompatible */) === 'true';
356
- // get the monolmer lib and check against the column
449
+ // get the monomer lib and check against the column
357
450
  const monomerLibHelper = await getMonomerLibHelper();
358
451
  const bioLib = monomerLibHelper.getBioLib();
359
452
  // retrieve peptides
@@ -363,14 +456,21 @@ export class UnitsHandler {
363
456
  // get splitter for given separator and check if all monomers are in the lib
364
457
  const splitterFunc = getSplitterWithSeparator(this.separator);
365
458
  // iterate over the columns, split them and check if all monomers are in the lib
366
- //TODO maybe add missing threshhold so that if there are not too many missing monomers
459
+ //TODO maybe add missing threshold so that if there are not too many missing monomers
367
460
  // the column is still considered helm compatible
368
- for (const row of this.column.categories) {
369
- const monomers = splitterFunc(row);
370
- for (const monomer of monomers) {
371
- if (!peptidesSet.has(monomer)) {
372
- this.column.setTag(".isHelmCompatible" /* TAGS.isHelmCompatible */, 'false');
373
- return false;
461
+ const catIdxSet = new Set();
462
+ const rowCount = this.column.length;
463
+ const colRawData = this.column.getRawData();
464
+ for (let rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
465
+ const catI = colRawData[rowIdx];
466
+ if (!(catI in catIdxSet)) {
467
+ catIdxSet.add(catI);
468
+ const monomers = this.getSplitted(rowIdx);
469
+ for (const cm of monomers.canonicals) {
470
+ if (!peptidesSet.has(cm)) {
471
+ this.column.setTag(".isHelmCompatible" /* TAGS.isHelmCompatible */, 'false');
472
+ return false;
473
+ }
374
474
  }
375
475
  }
376
476
  }
@@ -380,7 +480,7 @@ export class UnitsHandler {
380
480
  // -- Notation Converter --
381
481
  get splitter() {
382
482
  if (this._splitter === null)
383
- this._splitter = getSplitterForColumn(this.column);
483
+ this._splitter = this.getSplitter();
384
484
  return this._splitter;
385
485
  }
386
486
  toFasta(targetNotation) { return targetNotation === NOTATION.FASTA; }
@@ -389,38 +489,38 @@ export class UnitsHandler {
389
489
  /**
390
490
  * Convert HELM string to FASTA/SEPARATOR
391
491
  *
392
- * @param {string} helmPolymer A string to be converted
492
+ * @param {string} srcSeq A string to be converted
393
493
  * @param {string} tgtNotation Target notation: FASTA or SEPARATOR
394
494
  * @param {string} tgtSeparator Optional target separator (for HELM ->
395
- * @param {string | null} tgtGapSymbol Optional target gap symbol
495
+ * @param {string | null} tgtGapOriginal Optional target gap symbol
396
496
  * SEPARATOR)
397
497
  * @return {string} Converted string
398
498
  */
399
- convertHelmToFastaSeparator(helmPolymer, tgtNotation, tgtSeparator, tgtGapSymbol) {
400
- if (!tgtGapSymbol) {
401
- tgtGapSymbol = (this.toFasta(tgtNotation)) ?
402
- GapSymbols[NOTATION.FASTA] :
403
- GapSymbols[NOTATION.SEPARATOR];
499
+ convertHelmToFastaSeparator(srcSeq, tgtNotation, tgtSeparator, tgtGapOriginal) {
500
+ if (!tgtGapOriginal) {
501
+ tgtGapOriginal = (this.toFasta(tgtNotation)) ?
502
+ GapOriginals[NOTATION.FASTA] :
503
+ GapOriginals[NOTATION.SEPARATOR];
404
504
  }
405
505
  if (!tgtSeparator)
406
506
  tgtSeparator = (this.toFasta(tgtNotation)) ? '' : this.separator;
407
- const isNucleotide = helmPolymer.startsWith('RNA');
507
+ const isNucleotide = srcSeq.startsWith('RNA');
408
508
  // items can be monomers or helms
409
- const helmItemsArray = this.splitter(helmPolymer);
509
+ const helmItemsArray = this.splitter(srcSeq);
410
510
  const tgtMonomersArray = [];
411
- for (let i = 0; i < helmItemsArray.length; i++) {
412
- let item = helmItemsArray[i];
511
+ for (let posIdx = 0; posIdx < helmItemsArray.length; ++posIdx) {
512
+ let om = helmItemsArray.getOriginal(posIdx);
413
513
  if (isNucleotide)
414
- item = item.replace(HELM_WRAPPERS_REGEXP, '');
415
- if (item === GapSymbols[NOTATION.HELM])
416
- tgtMonomersArray.push(tgtGapSymbol);
417
- else if (this.toFasta(tgtNotation) && item.length > 1) {
514
+ om = om.replace(HELM_WRAPPERS_REGEXP, '');
515
+ if (om === GapOriginals[NOTATION.HELM])
516
+ tgtMonomersArray.push(tgtGapOriginal);
517
+ else if (this.toFasta(tgtNotation) && om.length > 1) {
418
518
  // the case of a multi-character monomer converted to FASTA
419
- const monomer = '[' + item + ']';
519
+ const monomer = '[' + om + ']';
420
520
  tgtMonomersArray.push(monomer);
421
521
  }
422
522
  else
423
- tgtMonomersArray.push(item);
523
+ tgtMonomersArray.push(om);
424
524
  }
425
525
  return tgtMonomersArray.join(tgtSeparator);
426
526
  }
@@ -431,14 +531,15 @@ export class UnitsHandler {
431
531
  * @return {DG.Column} Converted column
432
532
  */
433
533
  convert(tgtNotation, tgtSeparator) {
434
- const convert = this.getConverter(tgtNotation, tgtSeparator);
534
+ // Get joiner from the source column units handler (this) knowing about the source sequence.
535
+ // For example, converting DNA Helm to fasta requires removing the r(X)p decoration.
536
+ const joiner = this.getJoiner({ notation: tgtNotation, separator: tgtSeparator });
435
537
  const newColumn = this.getNewColumn(tgtNotation, tgtSeparator);
436
538
  // assign the values to the newly created empty column
437
- newColumn.init((rowI) => {
438
- const sourceSequence = this.column.get(rowI);
439
- return sourceSequence ? convert(sourceSequence) : sourceSequence;
539
+ newColumn.init((rowIdx) => {
540
+ const srcSS = this.getSplitted(rowIdx);
541
+ return joiner(srcSS);
440
542
  });
441
- // newColumn.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
442
543
  return newColumn;
443
544
  }
444
545
  /**
@@ -449,20 +550,20 @@ export class UnitsHandler {
449
550
  getRegion(startIdx, endIdx, name) {
450
551
  const regCol = this.getNewColumn(this.notation, this.separator);
451
552
  regCol.name = name;
452
- const maxLength = Math.max(...this.splitted.map((seqS) => seqS.length));
453
553
  const startIdxVal = startIdx ?? 0;
454
554
  const endIdxVal = endIdx ?? this.maxLength - 1;
455
- const join = this.getJoiner();
555
+ const joiner = this.getJoiner();
456
556
  const regLength = endIdxVal - startIdxVal + 1;
557
+ const gapOM = GapOriginals[this.notation];
457
558
  regCol.init((rowI) => {
458
- const seqS = this.splitted[rowI];
559
+ const seqS = this.getSplitted(rowI);
459
560
  // Custom slicing instead of array method to maintain gaps
460
- const regMList = new Array(regLength);
561
+ const regOMList = new Array(regLength);
461
562
  for (let regJPos = 0; regJPos < regLength; ++regJPos) {
462
563
  const seqJPos = startIdxVal + regJPos;
463
- regMList[regJPos] = seqJPos < seqS.length ? seqS[seqJPos] : GapSymbols[this.notation];
564
+ regOMList[regJPos] = seqJPos < seqS.length ? seqS.getOriginal(seqJPos) : gapOM;
464
565
  }
465
- return join(regMList);
566
+ return joiner(new StringListSeqSplitted(regOMList, gapOM));
466
567
  });
467
568
  const getRegionOfPositionNames = (str) => {
468
569
  const srcPosList = str.split(',').map((p) => p.trim());
@@ -481,159 +582,140 @@ export class UnitsHandler {
481
582
  regCol.setTag(".positionLabels" /* TAGS.positionLabels */, getRegionOfPositionNames(srcPositionLabelsStr));
482
583
  return regCol;
483
584
  }
484
- getJoiner() {
485
- if (this._joiner === undefined) {
486
- const srcUh = this;
487
- if (this.notation === NOTATION.FASTA)
488
- this._joiner = function (srcS) { return joinToFasta(srcUh, srcS); };
489
- else if (this.notation === NOTATION.SEPARATOR)
490
- this._joiner = function (srcS) { return joinToSeparator(srcUh, srcS, srcUh.separator); };
491
- else if (this.notation === NOTATION.HELM) {
492
- const isDnaOrRna = srcUh.alphabet === "DNA" /* ALPHABET.DNA */ || srcUh.alphabet === "RNA" /* ALPHABET.RNA */;
493
- this._joiner = function (srcS) { return joinToHelm(srcUh, srcS, isDnaOrRna); };
585
+ get joiner() {
586
+ if (!this._joiner)
587
+ this._joiner = this.getJoiner();
588
+ return this._joiner;
589
+ }
590
+ getJoiner(opts) {
591
+ const notation = opts ? opts.notation : this.notation;
592
+ const separator = opts ? opts.separator : this.separator;
593
+ let res;
594
+ const srcSh = this;
595
+ switch (notation) {
596
+ case NOTATION.FASTA: {
597
+ res = function (srcSS) { return srcSh.joinToFasta(srcSS, srcSh.isHelm()); };
598
+ break;
494
599
  }
495
- else
496
- throw new Error();
600
+ case NOTATION.SEPARATOR: {
601
+ if (!separator)
602
+ throw new Error(`Separator is mandatory for notation '${notation}'.`);
603
+ res = function (srcSS) { return joinToSeparator(srcSS, separator, srcSh.isHelm()); };
604
+ break;
605
+ }
606
+ case NOTATION.HELM: {
607
+ const isDnaOrRna = srcSh.alphabet === "DNA" /* ALPHABET.DNA */ || srcSh.alphabet === "RNA" /* ALPHABET.RNA */;
608
+ const wrappers = srcSh.getHelmWrappers();
609
+ res = function (srcSS) { return joinToHelm(srcSS, wrappers, isDnaOrRna); };
610
+ break;
611
+ }
612
+ default:
613
+ throw new Error(`Unexpected notation '${notation}'.`);
497
614
  }
498
- return this._joiner;
615
+ return res;
499
616
  }
500
617
  getConverter(tgtUnits, tgtSeparator = undefined) {
501
618
  if (tgtUnits === NOTATION.SEPARATOR && !tgtSeparator)
502
619
  throw new Error(`Target separator is not specified for target units '${NOTATION.SEPARATOR}'.`);
503
- const srcUh = this;
620
+ const srcSh = this;
504
621
  if (tgtUnits === NOTATION.FASTA)
505
- return function (src) { return convertToFasta(srcUh, src); };
622
+ return function (srcSeq) { return srcSh.convertToFasta(srcSeq); };
506
623
  if (tgtUnits === NOTATION.HELM)
507
- return function (src) { return convertToHelm(srcUh, src); };
624
+ return function (srcSeq) { return srcSh.convertToHelm(srcSeq); };
508
625
  else if (tgtUnits === NOTATION.SEPARATOR)
509
- return function (src) { return convertToSeparator(srcUh, src, tgtSeparator); };
626
+ return function (srcSeq) { return srcSh.convertToSeparator(srcSeq, tgtSeparator); };
510
627
  else
511
628
  throw new Error();
512
629
  }
513
- constructor(col) {
514
- this._splitter = null;
515
- this._splitted = null;
516
- this._stats = null;
517
- this._maxLength = null;
518
- this._posList = null;
519
- this._joiner = undefined;
520
- if (col.type !== DG.TYPE.STRING)
521
- throw new Error(`Unexpected column type '${col.type}', must be '${DG.TYPE.STRING}'.`);
522
- this._column = col;
523
- const units = this._column.getTag(DG.TAGS.UNITS);
524
- if (units !== null && units !== undefined)
525
- this._units = units;
526
- else
527
- throw new Error('Units are not specified in column');
528
- this._notation = this.getNotation();
529
- this._defaultGapSymbol = (this.isFasta()) ? GapSymbols[NOTATION.FASTA] :
530
- (this.isHelm()) ? GapSymbols[NOTATION.HELM] :
531
- GapSymbols[NOTATION.SEPARATOR];
532
- if (!this.column.tags.has("aligned" /* TAGS.aligned */) || !this.column.tags.has("alphabet" /* TAGS.alphabet */) ||
533
- (!this.column.tags.has(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */) && !this.isHelm() && this.alphabet === "UN" /* ALPHABET.UN */)) {
534
- // The following detectors and setters are to be called because the column is likely
535
- // as the UnitsHandler constructor was called on the column.
536
- if (this.isFasta())
537
- UnitsHandler.setUnitsToFastaColumn(this);
538
- else if (this.isSeparator()) {
539
- const separator = col.getTag("separator" /* TAGS.separator */);
540
- UnitsHandler.setUnitsToSeparatorColumn(this, separator);
541
- }
542
- else if (this.isHelm())
543
- UnitsHandler.setUnitsToHelmColumn(this);
544
- else
545
- throw new Error(`Unexpected units '${this.column.getTag(DG.TAGS.UNITS)}'.`);
630
+ /** Gets a column's UnitsHandler object from temp slot or creates a new and stores it to the temp slot. */
631
+ static forColumn(col) {
632
+ // TODO: Invalidate col.temp[Temps.uh] checking column's metadata
633
+ let res = col.temp[SeqTemps.seqHandler];
634
+ if (!res || res.columnVersion !== col.version)
635
+ res = col.temp[SeqTemps.seqHandler] = new SeqHandler(col);
636
+ return res;
637
+ }
638
+ // -- joiners & converters --
639
+ joinToFasta(seqS, isHelm) {
640
+ const resMList = new Array(seqS.length);
641
+ for (let posIdx = 0; posIdx < seqS.length; ++posIdx) {
642
+ const cm = seqS.getOriginal(posIdx);
643
+ let om = seqS.getOriginal(posIdx);
644
+ if (isHelm)
645
+ om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
646
+ if (cm === GAP_SYMBOL)
647
+ om = GapOriginals[NOTATION.FASTA];
648
+ else if (cm === PHOSPHATE_SYMBOL)
649
+ om = '';
650
+ else if (om.length > 1)
651
+ om = '[' + om + ']';
652
+ resMList[posIdx] = om;
546
653
  }
547
- // if (!this.column.tags.has(TAGS.alphabetSize)) {
548
- // if (this.isHelm())
549
- // throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
550
- // `tag '${TAGS.alphabetSize}' is mandatory.`);
551
- // else if (['UN'].includes(this.alphabet))
552
- // throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
553
- // `tag '${TAGS.alphabetSize}' is mandatory.`);
554
- // }
555
- if (!this.column.tags.has(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */)) {
556
- if (this.isHelm())
557
- this.column.setTag(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */, 'true');
558
- else if (['UN'].includes(this.alphabet)) {
559
- throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
560
- `tag '${".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */}' is mandatory.`);
654
+ return resMList.join('');
655
+ }
656
+ convertToFasta(src) {
657
+ const srcUhSplitter = this.splitter;
658
+ const srcSS = this.isHelm() ? this.splitterAsHelmNucl(src) : srcUhSplitter(src);
659
+ return this.joinToFasta(srcSS, this.isHelm());
660
+ }
661
+ convertToSeparator(src, tgtSeparator) {
662
+ const srcSS = this.isHelm() ? this.splitterAsHelmNucl(src) : this.splitter(src);
663
+ return joinToSeparator(srcSS, tgtSeparator, this.isHelm());
664
+ }
665
+ convertToHelm(src) {
666
+ const wrappers = this.getHelmWrappers();
667
+ const isDnaOrRna = src.startsWith('DNA') || src.startsWith('RNA');
668
+ const srcSS = this.splitter(src);
669
+ return joinToHelm(srcSS, wrappers, isDnaOrRna);
670
+ }
671
+ /** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus. */
672
+ splitterAsHelmNucl(src) {
673
+ const srcMList = this.splitter(src);
674
+ const tgtMList = new Array(srcMList.length);
675
+ const isDna = src.startsWith('DNA');
676
+ const isRna = src.startsWith('RNA');
677
+ for (let posIdx = 0; posIdx < srcMList.length; ++posIdx) {
678
+ let om = srcMList.getOriginal(posIdx);
679
+ if (isDna || isRna) {
680
+ om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
681
+ om = om === PHOSPHATE_SYMBOL ? null : om;
561
682
  }
683
+ tgtMList[posIdx] = om ? om : null;
562
684
  }
685
+ return new StringListSeqSplitted(tgtMList.filter((om) => !!om), GapOriginals[NOTATION.HELM]);
563
686
  }
564
- /** Gets a column's UnitsHandler object from temp slot or creates a new and stores it to the temp slot. */
565
- static getOrCreate(col) {
566
- let res = col.temp[Temps.uh];
567
- if (!res)
568
- res = col.temp[Temps.uh] = new UnitsHandler(col);
569
- return res;
570
- }
571
- }
572
- function joinToFasta(srcUh, seqS) {
573
- const resMList = new Array(seqS.length);
574
- for (const [srcM, mI] of wu.enumerate(seqS)) {
575
- let m = srcM;
576
- if (srcUh.isHelm())
577
- m = srcM.replace(HELM_WRAPPERS_REGEXP, '$1');
578
- if (srcUh.isGap(m))
579
- m = GapSymbols[NOTATION.FASTA];
580
- else if (m.length > 1)
581
- m = '[' + seqS[mI] + ']';
582
- resMList[mI] = m;
583
- }
584
- return resMList.join('');
585
687
  }
586
- function convertToFasta(srcUh, src) {
587
- const srcMList = srcUh.isHelm() ? splitterAsHelmNucl(srcUh, src) : srcUh.getSplitter()(src);
588
- return joinToFasta(srcUh, srcMList);
589
- }
590
- function joinToSeparator(srcUh, seqS, tgtSeparator) {
688
+ // -- joiners --
689
+ function joinToSeparator(seqS, tgtSeparator, isHelm) {
591
690
  const resMList = new Array(seqS.length);
592
- for (const [srcM, mI] of wu.enumerate(seqS)) {
593
- let m = srcM;
594
- if (srcUh.isGap(m))
595
- m = GapSymbols[NOTATION.SEPARATOR];
596
- resMList[mI] = m;
597
- }
598
- return resMList.map((m) => m ?? '').join(tgtSeparator);
691
+ for (let posIdx = 0; posIdx < seqS.length; ++posIdx) {
692
+ const cm = seqS.getCanonical(posIdx);
693
+ let om = seqS.getOriginal(posIdx);
694
+ if (isHelm)
695
+ om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
696
+ if (cm === GAP_SYMBOL)
697
+ om = GapOriginals[NOTATION.SEPARATOR];
698
+ else if (cm === PHOSPHATE_SYMBOL)
699
+ om = '';
700
+ resMList[posIdx] = om;
701
+ }
702
+ return resMList.join(tgtSeparator);
599
703
  }
600
- function convertToSeparator(srcUh, src, tgtSeparator) {
601
- const srcMList = srcUh.isHelm() ? splitterAsHelmNucl(srcUh, src) : srcUh.getSplitter()(src);
602
- return joinToSeparator(srcUh, srcMList, tgtSeparator);
603
- }
604
- function joinToHelm(srcUh, seqS, isDnaOrRna) {
605
- const [prefix, leftWrapper, rightWrapper, postfix] = srcUh.getHelmWrappers();
606
- const resMList = wu(seqS).map((srcM) => {
607
- let m = srcM;
608
- if (srcUh.isGap(m))
609
- m = GapSymbols[NOTATION.HELM];
610
- else if (isDnaOrRna)
611
- m = m.replace(HELM_WRAPPERS_REGEXP, '$1');
612
- else
613
- m = srcM.length == 1 ? `${leftWrapper}${srcM}${rightWrapper}` : `${leftWrapper}[${srcM}]${rightWrapper}`;
614
- return m;
615
- }).toArray();
616
- return `${prefix}${resMList.join('.')}${postfix}`;
617
- }
618
- function convertToHelm(srcUh, src) {
619
- const isDnaOrRna = src.startsWith('DNA') || src.startsWith('RNA');
620
- const srcS = srcUh.getSplitter()(src);
621
- return joinToHelm(srcUh, srcS, isDnaOrRna);
622
- }
623
- /** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus. */
624
- function splitterAsHelmNucl(srcUh, src) {
625
- const srcMList = srcUh.getSplitter()(src);
626
- const tgtMList = new Array(srcMList.length);
627
- const isDna = src.startsWith('DNA');
628
- const isRna = src.startsWith('RNA');
629
- for (const [srcM, mI] of wu.enumerate(srcMList)) {
630
- let m = srcM;
631
- if (isDna || isRna) {
632
- m = m.replace(HELM_WRAPPERS_REGEXP, '$1');
633
- m = m === PHOSPHATE_SYMBOL ? null : m;
704
+ function joinToHelm(srcSS, wrappers, isDnaOrRna) {
705
+ const [prefix, leftWrapper, rightWrapper, postfix] = wrappers;
706
+ const resOMList = new Array(srcSS.length);
707
+ for (let posIdx = 0; posIdx < srcSS.length; ++posIdx) {
708
+ const cm = srcSS.getCanonical(posIdx);
709
+ let om = srcSS.getOriginal(posIdx);
710
+ if (cm === GAP_SYMBOL)
711
+ om = GapOriginals[NOTATION.HELM];
712
+ else {
713
+ if (isDnaOrRna)
714
+ om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
715
+ om = om.length === 1 ? `${leftWrapper}${om}${rightWrapper}` : `${leftWrapper}[${om}]${rightWrapper}`;
634
716
  }
635
- tgtMList[mI] = m;
717
+ resOMList[posIdx] = om;
636
718
  }
637
- return tgtMList.filter((m) => m !== null);
719
+ return `${prefix}${resOMList.join('.')}${postfix}`;
638
720
  }
639
- //# sourceMappingURL=units-handler.js.map
721
+ //# sourceMappingURL=seq-handler.js.map