@datagrok/bio 1.8.1 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/detectors.js CHANGED
@@ -52,7 +52,9 @@ class BioPackageDetectors extends DG.Package {
52
52
  !(col.categories.length == 1 && !col.categories[0]) && // TODO: Remove with tests for single empty category value
53
53
  DG.Detector.sampleCategories(col, (s) => BioPackageDetectors.isHelm(s), 1)
54
54
  ) {
55
+ const statsAsHelm = BioPackageDetectors.getStats(col, 5, BioPackageDetectors.splitterAsHelm);
55
56
  col.setTag(DG.TAGS.UNITS, 'helm');
57
+ col.setTag('alphabetSize', statsAsHelm.freq.length);
56
58
  return DG.SEMTYPE.MACROMOLECULE;
57
59
  }
58
60
 
@@ -123,6 +125,7 @@ class BioPackageDetectors extends DG.Package {
123
125
  col.setTag('aligned', seqType);
124
126
  col.setTag('alphabet', alphabet);
125
127
  if (separator) col.setTag('separator', separator);
128
+ if (alphabet === 'UN') col.setTag('alphabetSize', stats.freq.length);
126
129
  return DG.SEMTYPE.MACROMOLECULE;
127
130
  }
128
131
  }
@@ -297,4 +300,28 @@ class BioPackageDetectors extends DG.Package {
297
300
  '[MeNle]': 'L', // Nle - norleucine
298
301
  '[MeA]': 'A', '[MeG]': 'G', '[MeF]': 'F',
299
302
  };
303
+
304
+ static helmRe = /(PEPTIDE1|DNA1|RNA1)\{([^}]+)}/g;
305
+ static helmPp1Re = /\[([^\[\]]+)]/g;
306
+
307
+ /** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. for RNA). */
308
+ static splitterAsHelm(seq) {
309
+ BioPackageDetectors.helmRe.lastIndex = 0;
310
+ const ea = BioPackageDetectors.helmRe.exec(seq.toString());
311
+ const inSeq = ea ? ea[2] : null;
312
+
313
+ const mmPostProcess = (mm) => {
314
+ BioPackageDetectors.helmPp1Re.lastIndex = 0;
315
+ const pp1M = BioPackageDetectors.helmPp1Re.exec(mm);
316
+ if (pp1M && pp1M.length >= 2) {
317
+ return pp1M[1];
318
+ } else {
319
+ return mm;
320
+ }
321
+ };
322
+
323
+ const mmList = inSeq ? inSeq.split('.') : [];
324
+ const mmListRes = mmList.map(mmPostProcess);
325
+ return mmListRes;
326
+ }
300
327
  }