@datagrok/bio 1.8.2 → 1.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/detectors.js CHANGED
@@ -52,7 +52,14 @@ class BioPackageDetectors extends DG.Package {
52
52
  !(col.categories.length == 1 && !col.categories[0]) && // TODO: Remove with tests for single empty category value
53
53
  DG.Detector.sampleCategories(col, (s) => BioPackageDetectors.isHelm(s), 1)
54
54
  ) {
55
+ const statsAsHelm = BioPackageDetectors.getStats(col, 2, BioPackageDetectors.splitterAsHelm);
55
56
  col.setTag(DG.TAGS.UNITS, 'helm');
57
+
58
+ const alphabetSize = Object.keys(statsAsHelm.freq).length;
59
+ const alphabetIsMultichar = Object.keys(statsAsHelm.freq).some((m) => m.length > 1);
60
+ col.setTag('.alphabetSize', alphabetSize.toString());
61
+ col.setTag('.alphabetIsMultichar', alphabetIsMultichar ? 'true' : 'false');
62
+
56
63
  return DG.SEMTYPE.MACROMOLECULE;
57
64
  }
58
65
 
@@ -123,6 +130,12 @@ class BioPackageDetectors extends DG.Package {
123
130
  col.setTag('aligned', seqType);
124
131
  col.setTag('alphabet', alphabet);
125
132
  if (separator) col.setTag('separator', separator);
133
+ if (alphabet === 'UN') {
134
+ const alphabetSize = Object.keys(stats.freq).length;
135
+ const alphabetIsMultichar = Object.keys(stats.freq).some((m) => m.length > 1);
136
+ col.setTag('.alphabetSize', alphabetSize.toString());
137
+ col.setTag('.alphabetIsMultichar', alphabetIsMultichar ? 'true' : 'false');
138
+ }
126
139
  return DG.SEMTYPE.MACROMOLECULE;
127
140
  }
128
141
  }
@@ -297,4 +310,28 @@ class BioPackageDetectors extends DG.Package {
297
310
  '[MeNle]': 'L', // Nle - norleucine
298
311
  '[MeA]': 'A', '[MeG]': 'G', '[MeF]': 'F',
299
312
  };
313
+
314
+ static helmRe = /(PEPTIDE1|DNA1|RNA1)\{([^}]+)}/g;
315
+ static helmPp1Re = /\[([^\[\]]+)]/g;
316
+
317
+ /** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. for RNA). */
318
+ static splitterAsHelm(seq) {
319
+ BioPackageDetectors.helmRe.lastIndex = 0;
320
+ const ea = BioPackageDetectors.helmRe.exec(seq.toString());
321
+ const inSeq = ea ? ea[2] : null;
322
+
323
+ const mmPostProcess = (mm) => {
324
+ BioPackageDetectors.helmPp1Re.lastIndex = 0;
325
+ const pp1M = BioPackageDetectors.helmPp1Re.exec(mm);
326
+ if (pp1M && pp1M.length >= 2) {
327
+ return pp1M[1];
328
+ } else {
329
+ return mm;
330
+ }
331
+ };
332
+
333
+ const mmList = inSeq ? inSeq.split('.') : [];
334
+ const mmListRes = mmList.map(mmPostProcess);
335
+ return mmListRes;
336
+ }
300
337
  }