@datagrok/bio 1.7.21 → 1.7.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/detectors.js CHANGED
@@ -26,6 +26,15 @@ class BioPackageDetectors extends DG.Package {
26
26
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
27
27
  '+', '-', '.', , '/', '\\', '@', '[', ']', '(', ')', '#', '%', '=']);
28
28
 
29
+ static SmartsRawAlphabet = new Set([
30
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
31
+ '!', '#', '$', '&', '(', ')', '*', '+', ',', '-', '.', ':', ';', '=', '@', '~', '[', ']',
32
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M',
33
+ 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
34
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm',
35
+ 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'y',
36
+ ]);
37
+
29
38
  /** @param s {String} - string to check
30
39
  * @returns {boolean} */
31
40
  static isHelm(s) {
@@ -48,13 +57,14 @@ class BioPackageDetectors extends DG.Package {
48
57
  }
49
58
 
50
59
  const decoyAlphabets = [
51
- ['SMILES', BioPackageDetectors.SmilesRawAlphabet],
60
+ ['SMILES', BioPackageDetectors.SmilesRawAlphabet, 0.30],
61
+ ['SMARTS', BioPackageDetectors.SmartsRawAlphabet, 0.45],
52
62
  ];
53
63
 
54
64
  const candidateAlphabets = [
55
- ['PT', BioPackageDetectors.PeptideFastaAlphabet],
56
- ['DNA', BioPackageDetectors.DnaFastaAlphabet],
57
- ['RNA', BioPackageDetectors.RnaFastaAlphabet],
65
+ ['PT', BioPackageDetectors.PeptideFastaAlphabet, 0.55],
66
+ ['DNA', BioPackageDetectors.DnaFastaAlphabet, 0.55],
67
+ ['RNA', BioPackageDetectors.RnaFastaAlphabet, 0.55],
58
68
  ];
59
69
 
60
70
  // Check for url column, maybe it is too heavy check
@@ -76,7 +86,7 @@ class BioPackageDetectors extends DG.Package {
76
86
  const statsAsChars = BioPackageDetectors.getStats(col, 5, BioPackageDetectors.splitterAsChars);
77
87
  // if (Object.keys(statsAsChars.freq).length === 0) return;
78
88
 
79
- const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null, 0.30);
89
+ const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
80
90
  if (decoy != 'UN') return null;
81
91
 
82
92
  if (statsAsChars.sameLength) {
@@ -148,9 +158,11 @@ class BioPackageDetectors extends DG.Package {
148
158
  return sepFreq / otherSumFreq > freqThreshold ? sep : null;
149
159
  }
150
160
 
151
- /** With a separator, spaces are nor allowed in monomer names. */
161
+ /** With a separator, spaces are not allowed in monomer names.
162
+ * The monomer name/label cannot consist of digits only.
163
+ */
152
164
  static checkForbiddenWithSeparators(freq) {
153
- const forbiddenRe = /[ ]/i;
165
+ const forbiddenRe = /[ ]|^\d+$/i;
154
166
  return Object.keys(freq).filter((m) => forbiddenRe.test(m)).length > 0;
155
167
  }
156
168
 
@@ -191,16 +203,16 @@ class BioPackageDetectors extends DG.Package {
191
203
  * @param freq frequencies of monomers in sequence set
192
204
  * @param candidates an array of pairs [name, monomer set]
193
205
  * */
194
- static detectAlphabet(freq, candidates, gapSymbol, cut = 0.55) {
206
+ static detectAlphabet(freq, candidates, gapSymbol) {
195
207
  const candidatesSims = candidates.map((c) => {
196
208
  const sim = BioPackageDetectors.getAlphabetSimilarity(freq, c[1], gapSymbol);
197
- return [c[0], c[1], freq, sim];
209
+ return [c[0], c[1], c[2], freq, sim];
198
210
  });
199
211
 
200
212
  let alphabetName;
201
- const maxSim = Math.max(...candidatesSims.map((cs) => cs[3]));
202
- if (maxSim > cut) {
203
- const sim = candidatesSims.find((cs) => cs[3] == maxSim);
213
+ const maxSim = Math.max(...candidatesSims.map((cs) => cs[4] > cs[2] ? cs[4] : -1));
214
+ if (maxSim > 0) {
215
+ const sim = candidatesSims.find((cs) => cs[4] == maxSim);
204
216
  alphabetName = sim[0];
205
217
  } else {
206
218
  alphabetName = 'UN';