@datagrok/bio 1.7.21 → 1.7.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/detectors.js
CHANGED
|
@@ -26,6 +26,15 @@ class BioPackageDetectors extends DG.Package {
|
|
|
26
26
|
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
|
27
27
|
'+', '-', '.', , '/', '\\', '@', '[', ']', '(', ')', '#', '%', '=']);
|
|
28
28
|
|
|
29
|
+
static SmartsRawAlphabet = new Set([
|
|
30
|
+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
|
31
|
+
'!', '#', '$', '&', '(', ')', '*', '+', ',', '-', '.', ':', ';', '=', '@', '~', '[', ']',
|
|
32
|
+
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M',
|
|
33
|
+
'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
|
34
|
+
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm',
|
|
35
|
+
'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'y',
|
|
36
|
+
]);
|
|
37
|
+
|
|
29
38
|
/** @param s {String} - string to check
|
|
30
39
|
* @returns {boolean} */
|
|
31
40
|
static isHelm(s) {
|
|
@@ -48,13 +57,14 @@ class BioPackageDetectors extends DG.Package {
|
|
|
48
57
|
}
|
|
49
58
|
|
|
50
59
|
const decoyAlphabets = [
|
|
51
|
-
['SMILES', BioPackageDetectors.SmilesRawAlphabet],
|
|
60
|
+
['SMILES', BioPackageDetectors.SmilesRawAlphabet, 0.30],
|
|
61
|
+
['SMARTS', BioPackageDetectors.SmartsRawAlphabet, 0.45],
|
|
52
62
|
];
|
|
53
63
|
|
|
54
64
|
const candidateAlphabets = [
|
|
55
|
-
['PT', BioPackageDetectors.PeptideFastaAlphabet],
|
|
56
|
-
['DNA', BioPackageDetectors.DnaFastaAlphabet],
|
|
57
|
-
['RNA', BioPackageDetectors.RnaFastaAlphabet],
|
|
65
|
+
['PT', BioPackageDetectors.PeptideFastaAlphabet, 0.55],
|
|
66
|
+
['DNA', BioPackageDetectors.DnaFastaAlphabet, 0.55],
|
|
67
|
+
['RNA', BioPackageDetectors.RnaFastaAlphabet, 0.55],
|
|
58
68
|
];
|
|
59
69
|
|
|
60
70
|
// Check for a URL column; this check may be too expensive
|
|
@@ -76,7 +86,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
76
86
|
const statsAsChars = BioPackageDetectors.getStats(col, 5, BioPackageDetectors.splitterAsChars);
|
|
77
87
|
// if (Object.keys(statsAsChars.freq).length === 0) return;
|
|
78
88
|
|
|
79
|
-
const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null
|
|
89
|
+
const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
|
|
80
90
|
if (decoy != 'UN') return null;
|
|
81
91
|
|
|
82
92
|
if (statsAsChars.sameLength) {
|
|
@@ -148,9 +158,11 @@ class BioPackageDetectors extends DG.Package {
|
|
|
148
158
|
return sepFreq / otherSumFreq > freqThreshold ? sep : null;
|
|
149
159
|
}
|
|
150
160
|
|
|
151
|
-
/** With a separator, spaces are nor allowed in monomer names.
|
|
161
|
+
/** With a separator, spaces are not allowed in monomer names.
|
|
162
|
+
* The monomer name/label cannot contain digits only.
|
|
163
|
+
*/
|
|
152
164
|
static checkForbiddenWithSeparators(freq) {
|
|
153
|
-
const forbiddenRe = /[ ]
|
|
165
|
+
const forbiddenRe = /[ ]|^\d+$/i;
|
|
154
166
|
return Object.keys(freq).filter((m) => forbiddenRe.test(m)).length > 0;
|
|
155
167
|
}
|
|
156
168
|
|
|
@@ -191,16 +203,16 @@ class BioPackageDetectors extends DG.Package {
|
|
|
191
203
|
* @param freq frequencies of monomers in sequence set
|
|
192
204
|
* @param candidates an array of triples [name, monomer set, minimum similarity threshold]
|
|
193
205
|
* */
|
|
194
|
-
static detectAlphabet(freq, candidates, gapSymbol
|
|
206
|
+
static detectAlphabet(freq, candidates, gapSymbol) {
|
|
195
207
|
const candidatesSims = candidates.map((c) => {
|
|
196
208
|
const sim = BioPackageDetectors.getAlphabetSimilarity(freq, c[1], gapSymbol);
|
|
197
|
-
return [c[0], c[1], freq, sim];
|
|
209
|
+
return [c[0], c[1], c[2], freq, sim];
|
|
198
210
|
});
|
|
199
211
|
|
|
200
212
|
let alphabetName;
|
|
201
|
-
const maxSim = Math.max(...candidatesSims.map((cs) => cs[
|
|
202
|
-
if (maxSim >
|
|
203
|
-
const sim = candidatesSims.find((cs) => cs[
|
|
213
|
+
const maxSim = Math.max(...candidatesSims.map((cs) => cs[4] > cs[2] ? cs[4] : -1));
|
|
214
|
+
if (maxSim > 0) {
|
|
215
|
+
const sim = candidatesSims.find((cs) => cs[4] == maxSim);
|
|
204
216
|
alphabetName = sim[0];
|
|
205
217
|
} else {
|
|
206
218
|
alphabetName = 'UN';
|