@datagrok/bio 2.4.14 → 2.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/detectors.js CHANGED
@@ -120,7 +120,7 @@ class BioPackageDetectors extends DG.Package {
120
120
  const decoyAlphabets = [
121
121
  ['NUMBERS', this.numbersRawAlphabet, 0.25],
122
122
  ['SMILES', this.smilesRawAlphabet, 0.25],
123
- ['SMARTS', this.smartsRawAlphabet, 0.43],
123
+ ['SMARTS', this.smartsRawAlphabet, 0.45],
124
124
  ];
125
125
 
126
126
  const candidateAlphabets = [
@@ -151,7 +151,7 @@ class BioPackageDetectors extends DG.Package {
151
151
  // An empty statsAsChars.freq alphabet means no strings of sufficient length are present in the data
152
152
  if (Object.keys(statsAsChars.freq).length === 0) return null;
153
153
 
154
- const decoy = this.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
154
+ const decoy = this.detectAlphabet(statsAsChars.freq, decoyAlphabets, null, colNameLikely ? -0.05 : 0);
155
155
  if (decoy !== ALPHABET.UN) return null;
156
156
 
157
157
  const separator = this.detectSeparator(statsAsChars.freq);
@@ -164,7 +164,7 @@ class BioPackageDetectors extends DG.Package {
164
164
 
165
165
  if (statsAsChars.sameLength) {
166
166
  const stats = this.getStats(categoriesSample, seqMinLength, splitter);
167
- const alphabet = this.detectAlphabet(stats.freq, candidateAlphabets, '-', colNameLikely);
167
+ const alphabet = this.detectAlphabet(stats.freq, candidateAlphabets, '-', colNameLikely ? 0.15 : 0);
168
168
  if (alphabet === ALPHABET.UN) return null;
169
169
 
170
170
  col.setTag(DG.TAGS.UNITS, units);
@@ -192,7 +192,7 @@ class BioPackageDetectors extends DG.Package {
192
192
  const aligned = stats.sameLength ? ALIGNMENT.SEQ_MSA : ALIGNMENT.SEQ;
193
193
 
194
194
  // TODO: If separator detected, then extra efforts to detect alphabet are allowed.
195
- const alphabet = this.detectAlphabet(stats.freq, candidateAlphabets, gapSymbol, colNameLikely);
195
+ const alphabet = this.detectAlphabet(stats.freq, candidateAlphabets, gapSymbol, colNameLikely ? 0.15 : 0);
196
196
  if (units === NOTATION.FASTA && alphabet === ALPHABET.UN && !alphabetIsMultichar) return null;
197
197
 
198
198
  // const forbidden = this.checkForbiddenWoSeparator(stats.freq);
@@ -304,9 +304,9 @@ class BioPackageDetectors extends DG.Package {
304
304
  * @param candidates an array of pairs [name, monomer set]
305
305
  * @param {number} simAdj Value added to each candidate's similarity score (default 0); callers derive it from whether the column name suggests the column is Macromolecule more likely
306
306
  */
307
- detectAlphabet(freq, candidates, gapSymbol, colNameLikely = false) {
307
+ detectAlphabet(freq, candidates, gapSymbol, simAdj = 0) {
308
308
  const candidatesSims = candidates.map((c) => {
309
- const sim = this.getAlphabetSimilarity(freq, c[1], gapSymbol) + (colNameLikely ? 0.15 : 0);
309
+ const sim = this.getAlphabetSimilarity(freq, c[1], gapSymbol) + simAdj;
310
310
  return [c[0], c[1], c[2], freq, sim];
311
311
  });
312
312