@datagrok/bio 2.1.9 → 2.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -0
- package/detectors.js +12 -12
- package/dist/package-test.js +7 -2
- package/dist/package.js +3 -2
- package/files/tests/testSmilesShort.csv +11 -0
- package/package.json +1 -1
- package/src/package.ts +3 -1
- package/src/tests/detectors-tests.ts +6 -0
- package/{test-Bio-62cc009524f3-820f73b0.html → test-Bio-91c83d8913ff-bb573307.html} +100 -99
package/.eslintrc.json
CHANGED
package/detectors.js
CHANGED
|
@@ -138,7 +138,8 @@ class BioPackageDetectors extends DG.Package {
|
|
|
138
138
|
// TODO: Lazy calculations could be helpful for performance and convenient for expressing classification logic.
|
|
139
139
|
const statsAsChars = this.getStats(categoriesSample, 5,
|
|
140
140
|
this.getSplitterAsChars(SEQ_SAMPLE_LENGTH_LIMIT));
|
|
141
|
-
//
|
|
141
|
+
// Empty statsAsChars.freq alphabet means no strings of sufficient length are present in the data
|
|
142
|
+
if (Object.keys(statsAsChars.freq).length === 0) return null;
|
|
142
143
|
|
|
143
144
|
const decoy = this.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
|
|
144
145
|
if (decoy != ALPHABET.UN) return null;
|
|
@@ -149,19 +150,16 @@ class BioPackageDetectors extends DG.Package {
|
|
|
149
150
|
const splitter = separator ? this.getSplitterWithSeparator(separator, SEQ_SAMPLE_LENGTH_LIMIT) :
|
|
150
151
|
this.getSplitterAsFasta(SEQ_SAMPLE_LENGTH_LIMIT);
|
|
151
152
|
|
|
152
|
-
col.setTag(DG.TAGS.UNITS, units);
|
|
153
|
-
if (separator) col.setTag(UnitsHandler.TAGS.separator, separator);
|
|
154
|
-
|
|
155
153
|
if (statsAsChars.sameLength) {
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
if (alphabet === ALPHABET.UN) return null;
|
|
154
|
+
const stats = this.getStats(categoriesSample, 5, splitter);
|
|
155
|
+
const alphabet = this.detectAlphabet(stats.freq, candidateAlphabets, '-');
|
|
156
|
+
if (alphabet === ALPHABET.UN) return null;
|
|
160
157
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
158
|
+
col.setTag(DG.TAGS.UNITS, units);
|
|
159
|
+
if (separator) col.setTag(UnitsHandler.TAGS.separator, separator);
|
|
160
|
+
col.setTag(UnitsHandler.TAGS.aligned, ALIGNMENT.SEQ_MSA);
|
|
161
|
+
col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
|
|
162
|
+
return DG.SEMTYPE.MACROMOLECULE;
|
|
165
163
|
} else {
|
|
166
164
|
const stats = this.getStats(categoriesSample, 5, splitter);
|
|
167
165
|
// Empty monomer alphabet is not allowed
|
|
@@ -176,6 +174,8 @@ class BioPackageDetectors extends DG.Package {
|
|
|
176
174
|
|
|
177
175
|
// const forbidden = this.checkForbiddenWoSeparator(stats.freq);
|
|
178
176
|
if (separator || alphabet != 'UN') {
|
|
177
|
+
col.setTag(DG.TAGS.UNITS, units);
|
|
178
|
+
if (separator) col.setTag(UnitsHandler.TAGS.separator, separator);
|
|
179
179
|
col.setTag(UnitsHandler.TAGS.aligned, aligned);
|
|
180
180
|
col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
|
|
181
181
|
if (alphabet === ALPHABET.UN) {
|