@datagrok/bio 1.7.13 → 1.7.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +8 -6
- package/dist/package-test.js +106 -68
- package/dist/package.js +99 -68
- package/package.json +8 -8
- package/setup-unlink-clean.cmd +15 -0
- package/setup.cmd +15 -19
- package/src/package.ts +42 -14
- package/src/tests/detectors-test.ts +8 -0
- package/{test-Bio-34f75e5127b8-781e9df9.html → test-Bio-4f0c8bae6479-dda97945.html} +3 -2
package/detectors.js
CHANGED
|
@@ -11,8 +11,6 @@
|
|
|
11
11
|
|
|
12
12
|
class BioPackageDetectors extends DG.Package {
|
|
13
13
|
|
|
14
|
-
static mmSemType = 'Macromolecule';
|
|
15
|
-
|
|
16
14
|
static PeptideFastaAlphabet = new Set([
|
|
17
15
|
'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
|
|
18
16
|
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
|
|
@@ -40,9 +38,12 @@ class BioPackageDetectors extends DG.Package {
|
|
|
40
38
|
detectMacromolecule(col) {
|
|
41
39
|
// To collect alphabet freq three strategies can be used:
|
|
42
40
|
// as chars, as fasta (single or within square brackets), as with the separator.
|
|
43
|
-
if (
|
|
41
|
+
if (
|
|
42
|
+
!(col.categories.length == 1 && !col.categories[0]) && // TODO: Remove with tests for single empty category value
|
|
43
|
+
DG.Detector.sampleCategories(col, (s) => BioPackageDetectors.isHelm(s), 1)
|
|
44
|
+
) {
|
|
44
45
|
col.setTag(DG.TAGS.UNITS, 'HELM');
|
|
45
|
-
return
|
|
46
|
+
return DG.SEMTYPE.MACROMOLECULE;
|
|
46
47
|
}
|
|
47
48
|
|
|
48
49
|
const decoyAlphabets = [
|
|
@@ -72,6 +73,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
72
73
|
// TODO: Detect HELM sequence
|
|
73
74
|
// TODO: Lazy calculations could be helpful for performance and convenient for expressing classification logic.
|
|
74
75
|
const statsAsChars = BioPackageDetectors.getStats(col, 5, BioPackageDetectors.splitterAsChars);
|
|
76
|
+
// if (Object.keys(statsAsChars.freq).length === 0) return;
|
|
75
77
|
|
|
76
78
|
const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null, 0.35);
|
|
77
79
|
if (decoy != 'UN') return null;
|
|
@@ -83,7 +85,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
83
85
|
|
|
84
86
|
const units = `fasta:SEQ.MSA:${alphabet}`;
|
|
85
87
|
col.setTag(DG.TAGS.UNITS, units);
|
|
86
|
-
return
|
|
88
|
+
return DG.SEMTYPE.MACROMOLECULE;
|
|
87
89
|
}
|
|
88
90
|
} else {
|
|
89
91
|
const separator = BioPackageDetectors.detectSeparator(statsAsChars.freq);
|
|
@@ -107,7 +109,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
107
109
|
const units = `${format}:${seqType}:${alphabet}`;
|
|
108
110
|
col.setTag(DG.TAGS.UNITS, units);
|
|
109
111
|
if (separator) col.setTag('separator', separator);
|
|
110
|
-
return
|
|
112
|
+
return DG.SEMTYPE.MACROMOLECULE;
|
|
111
113
|
}
|
|
112
114
|
}
|
|
113
115
|
}
|