@datagrok/bio 1.9.1 → 1.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/detectors.js +53 -18
- package/dist/package-test.js +672 -383
- package/dist/package.js +486 -314
- package/package.json +6 -4
- package/src/package-test.ts +1 -0
- package/src/package.ts +83 -63
- package/src/tests/WebLogo-positions-test.ts +10 -9
- package/src/tests/checkInputColumn-tests.ts +69 -0
- package/src/tests/detectors-test.ts +24 -21
- package/src/tests/renderers-test.ts +29 -12
- package/src/tests/splitters-test.ts +15 -0
- package/src/tests/test-sequnces-generators.ts +37 -19
- package/src/utils/atomic-works.ts +1 -1
- package/src/utils/cell-renderer.ts +31 -23
- package/src/utils/constants.ts +3 -5
- package/src/utils/convert.ts +1 -1
- package/src/utils/multiple-sequence-alignment.ts +5 -4
- package/src/utils/sequence-activity-cliffs.ts +120 -8
- package/src/utils/sequence-space.ts +6 -4
- package/src/utils/utils.ts +3 -2
- package/test-Bio-f1ac5a5eade4-e2085bf5.html +261 -0
- package/src/utils/split-to-monomers.ts +0 -8
- package/test-Bio-fe2f9610d4ed-cdfe422f.html +0 -355
package/README.md
CHANGED
package/detectors.js
CHANGED
|
@@ -8,6 +8,30 @@
|
|
|
8
8
|
* TODO: Use detectors from WebLogo pickUp.. methods
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
+
/** enum type to simplify setting "user-friendly" notation if necessary */
|
|
12
|
+
const NOTATION = {
|
|
13
|
+
FASTA: 'fasta',
|
|
14
|
+
SEPARATOR: 'separator',
|
|
15
|
+
HELM: 'helm',
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
const ALPHABET = {
|
|
19
|
+
DNA: 'DNA',
|
|
20
|
+
RNA: 'RNA',
|
|
21
|
+
PT: 'PT',
|
|
22
|
+
UN: 'UN',
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
/** Class for handling notation units in Macromolecule columns */
|
|
26
|
+
class UnitsHandler {
|
|
27
|
+
static TAGS = {
|
|
28
|
+
aligned: 'aligned',
|
|
29
|
+
alphabet: 'alphabet',
|
|
30
|
+
alphabetSize: '.alphabetSize',
|
|
31
|
+
alphabetIsMultichar: '.alphabetIsMultichar',
|
|
32
|
+
separator: 'separator',
|
|
33
|
+
};
|
|
34
|
+
}
|
|
11
35
|
|
|
12
36
|
class BioPackageDetectors extends DG.Package {
|
|
13
37
|
|
|
@@ -52,9 +76,14 @@ class BioPackageDetectors extends DG.Package {
|
|
|
52
76
|
!(col.categories.length == 1 && !col.categories[0]) && // TODO: Remove with tests for single empty category value
|
|
53
77
|
DG.Detector.sampleCategories(col, (s) => BioPackageDetectors.isHelm(s), 1)
|
|
54
78
|
) {
|
|
55
|
-
const statsAsHelm = BioPackageDetectors.getStats(col,
|
|
56
|
-
col.setTag(DG.TAGS.UNITS,
|
|
57
|
-
|
|
79
|
+
const statsAsHelm = BioPackageDetectors.getStats(col, 2, BioPackageDetectors.splitterAsHelm);
|
|
80
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
81
|
+
|
|
82
|
+
const alphabetSize = Object.keys(statsAsHelm.freq).length;
|
|
83
|
+
const alphabetIsMultichar = Object.keys(statsAsHelm.freq).some((m) => m.length > 1);
|
|
84
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
|
|
85
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
|
|
86
|
+
|
|
58
87
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
59
88
|
}
|
|
60
89
|
|
|
@@ -64,9 +93,9 @@ class BioPackageDetectors extends DG.Package {
|
|
|
64
93
|
];
|
|
65
94
|
|
|
66
95
|
const candidateAlphabets = [
|
|
67
|
-
[
|
|
68
|
-
[
|
|
69
|
-
[
|
|
96
|
+
[ALPHABET.PT, BioPackageDetectors.PeptideFastaAlphabet, 0.55],
|
|
97
|
+
[ALPHABET.DNA, BioPackageDetectors.DnaFastaAlphabet, 0.55],
|
|
98
|
+
[ALPHABET.RNA, BioPackageDetectors.RnaFastaAlphabet, 0.55],
|
|
70
99
|
];
|
|
71
100
|
|
|
72
101
|
// Check for url column, maybe it is too heavy check
|
|
@@ -89,23 +118,24 @@ class BioPackageDetectors extends DG.Package {
|
|
|
89
118
|
// if (Object.keys(statsAsChars.freq).length === 0) return;
|
|
90
119
|
|
|
91
120
|
const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
|
|
92
|
-
if (decoy !=
|
|
121
|
+
if (decoy != ALPHABET.UN) return null;
|
|
93
122
|
|
|
94
123
|
if (statsAsChars.sameLength) {
|
|
95
124
|
if (Object.keys(statsAsChars.freq).length > 0) { // require non empty alphabet
|
|
96
125
|
const alphabet = BioPackageDetectors.detectAlphabet(statsAsChars.freq, candidateAlphabets, '-');
|
|
97
|
-
if (alphabet ===
|
|
126
|
+
if (alphabet === ALPHABET.UN) return null;
|
|
98
127
|
|
|
99
|
-
const units =
|
|
128
|
+
const units = NOTATION.FASTA;
|
|
100
129
|
col.setTag(DG.TAGS.UNITS, units);
|
|
101
|
-
col.setTag(
|
|
102
|
-
col.setTag(
|
|
130
|
+
col.setTag(UnitsHandler.TAGS.aligned, 'SEQ.MSA');
|
|
131
|
+
col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
|
|
103
132
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
104
133
|
}
|
|
105
134
|
} else {
|
|
106
135
|
const separator = BioPackageDetectors.detectSeparator(statsAsChars.freq);
|
|
107
136
|
const gapSymbol = separator ? '' : '-';
|
|
108
|
-
const splitter = separator ? BioPackageDetectors.getSplitterWithSeparator(separator) :
|
|
137
|
+
const splitter = separator ? BioPackageDetectors.getSplitterWithSeparator(separator) :
|
|
138
|
+
BioPackageDetectors.splitterAsFasta;
|
|
109
139
|
|
|
110
140
|
const stats = BioPackageDetectors.getStats(col, 5, splitter);
|
|
111
141
|
// Empty monomer alphabet is not allowed
|
|
@@ -113,7 +143,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
113
143
|
// Long monomer names for sequences with separators have constraints
|
|
114
144
|
if (separator && BioPackageDetectors.checkForbiddenWithSeparators(stats.freq)) return null;
|
|
115
145
|
|
|
116
|
-
const format = separator ?
|
|
146
|
+
const format = separator ? NOTATION.SEPARATOR : NOTATION.FASTA;
|
|
117
147
|
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
118
148
|
|
|
119
149
|
// TODO: If separator detected, then extra efforts to detect alphabet are allowed.
|
|
@@ -122,10 +152,15 @@ class BioPackageDetectors extends DG.Package {
|
|
|
122
152
|
// const forbidden = BioPackageDetectors.checkForbiddenWoSeparator(stats.freq);
|
|
123
153
|
if (separator || alphabet != 'UN') {
|
|
124
154
|
col.setTag(DG.TAGS.UNITS, format);
|
|
125
|
-
col.setTag(
|
|
126
|
-
col.setTag(
|
|
127
|
-
if (separator) col.setTag(
|
|
128
|
-
if (alphabet ===
|
|
155
|
+
col.setTag(UnitsHandler.TAGS.aligned, seqType);
|
|
156
|
+
col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
|
|
157
|
+
if (separator) col.setTag(UnitsHandler.TAGS.separator, separator);
|
|
158
|
+
if (alphabet === ALPHABET.UN) {
|
|
159
|
+
const alphabetSize = Object.keys(stats.freq).length;
|
|
160
|
+
const alphabetIsMultichar = Object.keys(stats.freq).some((m) => m.length > 1);
|
|
161
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
|
|
162
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
|
|
163
|
+
}
|
|
129
164
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
130
165
|
}
|
|
131
166
|
}
|
|
@@ -221,7 +256,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
221
256
|
const sim = candidatesSims.find((cs) => cs[4] == maxSim);
|
|
222
257
|
alphabetName = sim[0];
|
|
223
258
|
} else {
|
|
224
|
-
alphabetName =
|
|
259
|
+
alphabetName = ALPHABET.UN;
|
|
225
260
|
}
|
|
226
261
|
return alphabetName;
|
|
227
262
|
}
|