@datagrok/bio 1.10.2 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/detectors.js +45 -20
- package/dist/package-test.js +600 -323
- package/dist/package.js +404 -264
- package/package.json +5 -4
- package/src/package-test.ts +1 -0
- package/src/package.ts +73 -30
- package/src/tests/WebLogo-positions-test.ts +10 -9
- package/src/tests/checkInputColumn-tests.ts +72 -0
- package/src/tests/convert-test.ts +6 -3
- package/src/tests/detectors-test.ts +3 -3
- package/src/tests/renderers-test.ts +56 -22
- package/src/tests/sequence-space-utils.ts +8 -3
- package/src/tests/splitters-test.ts +15 -0
- package/src/tests/test-sequnces-generators.ts +16 -21
- package/src/utils/cell-renderer.ts +18 -17
- package/src/utils/constants.ts +3 -5
- package/src/utils/convert.ts +5 -2
- package/src/utils/multiple-sequence-alignment.ts +5 -4
- package/src/utils/sequence-activity-cliffs.ts +120 -8
- package/src/utils/sequence-space.ts +1 -1
- package/src/utils/utils.ts +3 -2
- package/test-Bio-f1ac5a5eade4-a0f7e8c0.html +261 -0
- package/src/utils/split-to-monomers.ts +0 -8
- package/test-Bio-eb4783c07294-0aa1538b.html +0 -355
package/README.md
CHANGED
package/detectors.js
CHANGED
|
@@ -8,6 +8,30 @@
|
|
|
8
8
|
* TODO: Use detectors from WebLogo pickUp.. methods
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
+
/** enum type to simplify setting "user-friendly" notation if necessary */
|
|
12
|
+
const NOTATION = {
|
|
13
|
+
FASTA: 'fasta',
|
|
14
|
+
SEPARATOR: 'separator',
|
|
15
|
+
HELM: 'helm',
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
const ALPHABET = {
|
|
19
|
+
DNA: 'DNA',
|
|
20
|
+
RNA: 'RNA',
|
|
21
|
+
PT: 'PT',
|
|
22
|
+
UN: 'UN',
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
/** Class for handling notation units in Macromolecule columns */
|
|
26
|
+
class UnitsHandler {
|
|
27
|
+
static TAGS = {
|
|
28
|
+
aligned: 'aligned',
|
|
29
|
+
alphabet: 'alphabet',
|
|
30
|
+
alphabetSize: '.alphabetSize',
|
|
31
|
+
alphabetIsMultichar: '.alphabetIsMultichar',
|
|
32
|
+
separator: 'separator',
|
|
33
|
+
};
|
|
34
|
+
}
|
|
11
35
|
|
|
12
36
|
class BioPackageDetectors extends DG.Package {
|
|
13
37
|
|
|
@@ -53,12 +77,12 @@ class BioPackageDetectors extends DG.Package {
|
|
|
53
77
|
DG.Detector.sampleCategories(col, (s) => BioPackageDetectors.isHelm(s), 1)
|
|
54
78
|
) {
|
|
55
79
|
const statsAsHelm = BioPackageDetectors.getStats(col, 2, BioPackageDetectors.splitterAsHelm);
|
|
56
|
-
col.setTag(DG.TAGS.UNITS,
|
|
80
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
57
81
|
|
|
58
82
|
const alphabetSize = Object.keys(statsAsHelm.freq).length;
|
|
59
83
|
const alphabetIsMultichar = Object.keys(statsAsHelm.freq).some((m) => m.length > 1);
|
|
60
|
-
col.setTag(
|
|
61
|
-
col.setTag(
|
|
84
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
|
|
85
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
|
|
62
86
|
|
|
63
87
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
64
88
|
}
|
|
@@ -69,9 +93,9 @@ class BioPackageDetectors extends DG.Package {
|
|
|
69
93
|
];
|
|
70
94
|
|
|
71
95
|
const candidateAlphabets = [
|
|
72
|
-
[
|
|
73
|
-
[
|
|
74
|
-
[
|
|
96
|
+
[ALPHABET.PT, BioPackageDetectors.PeptideFastaAlphabet, 0.55],
|
|
97
|
+
[ALPHABET.DNA, BioPackageDetectors.DnaFastaAlphabet, 0.55],
|
|
98
|
+
[ALPHABET.RNA, BioPackageDetectors.RnaFastaAlphabet, 0.55],
|
|
75
99
|
];
|
|
76
100
|
|
|
77
101
|
// Check for url column, maybe it is too heavy check
|
|
@@ -94,23 +118,24 @@ class BioPackageDetectors extends DG.Package {
|
|
|
94
118
|
// if (Object.keys(statsAsChars.freq).length === 0) return;
|
|
95
119
|
|
|
96
120
|
const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
|
|
97
|
-
if (decoy !=
|
|
121
|
+
if (decoy != ALPHABET.UN) return null;
|
|
98
122
|
|
|
99
123
|
if (statsAsChars.sameLength) {
|
|
100
124
|
if (Object.keys(statsAsChars.freq).length > 0) { // require non empty alphabet
|
|
101
125
|
const alphabet = BioPackageDetectors.detectAlphabet(statsAsChars.freq, candidateAlphabets, '-');
|
|
102
|
-
if (alphabet ===
|
|
126
|
+
if (alphabet === ALPHABET.UN) return null;
|
|
103
127
|
|
|
104
|
-
const units =
|
|
128
|
+
const units = NOTATION.FASTA;
|
|
105
129
|
col.setTag(DG.TAGS.UNITS, units);
|
|
106
|
-
col.setTag(
|
|
107
|
-
col.setTag(
|
|
130
|
+
col.setTag(UnitsHandler.TAGS.aligned, 'SEQ.MSA');
|
|
131
|
+
col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
|
|
108
132
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
109
133
|
}
|
|
110
134
|
} else {
|
|
111
135
|
const separator = BioPackageDetectors.detectSeparator(statsAsChars.freq);
|
|
112
136
|
const gapSymbol = separator ? '' : '-';
|
|
113
|
-
const splitter = separator ? BioPackageDetectors.getSplitterWithSeparator(separator) :
|
|
137
|
+
const splitter = separator ? BioPackageDetectors.getSplitterWithSeparator(separator) :
|
|
138
|
+
BioPackageDetectors.splitterAsFasta;
|
|
114
139
|
|
|
115
140
|
const stats = BioPackageDetectors.getStats(col, 5, splitter);
|
|
116
141
|
// Empty monomer alphabet is not allowed
|
|
@@ -118,7 +143,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
118
143
|
// Long monomer names for sequences with separators have constraints
|
|
119
144
|
if (separator && BioPackageDetectors.checkForbiddenWithSeparators(stats.freq)) return null;
|
|
120
145
|
|
|
121
|
-
const format = separator ?
|
|
146
|
+
const format = separator ? NOTATION.SEPARATOR : NOTATION.FASTA;
|
|
122
147
|
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
123
148
|
|
|
124
149
|
// TODO: If separator detected, then extra efforts to detect alphabet are allowed.
|
|
@@ -127,14 +152,14 @@ class BioPackageDetectors extends DG.Package {
|
|
|
127
152
|
// const forbidden = BioPackageDetectors.checkForbiddenWoSeparator(stats.freq);
|
|
128
153
|
if (separator || alphabet != 'UN') {
|
|
129
154
|
col.setTag(DG.TAGS.UNITS, format);
|
|
130
|
-
col.setTag(
|
|
131
|
-
col.setTag(
|
|
132
|
-
if (separator) col.setTag(
|
|
133
|
-
if (alphabet ===
|
|
155
|
+
col.setTag(UnitsHandler.TAGS.aligned, seqType);
|
|
156
|
+
col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
|
|
157
|
+
if (separator) col.setTag(UnitsHandler.TAGS.separator, separator);
|
|
158
|
+
if (alphabet === ALPHABET.UN) {
|
|
134
159
|
const alphabetSize = Object.keys(stats.freq).length;
|
|
135
160
|
const alphabetIsMultichar = Object.keys(stats.freq).some((m) => m.length > 1);
|
|
136
|
-
col.setTag(
|
|
137
|
-
col.setTag(
|
|
161
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
|
|
162
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
|
|
138
163
|
}
|
|
139
164
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
140
165
|
}
|
|
@@ -231,7 +256,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
231
256
|
const sim = candidatesSims.find((cs) => cs[4] == maxSim);
|
|
232
257
|
alphabetName = sim[0];
|
|
233
258
|
} else {
|
|
234
|
-
alphabetName =
|
|
259
|
+
alphabetName = ALPHABET.UN;
|
|
235
260
|
}
|
|
236
261
|
return alphabetName;
|
|
237
262
|
}
|