@datagrok/bio 1.5.7 → 1.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/css/helm.css +3 -0
- package/detectors.js +33 -22
- package/dist/package-test.js +1225 -453
- package/dist/package.js +979 -287
- package/files/samples/sample_FASTA.csv +66 -66
- package/files/samples/testDemog.csv +5851 -0
- package/files/samples/testHelm.csv +6 -0
- package/files/samples/{id.csv → testId.csv} +0 -0
- package/files/samples/{sar-small.csv → testSmiles.csv} +0 -0
- package/files/samples/testSmiles2.csv +12248 -0
- package/helm/JSDraw/Pistoia.HELM-uncompressed.js +9694 -0
- package/helm/JSDraw/Pistoia.HELM.js +27 -0
- package/helm/JSDraw/ReadMe.txt +8 -0
- package/helm/JSDraw/Scilligence.JSDraw2.Lite-uncompressed.js +31126 -0
- package/helm/JSDraw/Scilligence.JSDraw2.Lite.js +12 -0
- package/helm/JSDraw/Scilligence.JSDraw2.Resources.js +762 -0
- package/helm/JSDraw/dojo.js +250 -0
- package/helm/JSDraw/test.html +21 -0
- package/package.json +9 -2
- package/src/monomer-library.ts +199 -0
- package/src/package-test.ts +2 -0
- package/src/package.ts +71 -20
- package/src/tests/convert-test.ts +143 -22
- package/src/tests/detectors-test.ts +139 -156
- package/src/tests/renderer-test.ts +58 -0
- package/src/tests/splitter-test.ts +22 -0
- package/src/tests/types.ts +7 -0
- package/src/utils/atomic-works.ts +218 -97
- package/src/utils/cell-renderer.ts +211 -0
- package/src/utils/chem-palette.ts +280 -0
- package/src/utils/convert.ts +25 -16
- package/src/utils/misc.ts +29 -0
- package/src/utils/multiple-sequence-alignment.ts +5 -3
- package/src/utils/notation-converter.ts +120 -84
- package/src/utils/sequence-activity-cliffs.ts +2 -2
- package/src/utils/types.ts +13 -0
- package/src/utils/utils.ts +35 -30
- package/test-Bio-34f75e5127b8-936bf89b.html +256 -0
- package/files/sample_FASTA.csv +0 -66
- package/files/sample_FASTA_with_activities.csv +0 -66
- package/files/sample_MSA.csv +0 -541
package/css/helm.css
ADDED
package/detectors.js
CHANGED
|
@@ -13,17 +13,19 @@ class BioPackageDetectors extends DG.Package {
|
|
|
13
13
|
|
|
14
14
|
static mmSemType = 'Macromolecule';
|
|
15
15
|
|
|
16
|
-
static
|
|
17
|
-
FastaSeqPt: 'fasta:SEQ:PT', FastaSeqNt: 'fasta:SEQ:NT', FastaMsaPt: 'fasta:MSA:PT', FastaMsaNt: 'fasta:MSA:NT',
|
|
18
|
-
};
|
|
19
|
-
|
|
20
|
-
static AminoacidsFastaAlphabet = new Set([
|
|
16
|
+
static PeptideFastaAlphabet = new Set([
|
|
21
17
|
'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
|
|
22
18
|
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
|
|
23
19
|
]);
|
|
24
20
|
|
|
25
|
-
static
|
|
21
|
+
static DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
|
|
22
|
+
|
|
23
|
+
static RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
|
|
26
24
|
|
|
25
|
+
static SmilesRawAlphabet = new Set([
|
|
26
|
+
'O', 'C', 'c', 'N', 'S', 'F', '(', ')',
|
|
27
|
+
'1', '2', '3', '4', '5', '6', '7',
|
|
28
|
+
'+', '-', '@', '[', ']', '/', '\\', '#', '=']);
|
|
27
29
|
|
|
28
30
|
/** @param s {String} - string to check
|
|
29
31
|
* @returns {boolean} */
|
|
@@ -42,17 +44,26 @@ class BioPackageDetectors extends DG.Package {
|
|
|
42
44
|
return BioPackageDetectors.mmSemType;
|
|
43
45
|
}
|
|
44
46
|
|
|
45
|
-
const
|
|
46
|
-
['
|
|
47
|
-
|
|
47
|
+
const decoyAlphabets = [
|
|
48
|
+
['SMILES', BioPackageDetectors.SmilesRawAlphabet],
|
|
49
|
+
];
|
|
50
|
+
|
|
51
|
+
const candidateAlphabets = [
|
|
52
|
+
['PT', BioPackageDetectors.PeptideFastaAlphabet],
|
|
53
|
+
['DNA', BioPackageDetectors.DnaFastaAlphabet],
|
|
54
|
+
['RNA', BioPackageDetectors.RnaFastaAlphabet],
|
|
48
55
|
];
|
|
49
56
|
|
|
50
57
|
// TODO: Detect HELM sequence
|
|
51
58
|
// TODO: Lazy calculations could be helpful for performance and convenient for expressing classification logic.
|
|
52
59
|
const statsAsChars = BioPackageDetectors.getStats(col, 5, BioPackageDetectors.splitterAsChars);
|
|
60
|
+
|
|
61
|
+
const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null, 0.5);
|
|
62
|
+
if (decoy != 'UN') return null;
|
|
63
|
+
|
|
53
64
|
if (statsAsChars.sameLength) {
|
|
54
65
|
if (Object.keys(statsAsChars.freq).length > 0) { // require non empty alphabet
|
|
55
|
-
const alphabet = BioPackageDetectors.detectAlphabet(statsAsChars.freq,
|
|
66
|
+
const alphabet = BioPackageDetectors.detectAlphabet(statsAsChars.freq, candidateAlphabets, '-');
|
|
56
67
|
if (alphabet === 'UN') return null;
|
|
57
68
|
|
|
58
69
|
const units = `fasta:SEQ.MSA:${alphabet}`;
|
|
@@ -74,10 +85,10 @@ class BioPackageDetectors extends DG.Package {
|
|
|
74
85
|
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
75
86
|
|
|
76
87
|
// TODO: If separator detected, then extra efforts to detect alphabet are allowed.
|
|
77
|
-
const alphabet = BioPackageDetectors.detectAlphabet(stats.freq,
|
|
88
|
+
const alphabet = BioPackageDetectors.detectAlphabet(stats.freq, candidateAlphabets, gapSymbol);
|
|
78
89
|
|
|
79
|
-
const forbidden = BioPackageDetectors.checkForbiddenWoSeparator(stats.freq);
|
|
80
|
-
if (separator ||
|
|
90
|
+
// const forbidden = BioPackageDetectors.checkForbiddenWoSeparator(stats.freq);
|
|
91
|
+
if (separator || alphabet != 'UN') {
|
|
81
92
|
const units = `${format}:${seqType}:${alphabet}`;
|
|
82
93
|
col.setTag(DG.TAGS.UNITS, units);
|
|
83
94
|
if (separator) col.setTag('separator', separator);
|
|
@@ -103,8 +114,8 @@ class BioPackageDetectors extends DG.Package {
|
|
|
103
114
|
const cleanFreq = Object.assign({}, ...Object.entries(freq)
|
|
104
115
|
.filter(([m, f]) => m != ' ' &&
|
|
105
116
|
!noSeparatorChemRe.test(m) && !noSeparatorAlphaDigitRe.test(m) &&
|
|
106
|
-
!BioPackageDetectors.
|
|
107
|
-
!BioPackageDetectors.
|
|
117
|
+
!BioPackageDetectors.PeptideFastaAlphabet.has(m) &&
|
|
118
|
+
!BioPackageDetectors.DnaFastaAlphabet.has(m))
|
|
108
119
|
.map(([m, f]) => ({[m]: f})));
|
|
109
120
|
if (Object.keys(cleanFreq).length == 0) return null;
|
|
110
121
|
|
|
@@ -124,11 +135,11 @@ class BioPackageDetectors extends DG.Package {
|
|
|
124
135
|
return Object.keys(freq).filter((m) => forbiddenRe.test(m)).length > 0;
|
|
125
136
|
}
|
|
126
137
|
|
|
127
|
-
/** Without a separator, special symbols or digits are not allowed as monomers. */
|
|
128
|
-
static checkForbiddenWoSeparator(freq) {
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
}
|
|
138
|
+
// /** Without a separator, special symbols or digits are not allowed as monomers. */
|
|
139
|
+
// static checkForbiddenWoSeparator(freq) {
|
|
140
|
+
// const forbiddenRe = /[\d!@#$%^&*()_+\-=\[\]{};':"\\|,.<>\/?]/i;
|
|
141
|
+
// return Object.keys(freq).filter((m) => forbiddenRe.test(m)).length > 0;
|
|
142
|
+
// }
|
|
132
143
|
|
|
133
144
|
/** Stats of sequences with specified splitter func, returns { freq, sameLength } */
|
|
134
145
|
static getStats(seqCol, minLength, splitter) {
|
|
@@ -161,7 +172,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
161
172
|
* @param freq frequencies of monomers in sequence set
|
|
162
173
|
* @param candidates an array of pairs [name, monomer set]
|
|
163
174
|
* */
|
|
164
|
-
static detectAlphabet(freq, candidates, gapSymbol) {
|
|
175
|
+
static detectAlphabet(freq, candidates, gapSymbol, cut = 0.65) {
|
|
165
176
|
const candidatesSims = candidates.map((c) => {
|
|
166
177
|
const sim = BioPackageDetectors.getAlphabetSimilarity(freq, c[1], gapSymbol);
|
|
167
178
|
return [c[0], c[1], freq, sim];
|
|
@@ -169,7 +180,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
169
180
|
|
|
170
181
|
let alphabetName;
|
|
171
182
|
const maxSim = Math.max(...candidatesSims.map((cs) => cs[3]));
|
|
172
|
-
if (maxSim >
|
|
183
|
+
if (maxSim > cut) {
|
|
173
184
|
const sim = candidatesSims.find((cs) => cs[3] == maxSim);
|
|
174
185
|
alphabetName = sim[0];
|
|
175
186
|
} else {
|