@datagrok/bio 1.9.0 → 1.11.0
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/detectors.js +77 -15
- package/dist/package-test.js +662 -5567
- package/dist/package.js +481 -5495
- package/package.json +8 -10
- package/src/package-test.ts +1 -0
- package/src/package.ts +94 -37
- package/src/tests/WebLogo-positions-test.ts +18 -13
- package/src/tests/checkInputColumn-tests.ts +69 -0
- package/src/tests/detectors-test.ts +38 -23
- package/src/tests/msa-tests.ts +2 -1
- package/src/tests/renderers-test.ts +12 -12
- package/src/tests/splitters-test.ts +15 -0
- package/src/tests/test-sequnces-generators.ts +45 -0
- package/src/utils/atomic-works.ts +2 -0
- package/src/utils/cell-renderer.ts +40 -24
- package/src/utils/constants.ts +3 -5
- package/src/utils/convert.ts +1 -1
- package/src/utils/multiple-sequence-alignment.ts +5 -4
- package/src/utils/sequence-space.ts +6 -4
- package/src/utils/utils.ts +3 -2
- package/src/widgets/representations.ts +1 -1
- package/test-Bio-eb4783c07294-8e35df79.html +359 -0
- package/webpack.config.js +1 -0
- package/src/utils/split-to-monomers.ts +0 -8
- package/test-Bio-d4ef1f35c295-90ae719f.html +0 -245
package/detectors.js
CHANGED
|
@@ -8,6 +8,30 @@
|
|
|
8
8
|
* TODO: Use detectors from WebLogo pickUp.. methods
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
+
/** enum type to simplify setting "user-friendly" notation if necessary */
|
|
12
|
+
const NOTATION = {
|
|
13
|
+
FASTA: 'fasta',
|
|
14
|
+
SEPARATOR: 'separator',
|
|
15
|
+
HELM: 'helm',
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
const ALPHABET = {
|
|
19
|
+
DNA: 'DNA',
|
|
20
|
+
RNA: 'RNA',
|
|
21
|
+
PT: 'PT',
|
|
22
|
+
UN: 'UN',
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
/** Class for handling notation units in Macromolecule columns */
|
|
26
|
+
class UnitsHandler {
|
|
27
|
+
static TAGS = {
|
|
28
|
+
aligned: 'aligned',
|
|
29
|
+
alphabet: 'alphabet',
|
|
30
|
+
alphabetSize: '.alphabetSize',
|
|
31
|
+
alphabetIsMultichar: '.alphabetIsMultichar',
|
|
32
|
+
separator: 'separator',
|
|
33
|
+
};
|
|
34
|
+
}
|
|
11
35
|
|
|
12
36
|
class BioPackageDetectors extends DG.Package {
|
|
13
37
|
|
|
@@ -52,7 +76,14 @@ class BioPackageDetectors extends DG.Package {
|
|
|
52
76
|
!(col.categories.length == 1 && !col.categories[0]) && // TODO: Remove with tests for single empty category value
|
|
53
77
|
DG.Detector.sampleCategories(col, (s) => BioPackageDetectors.isHelm(s), 1)
|
|
54
78
|
) {
|
|
55
|
-
|
|
79
|
+
const statsAsHelm = BioPackageDetectors.getStats(col, 2, BioPackageDetectors.splitterAsHelm);
|
|
80
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
81
|
+
|
|
82
|
+
const alphabetSize = Object.keys(statsAsHelm.freq).length;
|
|
83
|
+
const alphabetIsMultichar = Object.keys(statsAsHelm.freq).some((m) => m.length > 1);
|
|
84
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
|
|
85
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
|
|
86
|
+
|
|
56
87
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
57
88
|
}
|
|
58
89
|
|
|
@@ -62,9 +93,9 @@ class BioPackageDetectors extends DG.Package {
|
|
|
62
93
|
];
|
|
63
94
|
|
|
64
95
|
const candidateAlphabets = [
|
|
65
|
-
[
|
|
66
|
-
[
|
|
67
|
-
[
|
|
96
|
+
[ALPHABET.PT, BioPackageDetectors.PeptideFastaAlphabet, 0.55],
|
|
97
|
+
[ALPHABET.DNA, BioPackageDetectors.DnaFastaAlphabet, 0.55],
|
|
98
|
+
[ALPHABET.RNA, BioPackageDetectors.RnaFastaAlphabet, 0.55],
|
|
68
99
|
];
|
|
69
100
|
|
|
70
101
|
// Check for url column, maybe it is too heavy check
|
|
@@ -87,23 +118,24 @@ class BioPackageDetectors extends DG.Package {
|
|
|
87
118
|
// if (Object.keys(statsAsChars.freq).length === 0) return;
|
|
88
119
|
|
|
89
120
|
const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
|
|
90
|
-
if (decoy !=
|
|
121
|
+
if (decoy != ALPHABET.UN) return null;
|
|
91
122
|
|
|
92
123
|
if (statsAsChars.sameLength) {
|
|
93
124
|
if (Object.keys(statsAsChars.freq).length > 0) { // require non empty alphabet
|
|
94
125
|
const alphabet = BioPackageDetectors.detectAlphabet(statsAsChars.freq, candidateAlphabets, '-');
|
|
95
|
-
if (alphabet ===
|
|
126
|
+
if (alphabet === ALPHABET.UN) return null;
|
|
96
127
|
|
|
97
|
-
const units =
|
|
128
|
+
const units = NOTATION.FASTA;
|
|
98
129
|
col.setTag(DG.TAGS.UNITS, units);
|
|
99
|
-
col.setTag(
|
|
100
|
-
col.setTag(
|
|
130
|
+
col.setTag(UnitsHandler.TAGS.aligned, 'SEQ.MSA');
|
|
131
|
+
col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
|
|
101
132
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
102
133
|
}
|
|
103
134
|
} else {
|
|
104
135
|
const separator = BioPackageDetectors.detectSeparator(statsAsChars.freq);
|
|
105
136
|
const gapSymbol = separator ? '' : '-';
|
|
106
|
-
const splitter = separator ? BioPackageDetectors.getSplitterWithSeparator(separator) :
|
|
137
|
+
const splitter = separator ? BioPackageDetectors.getSplitterWithSeparator(separator) :
|
|
138
|
+
BioPackageDetectors.splitterAsFasta;
|
|
107
139
|
|
|
108
140
|
const stats = BioPackageDetectors.getStats(col, 5, splitter);
|
|
109
141
|
// Empty monomer alphabet is not allowed
|
|
@@ -111,7 +143,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
111
143
|
// Long monomer names for sequences with separators have constraints
|
|
112
144
|
if (separator && BioPackageDetectors.checkForbiddenWithSeparators(stats.freq)) return null;
|
|
113
145
|
|
|
114
|
-
const format = separator ?
|
|
146
|
+
const format = separator ? NOTATION.SEPARATOR : NOTATION.FASTA;
|
|
115
147
|
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
116
148
|
|
|
117
149
|
// TODO: If separator detected, then extra efforts to detect alphabet are allowed.
|
|
@@ -120,9 +152,15 @@ class BioPackageDetectors extends DG.Package {
|
|
|
120
152
|
// const forbidden = BioPackageDetectors.checkForbiddenWoSeparator(stats.freq);
|
|
121
153
|
if (separator || alphabet != 'UN') {
|
|
122
154
|
col.setTag(DG.TAGS.UNITS, format);
|
|
123
|
-
col.setTag(
|
|
124
|
-
col.setTag(
|
|
125
|
-
if (separator) col.setTag(
|
|
155
|
+
col.setTag(UnitsHandler.TAGS.aligned, seqType);
|
|
156
|
+
col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
|
|
157
|
+
if (separator) col.setTag(UnitsHandler.TAGS.separator, separator);
|
|
158
|
+
if (alphabet === ALPHABET.UN) {
|
|
159
|
+
const alphabetSize = Object.keys(stats.freq).length;
|
|
160
|
+
const alphabetIsMultichar = Object.keys(stats.freq).some((m) => m.length > 1);
|
|
161
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
|
|
162
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
|
|
163
|
+
}
|
|
126
164
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
127
165
|
}
|
|
128
166
|
}
|
|
@@ -218,7 +256,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
218
256
|
const sim = candidatesSims.find((cs) => cs[4] == maxSim);
|
|
219
257
|
alphabetName = sim[0];
|
|
220
258
|
} else {
|
|
221
|
-
alphabetName =
|
|
259
|
+
alphabetName = ALPHABET.UN;
|
|
222
260
|
}
|
|
223
261
|
return alphabetName;
|
|
224
262
|
}
|
|
@@ -297,4 +335,28 @@ class BioPackageDetectors extends DG.Package {
|
|
|
297
335
|
'[MeNle]': 'L', // Nle - norleucine
|
|
298
336
|
'[MeA]': 'A', '[MeG]': 'G', '[MeF]': 'F',
|
|
299
337
|
};
|
|
338
|
+
|
|
339
|
+
static helmRe = /(PEPTIDE1|DNA1|RNA1)\{([^}]+)}/g;
|
|
340
|
+
static helmPp1Re = /\[([^\[\]]+)]/g;
|
|
341
|
+
|
|
342
|
+
/** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. for RNA). */
|
|
343
|
+
static splitterAsHelm(seq) {
|
|
344
|
+
BioPackageDetectors.helmRe.lastIndex = 0;
|
|
345
|
+
const ea = BioPackageDetectors.helmRe.exec(seq.toString());
|
|
346
|
+
const inSeq = ea ? ea[2] : null;
|
|
347
|
+
|
|
348
|
+
const mmPostProcess = (mm) => {
|
|
349
|
+
BioPackageDetectors.helmPp1Re.lastIndex = 0;
|
|
350
|
+
const pp1M = BioPackageDetectors.helmPp1Re.exec(mm);
|
|
351
|
+
if (pp1M && pp1M.length >= 2) {
|
|
352
|
+
return pp1M[1];
|
|
353
|
+
} else {
|
|
354
|
+
return mm;
|
|
355
|
+
}
|
|
356
|
+
};
|
|
357
|
+
|
|
358
|
+
const mmList = inSeq ? inSeq.split('.') : [];
|
|
359
|
+
const mmListRes = mmList.map(mmPostProcess);
|
|
360
|
+
return mmListRes;
|
|
361
|
+
}
|
|
300
362
|
}
|