@datagrok/bio 1.9.1 → 1.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -43,4 +43,4 @@ is not specified, then the Logo will be plotted from the first (till the last) p
43
43
  See also:
44
44
 
45
45
  * [Viewers](../viewers.md)
46
- * [Table view](../../overview/table-view.md)
46
+ * [Table view](../../datagrok/table-view.md)
package/detectors.js CHANGED
@@ -8,6 +8,30 @@
8
8
  * TODO: Use detectors from WebLogo pickUp.. methods
9
9
  */
10
10
 
11
+ /** enum type to simplify setting "user-friendly" notation if necessary */
12
+ const NOTATION = {
13
+ FASTA: 'fasta',
14
+ SEPARATOR: 'separator',
15
+ HELM: 'helm',
16
+ };
17
+
18
+ const ALPHABET = {
19
+ DNA: 'DNA',
20
+ RNA: 'RNA',
21
+ PT: 'PT',
22
+ UN: 'UN',
23
+ };
24
+
25
+ /** Class for handling notation units in Macromolecule columns */
26
+ class UnitsHandler {
27
+ static TAGS = {
28
+ aligned: 'aligned',
29
+ alphabet: 'alphabet',
30
+ alphabetSize: '.alphabetSize',
31
+ alphabetIsMultichar: '.alphabetIsMultichar',
32
+ separator: 'separator',
33
+ };
34
+ }
11
35
 
12
36
  class BioPackageDetectors extends DG.Package {
13
37
 
@@ -52,9 +76,14 @@ class BioPackageDetectors extends DG.Package {
52
76
  !(col.categories.length == 1 && !col.categories[0]) && // TODO: Remove with tests for single empty category value
53
77
  DG.Detector.sampleCategories(col, (s) => BioPackageDetectors.isHelm(s), 1)
54
78
  ) {
55
- const statsAsHelm = BioPackageDetectors.getStats(col, 5, BioPackageDetectors.splitterAsHelm);
56
- col.setTag(DG.TAGS.UNITS, 'helm');
57
- col.setTag('alphabetSize', statsAsHelm.freq.length);
79
+ const statsAsHelm = BioPackageDetectors.getStats(col, 2, BioPackageDetectors.splitterAsHelm);
80
+ col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
81
+
82
+ const alphabetSize = Object.keys(statsAsHelm.freq).length;
83
+ const alphabetIsMultichar = Object.keys(statsAsHelm.freq).some((m) => m.length > 1);
84
+ col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
85
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
86
+
58
87
  return DG.SEMTYPE.MACROMOLECULE;
59
88
  }
60
89
 
@@ -64,9 +93,9 @@ class BioPackageDetectors extends DG.Package {
64
93
  ];
65
94
 
66
95
  const candidateAlphabets = [
67
- ['PT', BioPackageDetectors.PeptideFastaAlphabet, 0.55],
68
- ['DNA', BioPackageDetectors.DnaFastaAlphabet, 0.55],
69
- ['RNA', BioPackageDetectors.RnaFastaAlphabet, 0.55],
96
+ [ALPHABET.PT, BioPackageDetectors.PeptideFastaAlphabet, 0.55],
97
+ [ALPHABET.DNA, BioPackageDetectors.DnaFastaAlphabet, 0.55],
98
+ [ALPHABET.RNA, BioPackageDetectors.RnaFastaAlphabet, 0.55],
70
99
  ];
71
100
 
72
101
  // Check for url column, maybe it is too heavy check
@@ -89,23 +118,24 @@ class BioPackageDetectors extends DG.Package {
89
118
  // if (Object.keys(statsAsChars.freq).length === 0) return;
90
119
 
91
120
  const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
92
- if (decoy != 'UN') return null;
121
+ if (decoy != ALPHABET.UN) return null;
93
122
 
94
123
  if (statsAsChars.sameLength) {
95
124
  if (Object.keys(statsAsChars.freq).length > 0) { // require non empty alphabet
96
125
  const alphabet = BioPackageDetectors.detectAlphabet(statsAsChars.freq, candidateAlphabets, '-');
97
- if (alphabet === 'UN') return null;
126
+ if (alphabet === ALPHABET.UN) return null;
98
127
 
99
- const units = 'fasta';
128
+ const units = NOTATION.FASTA;
100
129
  col.setTag(DG.TAGS.UNITS, units);
101
- col.setTag('aligned', 'SEQ.MSA');
102
- col.setTag('alphabet', alphabet);
130
+ col.setTag(UnitsHandler.TAGS.aligned, 'SEQ.MSA');
131
+ col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
103
132
  return DG.SEMTYPE.MACROMOLECULE;
104
133
  }
105
134
  } else {
106
135
  const separator = BioPackageDetectors.detectSeparator(statsAsChars.freq);
107
136
  const gapSymbol = separator ? '' : '-';
108
- const splitter = separator ? BioPackageDetectors.getSplitterWithSeparator(separator) : BioPackageDetectors.splitterAsFasta;
137
+ const splitter = separator ? BioPackageDetectors.getSplitterWithSeparator(separator) :
138
+ BioPackageDetectors.splitterAsFasta;
109
139
 
110
140
  const stats = BioPackageDetectors.getStats(col, 5, splitter);
111
141
  // Empty monomer alphabet is not allowed
@@ -113,7 +143,7 @@ class BioPackageDetectors extends DG.Package {
113
143
  // Long monomer names for sequences with separators have constraints
114
144
  if (separator && BioPackageDetectors.checkForbiddenWithSeparators(stats.freq)) return null;
115
145
 
116
- const format = separator ? 'separator' : 'fasta';
146
+ const format = separator ? NOTATION.SEPARATOR : NOTATION.FASTA;
117
147
  const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
118
148
 
119
149
  // TODO: If separator detected, then extra efforts to detect alphabet are allowed.
@@ -122,10 +152,15 @@ class BioPackageDetectors extends DG.Package {
122
152
  // const forbidden = BioPackageDetectors.checkForbiddenWoSeparator(stats.freq);
123
153
  if (separator || alphabet != 'UN') {
124
154
  col.setTag(DG.TAGS.UNITS, format);
125
- col.setTag('aligned', seqType);
126
- col.setTag('alphabet', alphabet);
127
- if (separator) col.setTag('separator', separator);
128
- if (alphabet === 'UN') col.setTag('alphabetSize', stats.freq.length);
155
+ col.setTag(UnitsHandler.TAGS.aligned, seqType);
156
+ col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
157
+ if (separator) col.setTag(UnitsHandler.TAGS.separator, separator);
158
+ if (alphabet === ALPHABET.UN) {
159
+ const alphabetSize = Object.keys(stats.freq).length;
160
+ const alphabetIsMultichar = Object.keys(stats.freq).some((m) => m.length > 1);
161
+ col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
162
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
163
+ }
129
164
  return DG.SEMTYPE.MACROMOLECULE;
130
165
  }
131
166
  }
@@ -221,7 +256,7 @@ class BioPackageDetectors extends DG.Package {
221
256
  const sim = candidatesSims.find((cs) => cs[4] == maxSim);
222
257
  alphabetName = sim[0];
223
258
  } else {
224
- alphabetName = 'UN';
259
+ alphabetName = ALPHABET.UN;
225
260
  }
226
261
  return alphabetName;
227
262
  }