@datagrok/bio 1.5.7 → 1.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/css/helm.css +3 -0
  2. package/detectors.js +33 -22
  3. package/dist/package-test.js +1225 -453
  4. package/dist/package.js +979 -287
  5. package/files/samples/sample_FASTA.csv +66 -66
  6. package/files/samples/testDemog.csv +5851 -0
  7. package/files/samples/testHelm.csv +6 -0
  8. package/files/samples/{id.csv → testId.csv} +0 -0
  9. package/files/samples/{sar-small.csv → testSmiles.csv} +0 -0
  10. package/files/samples/testSmiles2.csv +12248 -0
  11. package/helm/JSDraw/Pistoia.HELM-uncompressed.js +9694 -0
  12. package/helm/JSDraw/Pistoia.HELM.js +27 -0
  13. package/helm/JSDraw/ReadMe.txt +8 -0
  14. package/helm/JSDraw/Scilligence.JSDraw2.Lite-uncompressed.js +31126 -0
  15. package/helm/JSDraw/Scilligence.JSDraw2.Lite.js +12 -0
  16. package/helm/JSDraw/Scilligence.JSDraw2.Resources.js +762 -0
  17. package/helm/JSDraw/dojo.js +250 -0
  18. package/helm/JSDraw/test.html +21 -0
  19. package/package.json +9 -2
  20. package/src/monomer-library.ts +199 -0
  21. package/src/package-test.ts +2 -0
  22. package/src/package.ts +71 -20
  23. package/src/tests/convert-test.ts +143 -22
  24. package/src/tests/detectors-test.ts +139 -156
  25. package/src/tests/renderer-test.ts +58 -0
  26. package/src/tests/splitter-test.ts +22 -0
  27. package/src/tests/types.ts +7 -0
  28. package/src/utils/atomic-works.ts +218 -97
  29. package/src/utils/cell-renderer.ts +211 -0
  30. package/src/utils/chem-palette.ts +280 -0
  31. package/src/utils/convert.ts +25 -16
  32. package/src/utils/misc.ts +29 -0
  33. package/src/utils/multiple-sequence-alignment.ts +5 -3
  34. package/src/utils/notation-converter.ts +120 -84
  35. package/src/utils/sequence-activity-cliffs.ts +2 -2
  36. package/src/utils/types.ts +13 -0
  37. package/src/utils/utils.ts +35 -30
  38. package/test-Bio-34f75e5127b8-936bf89b.html +256 -0
  39. package/files/sample_FASTA.csv +0 -66
  40. package/files/sample_FASTA_with_activities.csv +0 -66
  41. package/files/sample_MSA.csv +0 -541
package/css/helm.css ADDED
@@ -0,0 +1,3 @@
1
+ .d4-g-cell[semType="Macromolecule"] * {
2
+ pointer-events: none !important;
3
+ }
package/detectors.js CHANGED
@@ -13,17 +13,19 @@ class BioPackageDetectors extends DG.Package {
13
13
 
14
14
  static mmSemType = 'Macromolecule';
15
15
 
16
- static Units = {
17
- FastaSeqPt: 'fasta:SEQ:PT', FastaSeqNt: 'fasta:SEQ:NT', FastaMsaPt: 'fasta:MSA:PT', FastaMsaNt: 'fasta:MSA:NT',
18
- };
19
-
20
- static AminoacidsFastaAlphabet = new Set([
16
+ static PeptideFastaAlphabet = new Set([
21
17
  'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
22
18
  'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
23
19
  ]);
24
20
 
25
- static NucleotidesFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
21
+ static DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
22
+
23
+ static RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
26
24
 
25
+ static SmilesRawAlphabet = new Set([
26
+ 'O', 'C', 'c', 'N', 'S', 'F', '(', ')',
27
+ '1', '2', '3', '4', '5', '6', '7',
28
+ '+', '-', '@', '[', ']', '/', '\\', '#', '=']);
27
29
 
28
30
  /** @param s {String} - string to check
29
31
  * @returns {boolean} */
@@ -42,17 +44,26 @@ class BioPackageDetectors extends DG.Package {
42
44
  return BioPackageDetectors.mmSemType;
43
45
  }
44
46
 
45
- const alphabetCandidates = [
46
- ['NT', BioPackageDetectors.NucleotidesFastaAlphabet],
47
- ['PT', BioPackageDetectors.AminoacidsFastaAlphabet],
47
+ const decoyAlphabets = [
48
+ ['SMILES', BioPackageDetectors.SmilesRawAlphabet],
49
+ ];
50
+
51
+ const candidateAlphabets = [
52
+ ['PT', BioPackageDetectors.PeptideFastaAlphabet],
53
+ ['DNA', BioPackageDetectors.DnaFastaAlphabet],
54
+ ['RNA', BioPackageDetectors.RnaFastaAlphabet],
48
55
  ];
49
56
 
50
57
  // TODO: Detect HELM sequence
51
58
  // TODO: Lazy calculations could be helpful for performance and convenient for expressing classification logic.
52
59
  const statsAsChars = BioPackageDetectors.getStats(col, 5, BioPackageDetectors.splitterAsChars);
60
+
61
+ const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null, 0.5);
62
+ if (decoy != 'UN') return null;
63
+
53
64
  if (statsAsChars.sameLength) {
54
65
  if (Object.keys(statsAsChars.freq).length > 0) { // require non empty alphabet
55
- const alphabet = BioPackageDetectors.detectAlphabet(statsAsChars.freq, alphabetCandidates, '-');
66
+ const alphabet = BioPackageDetectors.detectAlphabet(statsAsChars.freq, candidateAlphabets, '-');
56
67
  if (alphabet === 'UN') return null;
57
68
 
58
69
  const units = `fasta:SEQ.MSA:${alphabet}`;
@@ -74,10 +85,10 @@ class BioPackageDetectors extends DG.Package {
74
85
  const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
75
86
 
76
87
  // TODO: If separator detected, then extra efforts to detect alphabet are allowed.
77
- const alphabet = BioPackageDetectors.detectAlphabet(stats.freq, alphabetCandidates, gapSymbol);
88
+ const alphabet = BioPackageDetectors.detectAlphabet(stats.freq, candidateAlphabets, gapSymbol);
78
89
 
79
- const forbidden = BioPackageDetectors.checkForbiddenWoSeparator(stats.freq);
80
- if (separator || !forbidden) {
90
+ // const forbidden = BioPackageDetectors.checkForbiddenWoSeparator(stats.freq);
91
+ if (separator || alphabet != 'UN') {
81
92
  const units = `${format}:${seqType}:${alphabet}`;
82
93
  col.setTag(DG.TAGS.UNITS, units);
83
94
  if (separator) col.setTag('separator', separator);
@@ -103,8 +114,8 @@ class BioPackageDetectors extends DG.Package {
103
114
  const cleanFreq = Object.assign({}, ...Object.entries(freq)
104
115
  .filter(([m, f]) => m != ' ' &&
105
116
  !noSeparatorChemRe.test(m) && !noSeparatorAlphaDigitRe.test(m) &&
106
- !BioPackageDetectors.AminoacidsFastaAlphabet.has(m) &&
107
- !BioPackageDetectors.NucleotidesFastaAlphabet.has(m))
117
+ !BioPackageDetectors.PeptideFastaAlphabet.has(m) &&
118
+ !BioPackageDetectors.DnaFastaAlphabet.has(m))
108
119
  .map(([m, f]) => ({[m]: f})));
109
120
  if (Object.keys(cleanFreq).length == 0) return null;
110
121
 
@@ -124,11 +135,11 @@ class BioPackageDetectors extends DG.Package {
124
135
  return Object.keys(freq).filter((m) => forbiddenRe.test(m)).length > 0;
125
136
  }
126
137
 
127
- /** Without a separator, special symbols or digits are not allowed as monomers. */
128
- static checkForbiddenWoSeparator(freq) {
129
- const forbiddenRe = /[\d!@#$%^&*()_+\-=\[\]{};':"\\|,.<>\/?]/i;
130
- return Object.keys(freq).filter((m) => forbiddenRe.test(m)).length > 0;
131
- }
138
+ // /** Without a separator, special symbols or digits are not allowed as monomers. */
139
+ // static checkForbiddenWoSeparator(freq) {
140
+ // const forbiddenRe = /[\d!@#$%^&*()_+\-=\[\]{};':"\\|,.<>\/?]/i;
141
+ // return Object.keys(freq).filter((m) => forbiddenRe.test(m)).length > 0;
142
+ // }
132
143
 
133
144
  /** Stats of sequences with specified splitter func, returns { freq, sameLength } */
134
145
  static getStats(seqCol, minLength, splitter) {
@@ -161,7 +172,7 @@ class BioPackageDetectors extends DG.Package {
161
172
  * @param freq frequencies of monomers in sequence set
162
173
  * @param candidates an array of pairs [name, monomer set]
163
174
  * */
164
- static detectAlphabet(freq, candidates, gapSymbol) {
175
+ static detectAlphabet(freq, candidates, gapSymbol, cut = 0.65) {
165
176
  const candidatesSims = candidates.map((c) => {
166
177
  const sim = BioPackageDetectors.getAlphabetSimilarity(freq, c[1], gapSymbol);
167
178
  return [c[0], c[1], freq, sim];
@@ -169,7 +180,7 @@ class BioPackageDetectors extends DG.Package {
169
180
 
170
181
  let alphabetName;
171
182
  const maxSim = Math.max(...candidatesSims.map((cs) => cs[3]));
172
- if (maxSim > 0.65) {
183
+ if (maxSim > cut) {
173
184
  const sim = candidatesSims.find((cs) => cs[3] == maxSim);
174
185
  alphabetName = sim[0];
175
186
  } else {