@datagrok-libraries/bio 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  },
6
6
  "beta": true,
7
7
  "friendlyName": "Datagrok bio library",
8
- "version": "0.0.3",
8
+ "version": "0.0.4",
9
9
  "description": "",
10
10
  "dependencies": {
11
11
  "datagrok-api": ">=0.108.0",
@@ -0,0 +1,155 @@
1
+ import { assert } from '@datagrok-libraries/utils/src/operations';
2
/**
 * Registry of the numeric per-residue scales that can be used to encode sequences.
 *
 * The tables themselves live in the static `scales` property, which is attached
 * right after the class body.
 */
class SideChainScales {
  /**
   * Lists the names of all registered scales.
   *
   * @static
   * @return {string[]} Names of the scales, in the order they are declared.
   */
  static getAvailableScales() {
    return Object.keys(this.scales);
  }

  /**
   * Looks a scale up by its name; the lookup is guarded by an assertion, so an
   * unknown name makes the assertion fail instead of returning `undefined`.
   *
   * @static
   * @param {string} name Name of the scale, e.g. 'categorial'.
   * @return {Object<string, number>} Table mapping a residue letter to its value.
   */
  static getScale(name) {
    // Only own entries count: a bare index lookup would also resolve names inherited
    // from Object.prototype ('toString', 'constructor', ...) and hand back something
    // that is not a scale table at all.
    const isKnown = Object.prototype.hasOwnProperty.call(this.scales, name) &&
      this.scales[name] !== undefined;
    assert(isKnown, `Scale '${name}' was not found.`);
    return this.scales[name];
  }
}
SideChainScales.scales = {
  // Wimley-White interfacial hydrophobicity scale
  'WimleyWhite': {
    '-': 0,
    'A': 0.17,
    'C': -0.24,
    'D': -0.07,
    'E': -0.01,
    'F': -1.13,
    'G': 0.01,
    'H': 0.17,
    'I': -0.31,
    'K': 0.99,
    'L': -0.56,
    'M': -0.23,
    'N': 0.42,
    'P': 0.45,
    'Q': 0.58,
    'R': 0.81,
    'S': 0.13,
    'T': 0.14,
    'V': 0.07,
    'W': -1.85,
    'Y': -0.94,
  },
  // Plain ordinal codes: the gap symbol is 0, residue letters are numbered 1..20.
  'categorial': {
    '-': 0,
    'A': 1,
    'C': 2,
    'D': 3,
    'E': 4,
    'F': 5,
    'G': 6,
    'H': 7,
    'I': 8,
    'K': 9,
    'L': 10,
    'M': 11,
    'N': 12,
    'P': 13,
    'Q': 14,
    'R': 15,
    'S': 16,
    'T': 17,
    'V': 18,
    'W': 19,
    'Y': 20,
  },
};
60
/**
 * Encodes aligned amino acid residue sequences into numeric vectors and decodes them back.
 *
 * The residue-to-number table is taken from `SideChainScales`; the reverse table is derived
 * from it. When a scale assigns the same value to several residues (in 'WimleyWhite' both
 * 'A' and 'H' are 0.17), the residue listed last wins in the reverse table, so
 * `decode(encode(s))` is only guaranteed to restore `s` for scales with unique values
 * such as 'categorial'.
 *
 * @export
 * @class AlignedSequenceEncoder
 */
export class AlignedSequenceEncoder {
  /**
   * @param {string} [scale='categorial'] Name of the scale to encode with.
   */
  constructor(scale = 'categorial') {
    this.aa2num = SideChainScales.getScale(scale);
    this.num2aa = {};
    for (const [letter, code] of Object.entries(this.aa2num))
      this.num2aa[code] = letter;
  }

  /**
   * Truncate NH2 and -COOH terminals of the given sequence.
   *
   * A leading 'NH2' is removed while the '-' following it is kept; a trailing '-COOH'
   * is removed together with its '-'. Sequences without these terminals pass through as is.
   *
   * @static
   * @param {string} seq The sequence provided.
   * @return {string} Truncated sequence.
   * @memberof AlignedSequenceEncoder
   */
  static _truncateSequence(seq) {
    const [head, tail] = ['NH2', 'COOH'];
    let start = 0;
    let end = seq.length;

    if (seq.startsWith(head)) {
      // Cut only 'NH2'; the '-' right after it stays in the result.
      assert(seq[head.length] === '-', `Wrong sequence format: ${head} without following '-' in '${seq}'.`);
      start = head.length;
    }
    if (seq.endsWith(tail)) {
      // Cut both 'COOH' and the '-' in front of it.
      const cut = tail.length + 1;
      assert(seq[end - cut] === '-', `Wrong sequence format: ${tail} without '-' precending in '${seq}'.`);
      end -= cut;
    }
    return seq.substring(start, end);
  }

  /**
   * Removes the single separating dash in front of every run of non-dash characters.
   *
   * A dash that is not directly followed by a non-dash character is kept,
   * e.g. '-A--C' becomes 'A-C'.
   *
   * @static
   * @param {string} seq The sequence to process.
   * @return {string} Processed sequence.
   * @memberof AlignedSequenceEncoder
   */
  static _dropDefises(seq) {
    return seq.replace(/-([^-]+)/g, '$1');
  }

  /**
   * Performs terminal truncation and then removes the separating dashes.
   *
   * @static
   * @param {string} sequence The sequence to clean.
   * @return {string} Result of cleaning.
   * @memberof AlignedSequenceEncoder
   */
  static clean(sequence) {
    return AlignedSequenceEncoder._dropDefises(AlignedSequenceEncoder._truncateSequence(sequence));
  }

  /**
   * Encodes the sequence character by character using the selected scale.
   * The assertion fails on the first character that the scale does not define.
   *
   * @param {string} sequence The sequence.
   * @return {number[]} Encoded vector, one number per character.
   * @memberof AlignedSequenceEncoder
   */
  encode(sequence) {
    const values = [];
    for (let i = 0; i < sequence.length; ++i) {
      const char = sequence[i];
      assert(Object.prototype.hasOwnProperty.call(this.aa2num, char),
        `Unknown char '${char}' found in sequence '${sequence}'`);
      values.push(this.encodeLettter(char));
    }
    return values;
  }

  /**
   * Encodes a single residue letter. No validation is done here: a letter missing
   * from the scale yields `undefined`.
   *
   * The misspelled name is kept for backward compatibility; see `encodeLetter`.
   *
   * @param {string} letter The residue letter.
   * @return {number} Value of the letter in the selected scale.
   * @memberof AlignedSequenceEncoder
   */
  encodeLettter(letter) {
    return this.aa2num[letter];
  }

  /**
   * Correctly spelled alias of `encodeLettter`.
   *
   * @param {string} letter The residue letter.
   * @return {number} Value of the letter in the selected scale.
   * @memberof AlignedSequenceEncoder
   */
  encodeLetter(letter) {
    return this.encodeLettter(letter);
  }

  /**
   * Decode the encoded vector into the sequence back.
   * The assertion fails on the first code that has no residue in the reverse table.
   *
   * @param {number[]} value The vector encoded.
   * @return {string} Decoded sequence.
   * @memberof AlignedSequenceEncoder
   */
  decode(value) {
    const letters = [];
    for (const code of value) {
      // Own-property test: `in` would also accept names inherited from Object.prototype.
      assert(Object.prototype.hasOwnProperty.call(this.num2aa, code),
        `Unknown code '${code}' found in vector '${value}'`);
      letters.push(this.num2aa[code]);
    }
    return letters.join('');
  }
}
155
+ //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic2VxdWVuY2UtZW5jb2Rlci5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbInNlcXVlbmNlLWVuY29kZXIudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IkFBQUEsT0FBTyxFQUFDLE1BQU0sRUFBQyxNQUFNLDBDQUEwQyxDQUFDO0FBS2hFLE1BQU0sZUFBZTtJQW1EbkIsTUFBTSxDQUFDLGtCQUFrQjtRQUN2QixPQUFPLE1BQU0sQ0FBQyxPQUFPLENBQUMsSUFBSSxDQUFDLE1BQU0sQ0FBQyxDQUFDLEdBQUcsQ0FBQyxDQUFDLENBQUMsQ0FBQyxFQUFFLENBQUMsQ0FBQyxFQUFFLEVBQUUsQ0FBQyxDQUFDLENBQUMsQ0FBQztJQUN4RCxDQUFDO0lBRUQsTUFBTSxDQUFDLFFBQVEsQ0FBQyxJQUFZO1FBQzFCLE1BQU0sQ0FBQyxDQUFDLENBQUMsSUFBSSxDQUFDLE1BQU0sQ0FBQyxJQUFJLENBQUMsS0FBSyxTQUFTLENBQUMsRUFBRSxVQUFVLElBQUksa0JBQWtCLENBQUMsQ0FBQztRQUM3RSxPQUFPLElBQUksQ0FBQyxNQUFNLENBQUMsSUFBSSxDQUFDLENBQUM7SUFDM0IsQ0FBQzs7QUF6RE0sc0JBQU0sR0FBNkI7SUFDeEMsZ0RBQWdEO0lBQ2hELGFBQWEsRUFBRTtRQUNiLEdBQUcsRUFBRSxDQUFDO1FBQ04sR0FBRyxFQUFFLElBQUk7UUFDVCxHQUFHLEVBQUUsQ0FBQyxJQUFJO1FBQ1YsR0FBRyxFQUFFLENBQUMsSUFBSTtRQUNWLEdBQUcsRUFBRSxDQUFDLElBQUk7UUFDVixHQUFHLEVBQUUsQ0FBQyxJQUFJO1FBQ1YsR0FBRyxFQUFFLElBQUk7UUFDVCxHQUFHLEVBQUUsSUFBSTtRQUNULEdBQUcsRUFBRSxDQUFDLElBQUk7UUFDVixHQUFHLEVBQUUsSUFBSTtRQUNULEdBQUcsRUFBRSxDQUFDLElBQUk7UUFDVixHQUFHLEVBQUUsQ0FBQyxJQUFJO1FBQ1YsR0FBRyxFQUFFLElBQUk7UUFDVCxHQUFHLEVBQUUsSUFBSTtRQUNULEdBQUcsRUFBRSxJQUFJO1FBQ1QsR0FBRyxFQUFFLElBQUk7UUFDVCxHQUFHLEVBQUUsSUFBSTtRQUNULEdBQUcsRUFBRSxJQUFJO1FBQ1QsR0FBRyxFQUFFLElBQUk7UUFDVCxHQUFHLEVBQUUsQ0FBQyxJQUFJO1FBQ1YsR0FBRyxFQUFFLENBQUMsSUFBSTtLQUNYO0lBQ0QsWUFBWSxFQUFFO1FBQ1osR0FBRyxFQUFFLENBQUM7UUFDTixHQUFHLEVBQUUsQ0FBQztRQUNOLEdBQUcsRUFBRSxDQUFDO1FBQ04sR0FBRyxFQUFFLENBQUM7UUFDTixHQUFHLEVBQUUsQ0FBQztRQUNOLEdBQUcsRUFBRSxDQUFDO1FBQ04sR0FBRyxFQUFFLENBQUM7UUFDTixHQUFHLEVBQUUsQ0FBQztRQUNOLEdBQUcsRUFBRSxDQUFDO1FBQ04sR0FBRyxFQUFFLENBQUM7UUFDTixHQUFHLEVBQUUsRUFBRTtRQUNQLEdBQUcsRUFBRSxFQUFFO1FBQ1AsR0FBRyxFQUFFLEVBQUU7UUFDUCxHQUFHLEVBQUUsRUFBRTtRQUNQLEdBQUcsRUFBRSxFQUFFO1FBQ1AsR0FBRyxFQUFFLEVBQUU7UUFDUCxHQUFHLEVBQUUsRUFBRTtRQUNQLEdBQUcsRUFBRSxFQUFFO1FBQ1AsR0FBRyxFQUFFLEVBQUU7UUFDUCxHQUFHLEVBQUUsRUFBRTtRQUNQ
LEdBQUcsRUFBRSxFQUFFO0tBQ1I7Q0FDRixDQUFDO0FBWUo7Ozs7O0dBS0c7QUFDSCxNQUFNLE9BQU8sc0JBQXNCO0lBSWpDLFlBQVksUUFBZ0IsWUFBWTtRQUN0QyxJQUFJLENBQUMsTUFBTSxHQUFHLGVBQWUsQ0FBQyxRQUFRLENBQUMsS0FBSyxDQUFDLENBQUM7UUFDOUMsSUFBSSxDQUFDLE1BQU0sR0FBRyxFQUFFLENBQUM7UUFDakIsTUFBTSxDQUFDLE9BQU8sQ0FBQyxJQUFJLENBQUMsTUFBTSxDQUFDLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBQyxDQUFDLEVBQUUsQ0FBQyxDQUFDLEVBQUUsRUFBRSxDQUFDLENBQUMsSUFBSSxDQUFDLE1BQU0sQ0FBQyxDQUFDLENBQUMsR0FBRyxDQUFDLENBQUMsQ0FBQyxDQUFDO0lBQ3hFLENBQUM7SUFFRDs7Ozs7OztTQU9LO0lBQ0wsTUFBTSxDQUFDLGlCQUFpQixDQUFDLEdBQVc7UUFDbEMsSUFBSSxLQUFLLEdBQUcsQ0FBQyxDQUFDO1FBQ2QsSUFBSSxHQUFHLEdBQUcsR0FBRyxDQUFDLE1BQU0sQ0FBQztRQUNyQixNQUFNLE9BQU8sR0FBRyxDQUFDLEtBQUssRUFBRSxNQUFNLENBQUMsQ0FBQztRQUVoQyxJQUFJLEdBQUcsQ0FBQyxVQUFVLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBQyxDQUFDLEVBQUU7WUFDOUIsTUFBTSxDQUFDLEdBQUcsT0FBTyxDQUFDLENBQUMsQ0FBQyxDQUFDLE1BQU0sQ0FBQyxDQUFDLHdDQUF3QztZQUNyRSxNQUFNLENBQUMsR0FBRyxDQUFDLENBQUMsQ0FBQyxJQUFJLEdBQUcsRUFBRSwwQkFBMEIsT0FBTyxDQUFDLENBQUMsQ0FBQyw4QkFBOEIsR0FBRyxJQUFJLENBQUMsQ0FBQztZQUNqRyxLQUFLLEdBQUcsQ0FBQyxDQUFDO1NBQ1g7UUFDRCxJQUFJLEdBQUcsQ0FBQyxRQUFRLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBQyxDQUFDLEVBQUU7WUFDNUIsTUFBTSxDQUFDLEdBQUcsT0FBTyxDQUFDLENBQUMsQ0FBQyxDQUFDLE1BQU0sR0FBQyxDQUFDLENBQUMsQ0FBQyxzQ0FBc0M7WUFDckUsTUFBTSxDQUFDLEdBQUcsQ0FBQyxHQUFHLEdBQUMsQ0FBQyxDQUFDLElBQUksR0FBRyxFQUFFLDBCQUEwQixPQUFPLENBQUMsQ0FBQyxDQUFDLCtCQUErQixHQUFHLElBQUksQ0FBQyxDQUFDO1lBQ3RHLEdBQUcsSUFBSSxDQUFDLENBQUM7U0FDVjtRQUNELE9BQU8sR0FBRyxDQUFDLFNBQVMsQ0FBQyxLQUFLLEVBQUUsR0FBRyxDQUFDLENBQUM7SUFDbkMsQ0FBQztJQUVEOzs7Ozs7O1NBT0s7SUFDTCxNQUFNLENBQUMsWUFBWSxDQUFDLEdBQVc7UUFDN0IsT0FBTyxHQUFHLENBQUMsT0FBTyxDQUFDLGFBQWEsRUFBRSxJQUFJLENBQUMsQ0FBQztJQUMxQyxDQUFDO0lBRUQ7Ozs7Ozs7U0FPSztJQUNMLE1BQU0sQ0FBQyxLQUFLLENBQUMsUUFBZ0I7UUFDM0IsT0FBTyxzQkFBc0IsQ0FBQyxZQUFZLENBQUMsc0JBQXNCLENBQUMsaUJBQWlCLENBQUMsUUFBUSxDQUFDLENBQUMsQ0FBQztJQUNqRyxDQUFDO0lBRUQ7Ozs7OztTQU1LO0lBQ0UsTUFBTSxDQUFDLFFBQWdCO1FBQzVCLE1BQU0sTUFBTSxHQUFHLFFBQVEsQ0FBQyxNQUFNLENBQUM7UUFDL0IsTUFBTSxNQUFNLEdBQUcsSUFBSSxL
QUFLLENBQUMsTUFBTSxDQUFDLENBQUMsSUFBSSxDQUFDLENBQUMsQ0FBQyxDQUFDO1FBRXpDLEtBQUssSUFBSSxDQUFDLEdBQUcsQ0FBQyxFQUFFLENBQUMsR0FBRyxNQUFNLEVBQUUsRUFBRSxDQUFDLEVBQUU7WUFDL0IsTUFBTSxJQUFJLEdBQUcsUUFBUSxDQUFDLENBQUMsQ0FBQyxDQUFDO1lBRXpCLE1BQU0sQ0FBQyxJQUFJLElBQUksSUFBSSxDQUFDLE1BQU0sRUFBRSxpQkFBaUIsSUFBSSx3QkFBd0IsUUFBUSxHQUFHLENBQUMsQ0FBQztZQUV0RixNQUFNLENBQUMsQ0FBQyxDQUFDLEdBQUcsSUFBSSxDQUFDLGFBQWEsQ0FBQyxJQUFJLENBQUMsQ0FBQztTQUN0QztRQUNELE9BQU8sTUFBTSxDQUFDO0lBQ2hCLENBQUM7SUFFTSxhQUFhLENBQUMsTUFBYztRQUNqQyxPQUFPLElBQUksQ0FBQyxNQUFNLENBQUMsTUFBTSxDQUFDLENBQUM7SUFDN0IsQ0FBQztJQUVEOzs7Ozs7U0FNSztJQUNFLE1BQU0sQ0FBQyxLQUFlO1FBQzNCLElBQUksQ0FBQyxHQUFXLEVBQUUsQ0FBQztRQUVuQixLQUFLLElBQUksQ0FBQyxHQUFHLENBQUMsRUFBRSxDQUFDLEdBQUcsS0FBSyxDQUFDLE1BQU0sRUFBRSxFQUFFLENBQUMsRUFBRTtZQUNyQyxNQUFNLElBQUksR0FBRyxLQUFLLENBQUMsQ0FBQyxDQUFDLENBQUM7WUFFdEIsTUFBTSxDQUFDLElBQUksSUFBSSxJQUFJLENBQUMsTUFBTSxFQUFFLGlCQUFpQixJQUFJLHNCQUFzQixLQUFLLEdBQUcsQ0FBQyxDQUFDO1lBRWpGLENBQUMsSUFBSSxJQUFJLENBQUMsTUFBTSxDQUFDLElBQUksQ0FBQyxDQUFDO1NBQ3hCO1FBQ0QsT0FBTyxDQUFDLENBQUM7SUFDWCxDQUFDO0NBQ0YiLCJzb3VyY2VzQ29udGVudCI6WyJpbXBvcnQge2Fzc2VydH0gZnJvbSAnQGRhdGFncm9rLWxpYnJhcmllcy91dGlscy9zcmMvb3BlcmF0aW9ucyc7XHJcblxyXG50eXBlIFNpZGVDaGFpblNjYWxlID0ge1tuYW1lOiBzdHJpbmddOiBudW1iZXJ9O1xyXG50eXBlIFNpZGVDaGFpblNjYWxlQ29sbGVjdGlvbiA9IHtbbmFtZTogc3RyaW5nXTogU2lkZUNoYWluU2NhbGV9O1xyXG5cclxuY2xhc3MgU2lkZUNoYWluU2NhbGVzIHtcclxuICBzdGF0aWMgc2NhbGVzOiBTaWRlQ2hhaW5TY2FsZUNvbGxlY3Rpb24gPSB7XHJcbiAgICAvLyBXaW1sZXktV2hpdGUgaW50ZXJmYWNpYWwgaHlkcm9waG9iaWNpdHkgc2NhbGVcclxuICAgICdXaW1sZXlXaGl0ZSc6IHtcclxuICAgICAgJy0nOiAwLFxyXG4gICAgICAnQSc6IDAuMTcsXHJcbiAgICAgICdDJzogLTAuMjQsXHJcbiAgICAgICdEJzogLTAuMDcsIC8vIEFzcC06IDEuMjNcclxuICAgICAgJ0UnOiAtMC4wMSwgLy8gR2x1LTogMi4wMlxyXG4gICAgICAnRic6IC0xLjEzLCAvL1xyXG4gICAgICAnRyc6IDAuMDEsXHJcbiAgICAgICdIJzogMC4xNywgLy8gSGlzKzogMC45NlxyXG4gICAgICAnSSc6IC0wLjMxLFxyXG4gICAgICAnSyc6IDAuOTksIC8vIEx5cytcclxuICAgICAgJ0wnOiAtMC41NixcclxuICAgICAgJ00nOiAtMC4yMyxcclxuICAgICAgJ04nOiAwLjQyLFxyXG4gICAgICAnUCc6
IDAuNDUsXHJcbiAgICAgICdRJzogMC41OCxcclxuICAgICAgJ1InOiAwLjgxLCAvLyBBcmcrXHJcbiAgICAgICdTJzogMC4xMyxcclxuICAgICAgJ1QnOiAwLjE0LFxyXG4gICAgICAnVic6IDAuMDcsXHJcbiAgICAgICdXJzogLTEuODUsXHJcbiAgICAgICdZJzogLTAuOTQsXHJcbiAgICB9LFxyXG4gICAgJ2NhdGVnb3JpYWwnOiB7XHJcbiAgICAgICctJzogMCxcclxuICAgICAgJ0EnOiAxLFxyXG4gICAgICAnQyc6IDIsXHJcbiAgICAgICdEJzogMyxcclxuICAgICAgJ0UnOiA0LFxyXG4gICAgICAnRic6IDUsXHJcbiAgICAgICdHJzogNixcclxuICAgICAgJ0gnOiA3LFxyXG4gICAgICAnSSc6IDgsXHJcbiAgICAgICdLJzogOSxcclxuICAgICAgJ0wnOiAxMCxcclxuICAgICAgJ00nOiAxMSxcclxuICAgICAgJ04nOiAxMixcclxuICAgICAgJ1AnOiAxMyxcclxuICAgICAgJ1EnOiAxNCxcclxuICAgICAgJ1InOiAxNSxcclxuICAgICAgJ1MnOiAxNixcclxuICAgICAgJ1QnOiAxNyxcclxuICAgICAgJ1YnOiAxOCxcclxuICAgICAgJ1cnOiAxOSxcclxuICAgICAgJ1knOiAyMCxcclxuICAgIH0sXHJcbiAgfTtcclxuXHJcbiAgc3RhdGljIGdldEF2YWlsYWJsZVNjYWxlcygpOiBzdHJpbmdbXSB7XHJcbiAgICByZXR1cm4gT2JqZWN0LmVudHJpZXModGhpcy5zY2FsZXMpLm1hcCgoW2ssIF9dKSA9PiBrKTtcclxuICB9XHJcblxyXG4gIHN0YXRpYyBnZXRTY2FsZShuYW1lOiBzdHJpbmcpOiBTaWRlQ2hhaW5TY2FsZSB7XHJcbiAgICBhc3NlcnQoISh0aGlzLnNjYWxlc1tuYW1lXSA9PT0gdW5kZWZpbmVkKSwgYFNjYWxlICcke25hbWV9JyB3YXMgbm90IGZvdW5kLmApO1xyXG4gICAgcmV0dXJuIHRoaXMuc2NhbGVzW25hbWVdO1xyXG4gIH1cclxufVxyXG5cclxuLyoqXHJcbiAqIENsYXNzIHRvIGNhdGVnb3JpYWwgZW5jb2RlL2RlY29kZSBhbGlnbmVkIGFtaW5vIGFjaWQgcmVzaWR1ZXMgc2VxdWVuY2UuXHJcbiAqXHJcbiAqIEBleHBvcnRcclxuICogQGNsYXNzIEFsaWduZWRTZXF1ZW5jZUVuY29kZXJcclxuICovXHJcbmV4cG9ydCBjbGFzcyBBbGlnbmVkU2VxdWVuY2VFbmNvZGVyIHtcclxuICBwcm90ZWN0ZWQgYWEybnVtOiBTaWRlQ2hhaW5TY2FsZTtcclxuICBwcm90ZWN0ZWQgbnVtMmFhOiB7W2NvZGU6IG51bWJlcl06IHN0cmluZ307XHJcblxyXG4gIGNvbnN0cnVjdG9yKHNjYWxlOiBzdHJpbmcgPSAnY2F0ZWdvcmlhbCcpIHtcclxuICAgIHRoaXMuYWEybnVtID0gU2lkZUNoYWluU2NhbGVzLmdldFNjYWxlKHNjYWxlKTtcclxuICAgIHRoaXMubnVtMmFhID0ge307XHJcbiAgICBPYmplY3QuZW50cmllcyh0aGlzLmFhMm51bSkuZm9yRWFjaCgoW2ssIHZdKSA9PiAodGhpcy5udW0yYWFbdl0gPSBrKSk7XHJcbiAgfVxyXG5cclxuICAvKipcclxuICAgICAqIFRydW5jYXRlIE5IMiBhbmQgLUNPT0ggdGVybWluYWxzIG9mIHRoZSBnaXZlbiBzZXF1ZW5jZS5cclxuICAgICAqXHJcbiAgICAgKiBAc3RhdGljXHJcbiAgICAgKiBAcGFy
YW0ge3N0cmluZ30gc2VxIFRoZSBzZXF1ZW5jZSBwcm92aWRlZC5cclxuICAgICAqIEByZXR1cm4ge3N0cmluZ30gVHJ1bmNhdGVkIHNlcXVlbmNlLlxyXG4gICAgICogQG1lbWJlcm9mIEFsaWduZWRTZXF1ZW5jZUVuY29kZXJcclxuICAgICAqL1xyXG4gIHN0YXRpYyBfdHJ1bmNhdGVTZXF1ZW5jZShzZXE6IHN0cmluZyk6IHN0cmluZyB7XHJcbiAgICBsZXQgc3RhcnQgPSAwO1xyXG4gICAgbGV0IGVuZCA9IHNlcS5sZW5ndGg7XHJcbiAgICBjb25zdCB0ZXJtaW5hID0gWydOSDInLCAnQ09PSCddO1xyXG5cclxuICAgIGlmIChzZXEuc3RhcnRzV2l0aCh0ZXJtaW5hWzBdKSkge1xyXG4gICAgICBjb25zdCBsID0gdGVybWluYVswXS5sZW5ndGg7IC8vIEN1dCBvbmx5ICdOSDInIHdpdGhvdXQgZm9sbG93aW5nICctJy5cclxuICAgICAgYXNzZXJ0KHNlcVtsXSA9PSAnLScsIGBXcm9uZyBzZXF1ZW5jZSBmb3JtYXQ6ICR7dGVybWluYVswXX0gd2l0aG91dCBmb2xsb3dpbmcgJy0nIGluICcke3NlcX0nLmApO1xyXG4gICAgICBzdGFydCA9IGw7XHJcbiAgICB9XHJcbiAgICBpZiAoc2VxLmVuZHNXaXRoKHRlcm1pbmFbMV0pKSB7XHJcbiAgICAgIGNvbnN0IGwgPSB0ZXJtaW5hWzFdLmxlbmd0aCsxOyAvLyBDdXQgYm90aCAnQ09PSCcgYW5kIHByZWNlbmRpbmcgJy0nLlxyXG4gICAgICBhc3NlcnQoc2VxW2VuZC1sXSA9PSAnLScsIGBXcm9uZyBzZXF1ZW5jZSBmb3JtYXQ6ICR7dGVybWluYVsxXX0gd2l0aG91dCAnLScgcHJlY2VuZGluZyBpbiAnJHtzZXF9Jy5gKTtcclxuICAgICAgZW5kIC09IGw7XHJcbiAgICB9XHJcbiAgICByZXR1cm4gc2VxLnN1YnN0cmluZyhzdGFydCwgZW5kKTtcclxuICB9XHJcblxyXG4gIC8qKlxyXG4gICAgICogQ3V0cyBhdXhpbGlhcnkgZGVmaXNlcyBiZWZvcmUgYSByZXNpZHVlLlxyXG4gICAgICpcclxuICAgICAqIEBzdGF0aWNcclxuICAgICAqIEBwYXJhbSB7c3RyaW5nfSBzZXEgVGhlIHNlcXVlbmNlIHRvIHByb2Nlc3MuXHJcbiAgICAgKiBAcmV0dXJuIHtzdHJpbmd9IFByb2Nlc3NlZCBzZXF1ZW5jZS5cclxuICAgICAqIEBtZW1iZXJvZiBBbGlnbmVkU2VxdWVuY2VFbmNvZGVyXHJcbiAgICAgKi9cclxuICBzdGF0aWMgX2Ryb3BEZWZpc2VzKHNlcTogc3RyaW5nKTogc3RyaW5nIHtcclxuICAgIHJldHVybiBzZXEucmVwbGFjZSgvKC0pKFteLV0rKS9nLCAnJDInKTtcclxuICB9XHJcblxyXG4gIC8qKlxyXG4gICAgICogUGVyZm9ybXMgdHJ1bmNhdGlvbiBhbmQgY3V0dGluZyBhdXhpbGlhcnkgZGVmaXNlcy5cclxuICAgICAqXHJcbiAgICAgKiBAc3RhdGljXHJcbiAgICAgKiBAcGFyYW0ge3N0cmluZ30gc2VxdWVuY2UgVGhlIHNlcXVlbmNlIHdvcmsgdW5kZXIgcHJvY2Vzcy5cclxuICAgICAqIEByZXR1cm4ge3N0cmluZ30gUmVzdWx0IG9mIGNsZWFuaW5nLlxyXG4gICAgICogQG1lbWJlcm9mIEFsaWduZWRTZXF1ZW5jZUVuY29kZXJcclxuICAgICAqL1xyXG4gIHN0YXRpYyBjbGVhbihzZXF1ZW5jZTog
c3RyaW5nKTogc3RyaW5nIHtcclxuICAgIHJldHVybiBBbGlnbmVkU2VxdWVuY2VFbmNvZGVyLl9kcm9wRGVmaXNlcyhBbGlnbmVkU2VxdWVuY2VFbmNvZGVyLl90cnVuY2F0ZVNlcXVlbmNlKHNlcXVlbmNlKSk7XHJcbiAgfVxyXG5cclxuICAvKipcclxuICAgICAqIENhdGVnb3JpYWwgZW5jb2RlIG9mIHRoZSBzZXF1ZW5jZSBwcm92aWRlZC5cclxuICAgICAqXHJcbiAgICAgKiBAcGFyYW0ge3N0cmluZ30gc2VxdWVuY2UgVGhlIHNlcXVlbmNlLlxyXG4gICAgICogQHJldHVybiB7bnVtYmVyW119IEVuY29kZWQgdmVjdG9yLlxyXG4gICAgICogQG1lbWJlcm9mIEFsaWduZWRTZXF1ZW5jZUVuY29kZXJcclxuICAgICAqL1xyXG4gIHB1YmxpYyBlbmNvZGUoc2VxdWVuY2U6IHN0cmluZyk6IG51bWJlcltdIHtcclxuICAgIGNvbnN0IG5JdGVtcyA9IHNlcXVlbmNlLmxlbmd0aDtcclxuICAgIGNvbnN0IHZhbHVlcyA9IG5ldyBBcnJheShuSXRlbXMpLmZpbGwoMCk7XHJcblxyXG4gICAgZm9yIChsZXQgaSA9IDA7IGkgPCBuSXRlbXM7ICsraSkge1xyXG4gICAgICBjb25zdCBjaGFyID0gc2VxdWVuY2VbaV07XHJcblxyXG4gICAgICBhc3NlcnQoY2hhciBpbiB0aGlzLmFhMm51bSwgYFVua25vd24gY2hhciAnJHtjaGFyfScgZm91bmQgaW4gc2VxdWVuY2UgJyR7c2VxdWVuY2V9J2ApO1xyXG5cclxuICAgICAgdmFsdWVzW2ldID0gdGhpcy5lbmNvZGVMZXR0dGVyKGNoYXIpO1xyXG4gICAgfVxyXG4gICAgcmV0dXJuIHZhbHVlcztcclxuICB9XHJcblxyXG4gIHB1YmxpYyBlbmNvZGVMZXR0dGVyKGxldHRlcjogc3RyaW5nKTogbnVtYmVyIHtcclxuICAgIHJldHVybiB0aGlzLmFhMm51bVtsZXR0ZXJdO1xyXG4gIH1cclxuXHJcbiAgLyoqXHJcbiAgICAgKiBEZWNvZGUgdGhlIGVuY29kZWQgdmVjdG9yIGludG8gdGhlIHNlcXVlbmNlIGJhY2suXHJcbiAgICAgKlxyXG4gICAgICogQHBhcmFtIHtudW1iZXJbXX0gdmFsdWUgVGhlIHZlY3RvciBlbmNvZGVkLlxyXG4gICAgICogQHJldHVybiB7c3RyaW5nfSBEZWNvZGVkIHNlcXVlbmNlLlxyXG4gICAgICogQG1lbWJlcm9mIEFsaWduZWRTZXF1ZW5jZUVuY29kZXJcclxuICAgICAqL1xyXG4gIHB1YmxpYyBkZWNvZGUodmFsdWU6IG51bWJlcltdKTogc3RyaW5nIHtcclxuICAgIGxldCBzOiBzdHJpbmcgPSAnJztcclxuXHJcbiAgICBmb3IgKGxldCBpID0gMDsgaSA8IHZhbHVlLmxlbmd0aDsgKytpKSB7XHJcbiAgICAgIGNvbnN0IGNvZGUgPSB2YWx1ZVtpXTtcclxuXHJcbiAgICAgIGFzc2VydChjb2RlIGluIHRoaXMubnVtMmFhLCBgVW5rbm93biBjb2RlICcke2NvZGV9JyBmb3VuZCBpbiB2ZWN0b3IgJyR7dmFsdWV9J2ApO1xyXG5cclxuICAgICAgcyArPSB0aGlzLm51bTJhYVtjb2RlXTtcclxuICAgIH1cclxuICAgIHJldHVybiBzO1xyXG4gIH1cclxufVxyXG4iXX0=
@@ -1,177 +0,0 @@
1
- import {assert} from '@datagrok-libraries/utils/src/operations';
2
-
3
/** Table mapping a residue letter (or the gap symbol '-') to its numeric value. */
type SideChainScale = {[name: string]: number};
/** Collection of residue tables keyed by scale name. */
type SideChainScaleCollection = {[name: string]: SideChainScale};

/**
 * Registry of the numeric per-residue scales that can be used to encode sequences.
 */
class SideChainScales {
  static scales: SideChainScaleCollection = {
    // Wimley-White interfacial hydrophobicity scale
    'WimleyWhite': {
      '-': 0,
      'A': 0.17,
      'C': -0.24,
      'D': -0.07, // Asp-: 1.23
      'E': -0.01, // Glu-: 2.02
      'F': -1.13, //
      'G': 0.01,
      'H': 0.17, // His+: 0.96
      'I': -0.31,
      'K': 0.99, // Lys+
      'L': -0.56,
      'M': -0.23,
      'N': 0.42,
      'P': 0.45,
      'Q': 0.58,
      'R': 0.81, // Arg+
      'S': 0.13,
      'T': 0.14,
      'V': 0.07,
      'W': -1.85,
      'Y': -0.94,
    },
    // Plain ordinal codes: the gap symbol is 0, residue letters are numbered 1..20.
    'categorial': {
      '-': 0,
      'A': 1,
      'C': 2,
      'D': 3,
      'E': 4,
      'F': 5,
      'G': 6,
      'H': 7,
      'I': 8,
      'K': 9,
      'L': 10,
      'M': 11,
      'N': 12,
      'P': 13,
      'Q': 14,
      'R': 15,
      'S': 16,
      'T': 17,
      'V': 18,
      'W': 19,
      'Y': 20,
    },
  };

  /**
   * Lists the names of all registered scales.
   *
   * @static
   * @return {string[]} Names of the scales, in the order they are declared.
   */
  static getAvailableScales(): string[] {
    return Object.keys(this.scales);
  }

  /**
   * Looks a scale up by its name; the lookup is guarded by an assertion, so an
   * unknown name makes the assertion fail instead of returning `undefined`.
   *
   * @static
   * @param {string} name Name of the scale, e.g. 'categorial'.
   * @return {SideChainScale} Table mapping a residue letter to its value.
   */
  static getScale(name: string): SideChainScale {
    // Only own entries count: a bare index lookup would also resolve names inherited
    // from Object.prototype ('toString', 'constructor', ...) and hand back something
    // that is not a scale table at all.
    const isKnown = Object.prototype.hasOwnProperty.call(this.scales, name) &&
      this.scales[name] !== undefined;
    assert(isKnown, `Scale '${name}' was not found.`);
    return this.scales[name];
  }
}
66
-
67
/**
 * Encodes aligned amino acid residue sequences into numeric vectors and decodes them back.
 *
 * The residue-to-number table is taken from `SideChainScales`; the reverse table is derived
 * from it. When a scale assigns the same value to several residues (in 'WimleyWhite' both
 * 'A' and 'H' are 0.17), the residue listed last wins in the reverse table, so
 * `decode(encode(s))` is only guaranteed to restore `s` for scales with unique values
 * such as 'categorial'.
 *
 * @export
 * @class AlignedSequenceEncoder
 */
export class AlignedSequenceEncoder {
  /** Residue letter -> numeric value, as provided by the selected scale. */
  protected aa2num: SideChainScale;
  /** Numeric value -> residue letter, derived from `aa2num` in the constructor. */
  protected num2aa: {[code: number]: string};

  /**
   * @param {string} [scale='categorial'] Name of the scale to encode with.
   */
  constructor(scale: string = 'categorial') {
    this.aa2num = SideChainScales.getScale(scale);
    this.num2aa = {};
    for (const [letter, code] of Object.entries(this.aa2num))
      this.num2aa[code] = letter;
  }

  /**
   * Truncate NH2 and -COOH terminals of the given sequence.
   *
   * A leading 'NH2' is removed while the '-' following it is kept; a trailing '-COOH'
   * is removed together with its '-'. Sequences without these terminals pass through as is.
   *
   * @static
   * @param {string} seq The sequence provided.
   * @return {string} Truncated sequence.
   * @memberof AlignedSequenceEncoder
   */
  static _truncateSequence(seq: string): string {
    const [head, tail] = ['NH2', 'COOH'];
    let start = 0;
    let end = seq.length;

    if (seq.startsWith(head)) {
      // Cut only 'NH2'; the '-' right after it stays in the result.
      assert(seq[head.length] === '-', `Wrong sequence format: ${head} without following '-' in '${seq}'.`);
      start = head.length;
    }
    if (seq.endsWith(tail)) {
      // Cut both 'COOH' and the '-' in front of it.
      const cut = tail.length + 1;
      assert(seq[end - cut] === '-', `Wrong sequence format: ${tail} without '-' precending in '${seq}'.`);
      end -= cut;
    }
    return seq.substring(start, end);
  }

  /**
   * Removes the single separating dash in front of every run of non-dash characters.
   *
   * A dash that is not directly followed by a non-dash character is kept,
   * e.g. '-A--C' becomes 'A-C'.
   *
   * @static
   * @param {string} seq The sequence to process.
   * @return {string} Processed sequence.
   * @memberof AlignedSequenceEncoder
   */
  static _dropDefises(seq: string): string {
    return seq.replace(/-([^-]+)/g, '$1');
  }

  /**
   * Performs terminal truncation and then removes the separating dashes.
   *
   * @static
   * @param {string} sequence The sequence to clean.
   * @return {string} Result of cleaning.
   * @memberof AlignedSequenceEncoder
   */
  static clean(sequence: string): string {
    return AlignedSequenceEncoder._dropDefises(AlignedSequenceEncoder._truncateSequence(sequence));
  }

  /**
   * Encodes the sequence character by character using the selected scale.
   * The assertion fails on the first character that the scale does not define.
   *
   * @param {string} sequence The sequence.
   * @return {number[]} Encoded vector, one number per character.
   * @memberof AlignedSequenceEncoder
   */
  public encode(sequence: string): number[] {
    const values: number[] = [];
    for (let i = 0; i < sequence.length; ++i) {
      const char = sequence[i];
      assert(Object.prototype.hasOwnProperty.call(this.aa2num, char),
        `Unknown char '${char}' found in sequence '${sequence}'`);
      values.push(this.encodeLettter(char));
    }
    return values;
  }

  /**
   * Encodes a single residue letter. No validation is done here: a letter missing
   * from the scale yields `undefined`.
   *
   * The misspelled name is kept for backward compatibility; see `encodeLetter`.
   *
   * @param {string} letter The residue letter.
   * @return {number} Value of the letter in the selected scale.
   * @memberof AlignedSequenceEncoder
   */
  public encodeLettter(letter: string): number {
    return this.aa2num[letter];
  }

  /**
   * Correctly spelled alias of `encodeLettter`.
   *
   * @param {string} letter The residue letter.
   * @return {number} Value of the letter in the selected scale.
   * @memberof AlignedSequenceEncoder
   */
  public encodeLetter(letter: string): number {
    return this.encodeLettter(letter);
  }

  /**
   * Decode the encoded vector into the sequence back.
   * The assertion fails on the first code that has no residue in the reverse table.
   *
   * @param {number[]} value The vector encoded.
   * @return {string} Decoded sequence.
   * @memberof AlignedSequenceEncoder
   */
  public decode(value: number[]): string {
    const letters: string[] = [];
    for (const code of value) {
      // Own-property test: `in` would also accept names inherited from Object.prototype.
      assert(Object.prototype.hasOwnProperty.call(this.num2aa, code),
        `Unknown code '${code}' found in vector '${value}'`);
      letters.push(this.num2aa[code]);
    }
    return letters.join('');
  }
}