@datagrok/sequence-translator 1.5.3 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -2
- package/CHANGELOG.md +6 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/src/package.ts +1 -0
- package/src/polytool/conversion/pt-chain.ts +38 -635
- package/src/polytool/conversion/pt-conversion.ts +2 -3
- package/src/polytool/conversion/pt-misc.ts +8 -182
- package/src/polytool/conversion/pt-synthetic.ts +279 -0
- package/src/polytool/conversion/pt-tools-helmmol.ts +114 -0
- package/src/polytool/conversion/pt-tools-parse.ts +356 -0
- package/src/polytool/pt-dialog.ts +1 -1
- package/src/polytool/pt-enumerate-seq-dialog.ts +1 -1
- package/src/polytool/pt-enumeration-helm.ts +7 -5
- package/src/polytool/pt-unrule.ts +3 -3
- package/src/tests/polytool-chain-from-notation-tests.ts +34 -33
- package/src/tests/polytool-chain-parse-notation-tests.ts +2 -2
- package/src/tests/polytool-convert-tests.ts +14 -14
- package/src/tests/polytool-detectors-custom-notation-test.ts +1 -1
- package/src/tests/toAtomicLevel-tests.ts +4 -4
- package/src/utils/cyclized.ts +6 -4
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
import {getInnerIdx, getOuterIdx, Linkage} from './pt-misc';
|
|
2
|
+
import {cleanupHelmSymbol} from '@datagrok-libraries/bio/src/helm/utils';
|
|
3
|
+
import {getMonomerPairs, RuleLink, RuleReaction, Rules} from './pt-rules';
|
|
4
|
+
|
|
5
|
+
/**
 * Two positions of one chain that were matched by the same rule.
 * `firstIdx` / `secondIdx` are zero-based indices into the chain's monomer array,
 * `ruleIdx` is the index of the matching rule in the rule list that was searched.
 */
type LinkedPosition = {
  firstIdx: number;
  secondIdx: number;
  ruleIdx: number;
};
|
|
10
|
+
|
|
11
|
+
/**
 * Splits a sequence in separator ('-') notation into chains of monomer symbols.
 *
 * Everything outside of curly braces forms the first chain; every `{...}` group
 * (together with a marker such as `(#3)` placed immediately before the brace) forms
 * one more chain, in order of appearance.
 * An empty item produced by splitting a chain on '-' (for example the trailing dash in
 * `Succ-{A-F}`) is not a monomer: it is recorded as a linkage from the current chain to
 * the next one, positioned after the number of monomers counted so far over all chains.
 *
 * @param sequence Sequence in separator notation.
 * @returns Tuple of [linkages between chains, monomer symbols per chain].
 */
export function parseSeparator(sequence: string): [Linkage[], string[][]] {
  const groupRe = /(\(.\d+\))?\{[^\}]*\}/g;

  // The first chain is whatever remains once all brace groups are cut out;
  // a global regex makes replace() remove every occurrence.
  const seqs: string[] = [sequence.replace(groupRe, '')];
  for (const match of sequence.matchAll(groupRe))
    seqs.push(match[0]);

  const mainFragments: string[][] = [];
  const linkages: Linkage[] = [];
  let counter = 0; // monomers seen so far across all chains

  for (let chainIdx = 0; chainIdx < seqs.length; chainIdx++) {
    const monomers: string[] = [];
    for (const item of seqs[chainIdx].split('-')) {
      // Only the first '{' and the first '}' are stripped.
      const monomer = item.replace('{', '').replace('}', '');
      if (monomer !== '') {
        // Append instead of writing at the split index: an empty item before this one
        // must not leave a hole in the chain or push later monomers out of it.
        monomers.push(monomer);
        counter++;
      } else {
        linkages.push({fChain: chainIdx, sChain: chainIdx + 1, fMonomer: counter, sMonomer: counter + 1, fR: 1, sR: 1});
      }
    }
    mainFragments.push(monomers);
  }

  return [linkages, mainFragments];
}
|
|
49
|
+
|
|
50
|
+
/**
 * Parses a HELM string into chains of monomer symbols and the connections between them.
 *
 * Only the first two '$'-separated sections are used: the polymer list (`ID{a.b.c}|...`)
 * and the connection list (`ID1,ID2,pos:Rn-pos:Rm|...`).
 * NOTE(review): chain ids are assumed to look like `PEPTIDE<n>`; any other polymer type
 * yields NaN chain indices - confirm with callers.
 *
 * @param sequence HELM string with four '$' separators.
 * @returns Tuple of [linkages, monomer symbols per chain].
 * @throws Error when the string does not have the expected HELM layout.
 */
export function parseHelm(sequence: string): [Linkage[], string[][]] {
  const sections = /(\w+\{.*\})\$(.*)\$(.*)\$(.*)\$/g.exec(sequence);
  if (sections === null)
    throw new Error(`Invalid HELM string: '${sequence}'`);

  const rawFragments = sections[1].split('|');
  const rawLinkages = sections[2].split('|');

  // Monomer symbols of every chain: the text between the first '{' and the first '}'.
  const monomers: string[][] = new Array<Array<string>>(rawFragments.length);
  for (let i = 0; i < rawFragments.length; i++) {
    const idxStart = rawFragments[i].indexOf('{');
    const idxEnd = rawFragments[i].indexOf('}');
    monomers[i] = rawFragments[i].slice(idxStart + 1, idxEnd).split('.').map((s) => cleanupHelmSymbol(s));
  }

  const linkages: Linkage[] = [];
  for (const rawLinkage of rawLinkages) {
    if (rawLinkage === '' || rawLinkage === 'V2.0')
      continue;

    const [rawFirstChain, rawSecondChain, rawConnection] = rawLinkage.split(',');
    const fChainIdx = parseInt(rawFirstChain.replace('PEPTIDE', ''), 10) - 1;
    const sChainIdx = parseInt(rawSecondChain.replace('PEPTIDE', ''), 10) - 1;

    // '<position>:R<group>-<position>:R<group>'
    const [rawEnd1, rawEnd2] = rawConnection.split('-');
    const [rawPos1, rawGroup1] = rawEnd1.split(':');
    const [rawPos2, rawGroup2] = rawEnd2.split(':');

    linkages.push({
      fChain: fChainIdx,
      sChain: sChainIdx,
      // presumably maps a chain-local position to a position over all chains - see getOuterIdx
      fMonomer: getOuterIdx(parseInt(rawPos1, 10), fChainIdx, monomers),
      sMonomer: getOuterIdx(parseInt(rawPos2, 10), sChainIdx, monomers),
      fR: parseInt(rawGroup1.replace('R', ''), 10),
      sR: parseInt(rawGroup2.replace('R', ''), 10),
    });
  }

  return [linkages, monomers];
}
|
|
91
|
+
|
|
92
|
+
/**
 * Serializes chains and linkages to a HELM V2.0 string.
 *
 * Every chain becomes `PEPTIDE<n>{...}` with symbols joined by '.'; symbols longer than
 * one character are wrapped in square brackets. Every linkage becomes
 * `PEPTIDE<f>,PEPTIDE<s>,<pos>:R<fR>-<pos>:R<sR>`.
 * The positions are taken from getInnerIdx (presumably the chain-local position of a
 * position counted over all chains - confirm in pt-misc).
 *
 * @param linkages Connections between monomers.
 * @param monomers Monomer symbols per chain.
 * @returns HELM string ending with `$$$V2.0`.
 */
export function fromObjectsToHelm(linkages: Linkage[], monomers: string[][]): string {
  const polymers: string[] = [];
  let chainNo = 0;
  for (const chain of monomers) {
    chainNo++;
    const symbols: string[] = [];
    for (const symbol of chain)
      symbols.push(symbol.length > 1 ? `[${symbol}]` : symbol);
    polymers.push(`PEPTIDE${chainNo}{` + symbols.join('.') + `}`);
  }

  const connections: string[] = [];
  for (const link of linkages) {
    const chainsPart = `PEPTIDE${link.fChain + 1},PEPTIDE${link.sChain + 1},`;
    const firstEnd = `${getInnerIdx(link.fMonomer - 1, monomers)[0] + 1}:R${link.fR}-`;
    const secondEnd = `${getInnerIdx(link.sMonomer - 1, monomers)[0] + 1}:R${link.sR}`;
    connections.push(chainsPart + firstEnd + secondEnd);
  }

  return polymers.join('|') + '$' + connections.join('|') + '$$$' + 'V2.0';
}
|
|
123
|
+
|
|
124
|
+
/** Removes every curly brace from a fragment. */
function stripBraces(fragment: string): string {
  return fragment.replace(/[{}]/g, '');
}

/**
 * One homodimer expansion pass: for every fragment containing `marker`, the first marker
 * occurrence is removed, the fragment becomes `linker + body` (the linker is the text in
 * front of the first '{'), a copy of `body` is appended as a new chain and a linkage
 * between the two chains is recorded.
 */
function expandHomodimers(mainFragments: string[], linkages: Linkage[], marker: string): void {
  // The length is re-read on every iteration on purpose: chains appended here are scanned too.
  for (let i = 0; i < mainFragments.length; i++) {
    if (!mainFragments[i].includes(marker))
      continue;

    linkages.push({fChain: i, sChain: mainFragments.length, fMonomer: 1, sMonomer: 1, fR: 1, sR: 1});

    const rawDimer = mainFragments[i].replace(marker, '');
    const braceIdx = rawDimer.indexOf('{');
    // Without any brace there is no linker; slice(0, -1) would wrongly treat all but the
    // last character as the linker.
    const linker = braceIdx < 0 ? '' : rawDimer.slice(0, braceIdx);
    const body = stripBraces(rawDimer.slice(linker.length));

    mainFragments[i] = linker + body;
    mainFragments.push(body);
  }
}

/**
 * Expands homo- and heterodimer markers of a sequence into separate chains.
 *
 * NOTICE: this works only with simple single heterodimers / dimers.
 * A heterodimer is written as `(<code>)...(<code>)...`: the two parts following the markers
 * become chains 0 and 1 (braces removed), connected by one linkage. If the heterodimer
 * marker does not occur at least twice the sequence is taken as a single chain, unchanged.
 * Homodimer markers are then expanded in every chain (two passes, as before).
 *
 * @param sequence Sequence in separator notation.
 * @param rules Rules providing `heterodimerCode` and `homodimerCode` (null disables the feature).
 * @returns Tuple of [linkages between chains, chains still in separator notation].
 */
export function handleDuplicated(sequence: string, rules: Rules): [Linkage[], string[]] {
  const mainFragments: string[] = [];
  const linkages: Linkage[] = [];
  const heterodimerCode = rules.heterodimerCode;
  const homodimerCode = rules.homodimerCode;

  const heterodimeric: string[] = heterodimerCode !== null ? sequence.split(`(${heterodimerCode})`) : [];
  // Parts [1] and [2] are read below, so at least three parts (two markers) are required.
  if (heterodimeric.length > 2) {
    linkages.push({fChain: 0, sChain: 1, fMonomer: 1, sMonomer: 1, fR: 1, sR: 1});
    mainFragments.push(stripBraces(heterodimeric[1]));
    mainFragments.push(stripBraces(heterodimeric[2]));
  } else {
    mainFragments.push(sequence);
  }

  if (homodimerCode !== null) {
    const marker = `(${homodimerCode})`;
    // The second pass picks up a marker that is still left in a chain after the first pass.
    expandHomodimers(mainFragments, linkages, marker);
    expandHomodimers(mainFragments, linkages, marker);
  }

  return [linkages, mainFragments];
}
|
|
174
|
+
|
|
175
|
+
/**
 * Applies link rules to every chain given in separator notation.
 *
 * Each entry of `mf` is split on '-' into monomers; pairs of monomers matched by
 * `rules.linkRules` are turned into intra-chain linkages (appended to `linkages`) and the
 * resulting monomer list, with the rule markers removed, is stored in `monomers` at the
 * same chain index.
 *
 * @param mf Chains in separator notation.
 * @param monomers Output: monomer symbols per chain.
 * @param linkages Output: linkages are appended here.
 * @param rules Rules providing `linkRules`.
 */
export function handleLinkRules(mf: string[], monomers: string[][], linkages: Linkage[], rules: Rules): void {
  for (let chainIdx = 0; chainIdx < mf.length; chainIdx++) {
    const chainMonomers = mf[chainIdx].split('-');
    const matched = getLinkedPositions(chainMonomers, rules.linkRules);
    // getAllCycles also strips the rule markers from chainMonomers in place.
    const [firstPositions, secondPositions, firstGroups, secondGroups] =
      getAllCycles(rules.linkRules, chainMonomers, matched);

    let n = 0;
    while (n < firstPositions.length) {
      linkages.push({
        fChain: chainIdx,
        sChain: chainIdx,
        fMonomer: firstPositions[n],
        sMonomer: secondPositions[n],
        fR: firstGroups[n],
        sR: secondGroups[n],
      });
      n++;
    }

    monomers[chainIdx] = chainMonomers;
  }
}
|
|
196
|
+
|
|
197
|
+
/**
 * Applies reaction rules to every chain.
 *
 * When a chain contains a pair of monomers matched by one of `rules.reactionRules`, a new
 * single-monomer chain named after the rule is appended to `monomers`, and two linkages are
 * added: R3 of the first reactant to R1 of the new monomer, and R3 of the second reactant
 * to R2 of the new monomer. Only the first matched pair of a chain is used.
 *
 * @param monomers Monomer symbols per chain; modified in place.
 * @param linkages Output: linkages are appended here.
 * @param rules Rules providing `reactionRules`.
 */
export function handleReactionRules(monomers: string[][], linkages: Linkage[], rules: Rules): void {
  // monomers grows inside the loop, so the bound is re-read on every iteration.
  for (let chainIdx = 0; chainIdx < monomers.length; chainIdx++) {
    const chain = monomers[chainIdx];
    const matched = getLinkedPositions(chain, rules.reactionRules);
    const [firstPositions, secondPositions, ruleIndices] = getAllReactants(rules.reactionRules, chain, matched);

    if (firstPositions.length === 0)
      continue;

    const productChainIdx = monomers.length; // index the appended chain is going to get
    const ends: [number, number][] = [[firstPositions[0], 1], [secondPositions[0], 2]];
    for (const [reactantPos, productGroup] of ends) {
      linkages.push({
        fChain: chainIdx,
        sChain: productChainIdx,
        fMonomer: reactantPos,
        sMonomer: 1,
        fR: 3,
        sR: productGroup,
      });
    }

    monomers.push([rules.reactionRules[ruleIndices[0]].name]);
  }
}
|
|
225
|
+
|
|
226
|
+
/**
 * Initial state of a pair search:
 * [firstFound, secondFound, firstIsFirst, firstIdx, secondIdx] with nothing found.
 */
function reset(): [boolean, boolean, boolean, number, number] {
  const notFound = false;
  const noIndex = -1;
  return [notFound, notFound, notFound, noIndex, noIndex];
}
|
|
229
|
+
|
|
230
|
+
/**
 * Looks for the first pair of monomers carrying the marker `add` (e.g. `(1)`).
 *
 * With `unlim` any two monomers containing the marker form a pair. Otherwise a monomer
 * counts only if it is exactly `firstMonomers[idx] + add` or `secondMonomers[idx] + add`,
 * and the second member of the pair has to be the counterpart of the first one.
 *
 * @param monomers Monomer symbols of one chain.
 * @param add Marker text to look for.
 * @param unlim When true the monomer symbols themselves are not checked.
 * @param idx Index of the allowed pair in `firstMonomers` / `secondMonomers`.
 * @param firstMonomers Allowed first monomers.
 * @param secondMonomers Allowed second monomers.
 * @returns [firstFound, secondFound, firstIsFirst, firstIdx, secondIdx]; `firstIsFirst`
 *   is false when the earlier monomer matched `secondMonomers`; indices are -1 when not found.
 */
function find(monomers: string[], add: string, unlim: boolean, idx: number = 0,
  firstMonomers: string[] = [], secondMonomers: string[] = []) : [boolean, boolean, boolean, number, number] {
  const firstTagged = firstMonomers[idx] + add;
  const secondTagged = secondMonomers[idx] + add;

  let firstFound = false;
  let secondFound = false;
  let firstIsFirst = false;
  let firstIdx = -1;
  let secondIdx = -1;

  for (let pos = 0; pos < monomers.length; pos++) {
    const monomer = monomers[pos];
    if (!monomer.includes(add))
      continue;

    if (!firstFound) {
      // Still looking for the first member of the pair.
      if (unlim || monomer === firstTagged) {
        firstFound = true;
        firstIsFirst = true;
        firstIdx = pos;
      } else if (monomer === secondTagged) {
        firstFound = true;
        firstIsFirst = false;
        firstIdx = pos;
      }
      continue;
    }

    // The second member has to be the counterpart of whatever was found first.
    const counterpart = firstIsFirst ? secondTagged : firstTagged;
    if (unlim || monomer === counterpart) {
      secondFound = true;
      secondIdx = pos;
      break;
    }
  }

  return [firstFound, secondFound, firstIsFirst, firstIdx, secondIdx];
}
|
|
269
|
+
|
|
270
|
+
/**
 * Finds, for every rule, the monomer pairs of a chain that are marked with the rule's code.
 *
 * A rule with explicit monomer pairs is searched once per pair; a rule without pairs
 * matches any two monomers carrying its `(code)` marker. In the result `firstIdx` always
 * refers to the monomer matched against the rule's first monomers, regardless of which of
 * the two comes earlier in the chain.
 *
 * @param monomers Monomer symbols of one chain.
 * @param rules Link or reaction rules to search for.
 * @returns One entry per complete pair that was found.
 */
function getLinkedPositions(monomers: string[], rules: RuleLink[] | RuleReaction []) : LinkedPosition[] {
  const result: LinkedPosition[] = [];

  // Stores one search outcome, provided both members of the pair were found.
  const collect = (outcome: [boolean, boolean, boolean, number, number], ruleIdx: number): void => {
    const [firstFound, secondFound, firstIsFirst, firstIdx, secondIdx] = outcome;
    if (!firstFound || !secondFound)
      return;
    if (firstIsFirst)
      result.push({firstIdx: firstIdx, secondIdx: secondIdx, ruleIdx: ruleIdx});
    else
      result.push({firstIdx: secondIdx, secondIdx: firstIdx, ruleIdx: ruleIdx});
  };

  for (let ruleIdx = 0; ruleIdx < rules.length; ruleIdx++) {
    const add = `(${rules[ruleIdx].code})`;
    const [firstMonomers, secondMonomers] = getMonomerPairs(rules[ruleIdx]);

    if (firstMonomers.length > 0) {
      for (let pairIdx = 0; pairIdx < firstMonomers.length; pairIdx++)
        collect(find(monomers, add, false, pairIdx, firstMonomers, secondMonomers), ruleIdx);
    } else {
      collect(find(monomers, add, true), ruleIdx);
    }
  }

  return result;
}
|
|
304
|
+
|
|
305
|
+
/**
 * Converts matched positions of link rules into linkage data.
 *
 * For every matched pair the rule's `(code)` marker is removed from both monomers
 * (`monomers` is modified in place) and the one-based positions plus the rule's linking
 * groups are collected.
 *
 * @param rules Link rules the positions refer to.
 * @param monomers Monomer symbols of one chain.
 * @param positions Matched pairs; entries with `firstIdx` of -1 are ignored.
 * @returns [first positions, second positions, first linking groups, second linking groups].
 */
function getAllCycles(rules: RuleLink[], monomers: string [], positions: LinkedPosition[]):
  [number [], number [], number [], number []] {
  const firstPositions: number[] = [];
  const secondPositions: number[] = [];
  const firstGroups: number[] = [];
  const secondGroups: number[] = [];

  for (const {firstIdx, secondIdx, ruleIdx} of positions) {
    if (firstIdx === -1)
      continue;

    const rule = rules[ruleIdx];
    const marker = `(${rule.code})`;
    monomers[firstIdx] = monomers[firstIdx].replace(marker, '');
    monomers[secondIdx] = monomers[secondIdx].replace(marker, '');

    firstPositions.push(firstIdx + 1);
    secondPositions.push(secondIdx + 1);
    firstGroups.push(rule.firstLinkingGroup);
    secondGroups.push(rule.secondLinkingGroup);
  }

  return [firstPositions, secondPositions, firstGroups, secondGroups];
}
|
|
331
|
+
|
|
332
|
+
/**
 * Converts matched positions of reaction rules into reactant data.
 *
 * For every matched pair the rule's `(code)` marker is removed from both monomers and
 * `_<rule name>` is appended to each of them (`monomers` is modified in place); the
 * one-based positions and the rule index are collected.
 *
 * @param rules Reaction rules the positions refer to.
 * @param monomers Monomer symbols of one chain.
 * @param positions Matched pairs; entries with `firstIdx` of -1 are ignored.
 * @returns [first positions, second positions, rule indices].
 */
function getAllReactants(rules: RuleReaction[], monomers: string [], positions: LinkedPosition[]):
  [number [], number [], number []] {
  const firstPositions: number[] = [];
  const secondPositions: number[] = [];
  const ruleIndices: number[] = [];

  for (const {firstIdx, secondIdx, ruleIdx} of positions) {
    if (firstIdx === -1)
      continue;

    const marker = `(${rules[ruleIdx].code})`;
    const suffix = `_${rules[ruleIdx].name}`;
    monomers[firstIdx] = monomers[firstIdx].replace(marker, '') + suffix;
    monomers[secondIdx] = monomers[secondIdx].replace(marker, '') + suffix;

    firstPositions.push(firstIdx + 1);
    secondPositions.push(secondIdx + 1);
    ruleIndices.push(ruleIdx);
  }

  return [firstPositions, secondPositions, ruleIndices];
}
|
|
@@ -16,7 +16,7 @@ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
|
16
16
|
|
|
17
17
|
import {getRules, RuleInputs, RULES_PATH, RULES_STORAGE_NAME} from './conversion/pt-rules';
|
|
18
18
|
import {doPolyToolConvert} from './conversion/pt-conversion';
|
|
19
|
-
import {getOverriddenLibrary} from './conversion/pt-
|
|
19
|
+
import {getOverriddenLibrary} from './conversion/pt-synthetic';
|
|
20
20
|
import {defaultErrorHandler} from '../utils/err-info';
|
|
21
21
|
import {getLibrariesList} from './utils';
|
|
22
22
|
import {getEnumerationChem, PT_CHEM_EXAMPLE} from './pt-enumeration-chem';
|
|
@@ -615,7 +615,7 @@ async function polyToolEnumerateSeq(
|
|
|
615
615
|
const templateList: string[] = new Array<string>(resList.length);
|
|
616
616
|
for (let rowIdx = 0; rowIdx < resList.length; rowIdx++) {
|
|
617
617
|
const pseudoHelm = resList[rowIdx][0];
|
|
618
|
-
const chain = Chain.
|
|
618
|
+
const chain = Chain.fromHelm(pseudoHelm, helmHelper);
|
|
619
619
|
templateList[rowIdx] = chain.getNotation();
|
|
620
620
|
}
|
|
621
621
|
enumCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'Enumerated', templateList);
|
|
@@ -9,9 +9,10 @@ import {
|
|
|
9
9
|
} from '@datagrok-libraries/bio/src/helm/types';
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
import {
|
|
12
|
+
import {PtBio} from './conversion/pt-tools-helmmol';
|
|
13
13
|
import {getAvailableMonomers} from './utils';
|
|
14
|
-
import {PolyToolEnumeratorParams, PolyToolEnumeratorTypes,
|
|
14
|
+
import {PolyToolEnumeratorParams, PolyToolEnumeratorTypes,
|
|
15
|
+
PolyToolPlaceholder, PolyToolBreadthPlaceholder} from './types';
|
|
15
16
|
|
|
16
17
|
// For example keep monomers presented in HELMCoreLibrary.json only (not [NH2])
|
|
17
18
|
export const PT_HELM_EXAMPLE = 'PEPTIDE1{R.[Aca].T.G.H.F.G.A.A.Y.P.E.[meI]}$$$$';
|
|
@@ -97,14 +98,15 @@ export function doPolyToolEnumerateHelm(
|
|
|
97
98
|
}
|
|
98
99
|
|
|
99
100
|
let resBreadthMolList: HelmMol[] = [];
|
|
100
|
-
if (params.breadthPlaceholders)
|
|
101
|
+
if (params.breadthPlaceholders)
|
|
101
102
|
resBreadthMolList = getPtEnumeratorBreadth(molHandler.m, params.breadthPlaceholders);
|
|
102
|
-
|
|
103
|
+
|
|
103
104
|
resMolList = resMolList.concat(resBreadthMolList);
|
|
104
105
|
|
|
105
106
|
if (params.keepOriginal)
|
|
106
107
|
resMolList = [m, ...resMolList];
|
|
107
108
|
|
|
108
|
-
const resList = resMolList
|
|
109
|
+
const resList = resMolList
|
|
110
|
+
.map<[string, string]>((m: HelmMol) => { return [org.helm.webeditor.IO.getHelm(m)!, m.name!]; });
|
|
109
111
|
return resList;
|
|
110
112
|
}
|
|
@@ -27,10 +27,10 @@ export async function polyToolUnruleUI(): Promise<void> {
|
|
|
27
27
|
export function doPolyToolUnrule(helms: string[], rules: Rules, helmHelper: IHelmHelper): string[] {
|
|
28
28
|
const resHrzSeqList = new Array<string>(helms.length);
|
|
29
29
|
for (let i = 0; i < helms.length; ++i) {
|
|
30
|
-
if (!helms[i])
|
|
30
|
+
if (!helms[i]) {
|
|
31
31
|
resHrzSeqList[i] = '';
|
|
32
|
-
else {
|
|
33
|
-
const chain = Chain.
|
|
32
|
+
} else {
|
|
33
|
+
const chain = Chain.fromHelm(helms[i], helmHelper);
|
|
34
34
|
resHrzSeqList[i] = chain.getNotation();
|
|
35
35
|
}
|
|
36
36
|
}
|
|
@@ -23,31 +23,31 @@ category('PolyTool: Chain', () => {
|
|
|
23
23
|
},
|
|
24
24
|
tgt: {
|
|
25
25
|
templateChain: {monomerCount: [11], linkageCount: 0},
|
|
26
|
-
mmChain: {monomerCount: [11], linkageCount:
|
|
27
|
-
},
|
|
28
|
-
},
|
|
29
|
-
'reaction1': {
|
|
30
|
-
data: {
|
|
31
|
-
templateSeq: 'R-F-azG(4)-T-G-H-F-Y-P-aG(4)-meI',
|
|
32
|
-
templateHelm: 'PEPTIDE1{R.F.[azG(4)].T.G.H.F.Y.P.[aG(4)].[meI]}$$$$V2.0',
|
|
33
|
-
mmHelm: 'PEPTIDE1{R.F.[GGaz].T.G.H.F.Y.P}|PEPTIDE2{[meI]}$PEPTIDE1,PEPTIDE1,3:R3-9:R2|PEPTIDE1,PEPTIDE2,3:R4-1:R1$$$V2.0',
|
|
34
|
-
},
|
|
35
|
-
tgt: {
|
|
36
|
-
templateChain: {monomerCount: [11], linkageCount: 0,},
|
|
37
|
-
mmChain: {monomerCount: [9, 1], linkageCount: 2,}
|
|
38
|
-
}
|
|
39
|
-
},
|
|
40
|
-
'reaction2': {
|
|
41
|
-
data: {
|
|
42
|
-
templateSeq: 'R-F-aG(4)-T-G-H-F-Y-P-azG(4)-meI',
|
|
43
|
-
templateHelm: 'PEPTIDE1{R.F.[aG(4)].T.G.H.F.Y.P.[azG(4)].[meI]}$$$$V2.0',
|
|
44
|
-
mmHelm: 'PEPTIDE1{R.F}|PEPTIDE2{T.G.H.F.Y.P.[GGaz].[meI]}$PEPTIDE1,PEPTIDE2,2:R2-7:R3|PEPTIDE2,PEPTIDE2,1:R1-7:R4,$$$V2.0',
|
|
26
|
+
mmChain: {monomerCount: [11], linkageCount: 0}
|
|
45
27
|
},
|
|
46
|
-
tgt: {
|
|
47
|
-
templateChain: {monomerCount: [11], linkageCount: 0,},
|
|
48
|
-
mmChain: {monomerCount: [2, 8], linkageCount: 2,}
|
|
49
|
-
}
|
|
50
28
|
},
|
|
29
|
+
// 'reaction1': {
|
|
30
|
+
// data: {
|
|
31
|
+
// templateSeq: 'R-F-azG(4)-T-G-H-F-Y-P-aG(4)-meI',
|
|
32
|
+
// templateHelm: 'PEPTIDE1{R.F.[azG(4)].T.G.H.F.Y.P.[aG(4)].[meI]}$$$$V2.0',
|
|
33
|
+
// mmHelm: 'PEPTIDE1{R.F.[azG_GGaz].T.G.H.F.Y.P.[aG_GGaz].[meI]}|PEPTIDE2{[GGaz]}$PEPTIDE1,PEPTIDE2,3:R3-1:R1|PEPTIDE1,PEPTIDE2,10:R3-1:R2$$$V2.0',
|
|
34
|
+
// },
|
|
35
|
+
// tgt: {
|
|
36
|
+
// templateChain: {monomerCount: [11], linkageCount: 0},
|
|
37
|
+
// mmChain: {monomerCount: [9, 1], linkageCount: 2}
|
|
38
|
+
// }
|
|
39
|
+
// },
|
|
40
|
+
// 'reaction2': {
|
|
41
|
+
// data: {
|
|
42
|
+
// templateSeq: 'R-F-aG(4)-T-G-H-F-Y-P-azG(4)-meI',
|
|
43
|
+
// templateHelm: 'PEPTIDE1{R.F.[aG(4)].T.G.H.F.Y.P.[azG(4)].[meI]}$$$$V2.0',
|
|
44
|
+
// mmHelm: 'PEPTIDE1{R.F.[aG_GGaz].T.G.H.F.Y.P.[azG_GGaz].[meI]}|PEPTIDE2{[GGaz]}$PEPTIDE1,PEPTIDE2,3:R3-1:R2|PEPTIDE1,PEPTIDE2,10:R3-1:R1$$$V2.0',
|
|
45
|
+
// },
|
|
46
|
+
// tgt: {
|
|
47
|
+
// templateChain: {monomerCount: [11], linkageCount: 0},
|
|
48
|
+
// mmChain: {monomerCount: [2, 8], linkageCount: 2}
|
|
49
|
+
// }
|
|
50
|
+
//},
|
|
51
51
|
'dimerized1': {
|
|
52
52
|
data: {
|
|
53
53
|
templateSeq: '(#3)Succ-{A(CHOL)-F-C(1)-T-G-H-Y-P-C(1)-NH2}',
|
|
@@ -80,18 +80,19 @@ category('PolyTool: Chain', () => {
|
|
|
80
80
|
for (const [testName, {data, tgt}] of Object.entries(tests)) {
|
|
81
81
|
test(`fromNotation-${testName}`, async () => {
|
|
82
82
|
const rules = await getRules(['rules_example.json']);
|
|
83
|
-
const resMmChain = Chain.
|
|
83
|
+
const resMmChain = Chain.fromSeparator(data.templateSeq, helmHelper);
|
|
84
|
+
resMmChain.applyRules(rules);
|
|
84
85
|
resMmChain.check(true);
|
|
85
86
|
expectArray(resMmChain.monomers.map((mL) => mL.length), tgt.mmChain.monomerCount);
|
|
86
87
|
expect(resMmChain.linkages.length, tgt.mmChain.linkageCount);
|
|
87
88
|
expect(resMmChain.getHelm(), data.mmHelm);
|
|
88
|
-
},
|
|
89
|
+
}, {skipReason: '3157'});
|
|
89
90
|
}
|
|
90
91
|
|
|
91
92
|
for (const [testName, {data, tgt}] of Object.entries(tests)) {
|
|
92
93
|
test(`parseNotation-${testName}`, async () => {
|
|
93
94
|
const rules = await getRules(['rules_example.json']);
|
|
94
|
-
const resTemplateChain = Chain.
|
|
95
|
+
const resTemplateChain = Chain.fromSeparator(data.templateSeq, helmHelper);
|
|
95
96
|
resTemplateChain.check(true);
|
|
96
97
|
expectArray(resTemplateChain.monomers.map((mL) => mL.length), tgt.templateChain.monomerCount);
|
|
97
98
|
expect(resTemplateChain.linkages.length, tgt.templateChain.linkageCount);
|
|
@@ -103,7 +104,7 @@ category('PolyTool: Chain', () => {
|
|
|
103
104
|
for (const [testName, {data, tgt}] of Object.entries(tests)) {
|
|
104
105
|
test(`parseHelm-${testName}`, async () => {
|
|
105
106
|
const rules = await getRules(['rules_example.json']);
|
|
106
|
-
const resTemplateChain = Chain.
|
|
107
|
+
const resTemplateChain = Chain.fromHelm(data.templateHelm, helmHelper);
|
|
107
108
|
resTemplateChain.check(true);
|
|
108
109
|
expectArray(resTemplateChain.monomers.map((mL) => mL.length), tgt.templateChain.monomerCount);
|
|
109
110
|
expect(resTemplateChain.linkages.length, tgt.templateChain.linkageCount);
|
|
@@ -115,12 +116,12 @@ category('PolyTool: Chain', () => {
|
|
|
115
116
|
for (const [testName, {data, tgt}] of Object.entries(tests)) {
|
|
116
117
|
test(`applyRules-${testName}`, async () => {
|
|
117
118
|
const rules = await getRules(['rules_example.json']);
|
|
118
|
-
const resTemplateChain = Chain.
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
expectArray(
|
|
122
|
-
expect(
|
|
123
|
-
expect(
|
|
119
|
+
const resTemplateChain = Chain.fromSeparator(data.templateSeq, helmHelper);
|
|
120
|
+
resTemplateChain.applyRules(rules);
|
|
121
|
+
resTemplateChain.check(true);
|
|
122
|
+
expectArray(resTemplateChain.monomers.map((mL) => mL.length), tgt.mmChain.monomerCount);
|
|
123
|
+
expect(resTemplateChain.linkages.length, tgt.mmChain.linkageCount);
|
|
124
|
+
expect(resTemplateChain.getHelm(), data.mmHelm);
|
|
124
125
|
}, {skipReason: 'applyRules is not implemented'});
|
|
125
126
|
}
|
|
126
127
|
});
|
|
@@ -49,7 +49,7 @@ category('PolyTool: Chain: parseNotation', () => {
|
|
|
49
49
|
for (const [testName, testData] of Object.entries(tests)) {
|
|
50
50
|
test(`${testName}`, async () => {
|
|
51
51
|
const rules = await getRules(['rules_example.json']);
|
|
52
|
-
const resChain = await Chain.
|
|
52
|
+
const resChain = await Chain.fromSeparator(testData.src.seq, helmHelper);
|
|
53
53
|
//expectArray(resChain.monomers.map((mL) => mL.length), testData.tgt.monomerCount);
|
|
54
54
|
//expect(resChain.linkages.length, testData.tgt.linkageCount);
|
|
55
55
|
// expect(resChain.getNotationHelm(), testData.tgt.helm);
|
|
@@ -60,7 +60,7 @@ category('PolyTool: Chain: parseNotation', () => {
|
|
|
60
60
|
hwe.editor.setMol(resMol!);
|
|
61
61
|
const resMolHelm = hwe.editor.getHelm();
|
|
62
62
|
|
|
63
|
-
const resHelm = resChain.
|
|
63
|
+
const resHelm = resChain.getHelm();
|
|
64
64
|
|
|
65
65
|
expect(resMolHelm, testData.tgt.helm);
|
|
66
66
|
expect(resHelm, testData.tgt.helm);
|
|
@@ -16,7 +16,7 @@ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
|
16
16
|
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
17
17
|
|
|
18
18
|
import {doPolyToolConvert} from '../polytool/conversion/pt-conversion';
|
|
19
|
-
import {getOverriddenLibrary} from '../polytool/conversion/pt-
|
|
19
|
+
import {getOverriddenLibrary} from '../polytool/conversion/pt-synthetic';
|
|
20
20
|
import {getRules} from '../polytool/conversion/pt-rules';
|
|
21
21
|
|
|
22
22
|
|
|
@@ -49,51 +49,51 @@ category('PolyTool: Convert', () => {
|
|
|
49
49
|
src: {seq: 'R-F-C(1)-T-G-H-F-Y-P-C(1)-meI'},
|
|
50
50
|
tgt: {
|
|
51
51
|
helm: 'PEPTIDE1{R.F.C.T.G.H.F.Y.P.C.[meI]}$PEPTIDE1,PEPTIDE1,3:R3-10:R3$$$V2.0',
|
|
52
|
-
mol: {atomCount: 95, bondCount: 100, inchiKey: 'LMJUFVBPWWJJPN-AJJYTACESA-N'
|
|
52
|
+
mol: {atomCount: 95, bondCount: 100, inchiKey: 'LMJUFVBPWWJJPN-AJJYTACESA-N'},
|
|
53
53
|
}
|
|
54
54
|
},
|
|
55
55
|
'cyclized-C(1)-0-1': {
|
|
56
56
|
src: {seq: 'C(1)-T-G-H-F-Y-P-C(1)-meI'},
|
|
57
57
|
tgt: {
|
|
58
58
|
helm: 'PEPTIDE1{C.T.G.H.F.Y.P.C.[meI]}$PEPTIDE1,PEPTIDE1,1:R3-8:R3$$$V2.0',
|
|
59
|
-
mol: {atomCount: 73, bondCount: 77, inchiKey: 'KLFRBMUPPMMGJM-HXTBFBBASA-N'
|
|
59
|
+
mol: {atomCount: 73, bondCount: 77, inchiKey: 'KLFRBMUPPMMGJM-HXTBFBBASA-N'},
|
|
60
60
|
}
|
|
61
61
|
},
|
|
62
62
|
'cyclized-C(1)-2-0': {
|
|
63
63
|
src: {seq: 'R-F-C(1)-T-G-H-F-Y-P-C(1)'},
|
|
64
64
|
tgt: {
|
|
65
65
|
helm: 'PEPTIDE1{R.F.C.T.G.H.F.Y.P.C}$PEPTIDE1,PEPTIDE1,3:R3-10:R3$$$V2.0',
|
|
66
|
-
mol: {atomCount: 86, bondCount: 91, inchiKey: 'WIHSRTQGMICACU-DDDKLKPZSA-N'
|
|
66
|
+
mol: {atomCount: 86, bondCount: 91, inchiKey: 'WIHSRTQGMICACU-DDDKLKPZSA-N'},
|
|
67
67
|
}
|
|
68
68
|
},
|
|
69
69
|
'cyclized-C(1)-0-0': {
|
|
70
70
|
src: {seq: 'C(1)-T-G-H-F-Y-P-C(1)'},
|
|
71
71
|
tgt: {
|
|
72
72
|
helm: 'PEPTIDE1{C.T.G.H.F.Y.P.C}$PEPTIDE1,PEPTIDE1,1:R3-8:R3$$$V2.0',
|
|
73
|
-
mol: {atomCount: 64, bondCount: 68, inchiKey: 'LOSMDBLEXLWPLB-OFZKBENXSA-N'
|
|
73
|
+
mol: {atomCount: 64, bondCount: 68, inchiKey: 'LOSMDBLEXLWPLB-OFZKBENXSA-N'},
|
|
74
74
|
}
|
|
75
75
|
},
|
|
76
76
|
'cyclized-D(2)-NH2(2)-3-0': {
|
|
77
77
|
src: {seq: 'R-F-D(2)-T-G-H-F-Y-P-NH2(2)'},
|
|
78
78
|
tgt: {
|
|
79
79
|
helm: 'PEPTIDE1{R.F.D.T.G.H.F.Y.P.[NH2]}$PEPTIDE1,PEPTIDE1,10:R2-3:R3$$$V2.0',
|
|
80
|
-
mol: {atomCount: 81, bondCount: 86, inchiKey: 'CBMGNYKOZWNVNK-AHGCAHLCSA-N'
|
|
80
|
+
mol: {atomCount: 81, bondCount: 86, inchiKey: 'CBMGNYKOZWNVNK-AHGCAHLCSA-N'},
|
|
81
81
|
}
|
|
82
82
|
},
|
|
83
83
|
'cyclized-D(2)-NH2(2)-0-0': {
|
|
84
84
|
src: {seq: 'D(2)-T-G-H-F-Y-P-NH2(2)'},
|
|
85
85
|
tgt: {
|
|
86
86
|
helm: 'PEPTIDE1{D.T.G.H.F.Y.P.[NH2]}$PEPTIDE1,PEPTIDE1,8:R2-1:R3$$$V2.0',
|
|
87
|
-
mol: {atomCount: 59, bondCount: 63, inchiKey: 'HGRHAUQBJXFERJ-MUFWPYSASA-N'
|
|
88
|
-
}
|
|
89
|
-
},
|
|
90
|
-
'cyclized-azG(4)-aG(4)-2-1': {
|
|
91
|
-
src: {seq: 'R-F-azG(4)-T-G-H-F-Y-P-aG(4)-meI'},
|
|
92
|
-
tgt: {
|
|
93
|
-
helm: 'PEPTIDE1{R.F.[GGaz].T.G.H.F.Y.P}|PEPTIDE2{[meI]}$PEPTIDE1,PEPTIDE1,3:R3-9:R2|PEPTIDE1,PEPTIDE2,3:R4-1:R1$$$V2.0',
|
|
94
|
-
mol: {atomCount: 97, bondCount: 103, inchiKey: 'WJSYGVBGPCCSJF-PERUNASMSA-N',},
|
|
87
|
+
mol: {atomCount: 59, bondCount: 63, inchiKey: 'HGRHAUQBJXFERJ-MUFWPYSASA-N'},
|
|
95
88
|
}
|
|
96
89
|
},
|
|
90
|
+
// 'cyclized-azG(4)-aG(4)-2-1': {
|
|
91
|
+
// src: {seq: 'R-F-azG(4)-T-G-H-F-Y-P-aG(4)-meI'},
|
|
92
|
+
// tgt: {
|
|
93
|
+
// helm: 'PEPTIDE1{R.F.[azG_GGaz].T.G.H.F.Y.P.[aG_GGaz].[meI]}|PEPTIDE2{[GGaz]}$PEPTIDE1,PEPTIDE2,3:R3-1:R1|PEPTIDE1,PEPTIDE2,10:R3-1:R2$$$V2.0',
|
|
94
|
+
// mol: {atomCount: 97, bondCount: 103, inchiKey: 'WJSYGVBGPCCSJF-PERUNASMSA-N'},
|
|
95
|
+
// }
|
|
96
|
+
// },
|
|
97
97
|
};
|
|
98
98
|
|
|
99
99
|
for (const [testName, testData] of Object.entries(tests)) {
|
|
@@ -15,7 +15,7 @@ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
|
|
|
15
15
|
import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
16
16
|
|
|
17
17
|
import {_package} from '../package-test';
|
|
18
|
-
import {getNewMonomers} from '../polytool/conversion/pt-
|
|
18
|
+
import {getNewMonomers} from '../polytool/conversion/pt-synthetic';
|
|
19
19
|
import {getRules, RuleReaction} from '../polytool/conversion/pt-rules';
|
|
20
20
|
|
|
21
21
|
category('toAtomicLevel', () => {
|
|
@@ -79,14 +79,14 @@ category('toAtomicLevel', () => {
|
|
|
79
79
|
const reactionRule = rules.reactionRules.find((r) => r.name == 'GGaz')!;
|
|
80
80
|
|
|
81
81
|
const [newSymbols, newMonomers] = getNewMonomers(rdKitModule, systemMonomerLib, reactionRule);
|
|
82
|
-
expect(newSymbols[0],
|
|
82
|
+
expect(newSymbols[0], 'azG_GGaz');
|
|
83
83
|
|
|
84
84
|
const mol = rdKitModule.get_mol(newMonomers[0].molfile);
|
|
85
85
|
try {
|
|
86
86
|
const molInchi = mol.get_inchi();
|
|
87
87
|
const molInchiKey = rdKitModule.get_inchikey_for_inchi(molInchi);
|
|
88
|
-
expect(mol.get_num_bonds(),
|
|
89
|
-
expect(mol.get_num_atoms(),
|
|
88
|
+
expect(mol.get_num_bonds(), 6);
|
|
89
|
+
expect(mol.get_num_atoms(), 7);
|
|
90
90
|
// TODO: Check inchi key for the new monomer molfile
|
|
91
91
|
// expect(molInchiKey, 'V2H10N2O3S-UHFFFAOYSA-N');
|
|
92
92
|
} finally {
|
package/src/utils/cyclized.ts
CHANGED
|
@@ -46,13 +46,15 @@ export class CyclizedNotationProvider implements INotationProvider {
|
|
|
46
46
|
}
|
|
47
47
|
|
|
48
48
|
public getHelm(seq: string, options?: any): string {
|
|
49
|
-
const seqChain = Chain.
|
|
50
|
-
const resPseudoHelm = seqChain.
|
|
49
|
+
const seqChain = Chain.fromSeparator(seq, this.helmHelper);
|
|
50
|
+
const resPseudoHelm = seqChain.getHelm();
|
|
51
51
|
return resPseudoHelm;
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
public createCellRendererBack(gridCol: DG.GridColumn | null, tableCol: DG.Column<string>):
|
|
55
|
-
|
|
54
|
+
public createCellRendererBack(gridCol: DG.GridColumn | null, tableCol: DG.Column<string>):
|
|
55
|
+
CellRendererBackBase<string> {
|
|
56
|
+
const maxLengthOfMonomer: number = 4;
|
|
57
|
+
// (_package.bioProperties ? _package.bioProperties.maxMonomerLength : 4) ?? 50;
|
|
56
58
|
const back = new CyclizedCellRendererBack(gridCol, tableCol,
|
|
57
59
|
maxLengthOfMonomer, this.helmHelper.seqHelper);
|
|
58
60
|
|